diff --git a/.quarto/idx/about/davis/index.qmd.json b/.quarto/idx/about/davis/index.qmd.json new file mode 100644 index 00000000..6e3ee9e5 --- /dev/null +++ b/.quarto/idx/about/davis/index.qmd.json @@ -0,0 +1 @@ +{"markdown":{"yaml":{"description":"Software Engineer","name":"Davis Vaughan","photo":"https://github.com/davisVaughan.png","social":{"github":"davisVaughan","twitter":"dvaughan32","website":"blog.davisvaughan.com"},"team":true},"containsRefs":false,"markdown":"\n\nDavis Vaughan is a Software Engineer at RStudio. He is the author or maintainer of several R packages for finance and data analytics, including tidyquant, timetk, tibbletime, sweep, rray and hardhat. He is well-known for this work around creating modeling packages in R. \n\n","srcMarkdownNoYaml":"\n\nDavis Vaughan is a Software Engineer at RStudio. He is the author or maintainer of several R packages for finance and data analytics, including tidyquant, timetk, tibbletime, sweep, rray and hardhat. He is well-known for this work around creating modeling packages in R. 
\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"description":"Software Engineer","name":"Davis 
Vaughan","photo":"https://github.com/davisVaughan.png","social":{"github":"davisVaughan","twitter":"dvaughan32","website":"blog.davisvaughan.com"},"team":true},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/about/index.qmd.json b/.quarto/idx/about/index.qmd.json new file mode 100644 index 00000000..124ab827 --- /dev/null +++ b/.quarto/idx/about/index.qmd.json @@ -0,0 +1 @@ +{"title":"Meet the team","markdown":{"yaml":{"title":"Meet the team","toc":true,"toc-depth":0,"include-after-body":"../resources.html"},"containsRefs":false,"markdown":"\n","srcMarkdownNoYaml":"\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../resources.html
"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle 
section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Meet the 
team"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/about/max/index.qmd.json b/.quarto/idx/about/max/index.qmd.json new file mode 100644 index 00000000..df0fddc4 --- /dev/null +++ b/.quarto/idx/about/max/index.qmd.json @@ -0,0 +1 @@ +{"markdown":{"yaml":{"description":"Software Engineer","name":"Max Kuhn","photo":"https://github.com/topepo.png","social":{"github":"topepo","twitter":"topepos","website":"https://topepo.github.io/caret/"},"team":true},"containsRefs":false,"markdown":"\n\nDr. Max Kuhn is a Software Engineer at RStudio. He is the author or maintainer of several R packages for predictive modeling including caret, AppliedPredictiveModeling, Cubist, C50 and SparseLDA. He routinely teaches classes in predictive modeling at Predictive Analytics World and UseR! and his publications include work on neuroscience biomarkers, drug discovery, molecular diagnostics and response surface methodology.\n","srcMarkdownNoYaml":"\n\nDr. Max Kuhn is a Software Engineer at RStudio. He is the author or maintainer of several R packages for predictive modeling including caret, AppliedPredictiveModeling, Cubist, C50 and SparseLDA. He routinely teaches classes in predictive modeling at Predictive Analytics World and UseR! 
and his publications include work on neuroscience biomarkers, drug discovery, molecular diagnostics and response surface methodology.\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work 
as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"description":"Software Engineer","name":"Max 
Kuhn","photo":"https://github.com/topepo.png","social":{"github":"topepo","twitter":"topepos","website":"https://topepo.github.io/caret/"},"team":true},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/books/fes/index.qmd.json b/.quarto/idx/books/fes/index.qmd.json new file mode 100644 index 00000000..0e4cc519 --- /dev/null +++ b/.quarto/idx/books/fes/index.qmd.json @@ -0,0 +1 @@ +{"title":"Feature Engineering & Selection","markdown":{"yaml":{"title":"Feature Engineering & Selection","description":"A Practical Approach for Predictive Models","authors":["Max Kuhn","Kjell Johnson"],"weight":3},"containsRefs":false,"markdown":"\n\n![](cover.jpg) \n\nA primary goal of predictive modeling is to find a reliable and effective predictive relationship between an available set of features and an outcome. This book provides an extensive set of techniques for uncovering effective representations of the features for modeling the outcome and for finding an optimal subset of features to improve a model’s predictive performance.\n\nAn **HTML version** of this text can be found at [`https://bookdown.org/max/FES`](https://bookdown.org/max/FES). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/topepo/FES`](https://github.com/topepo/FES). \n\nThe physical copies are sold by [**Amazon**](https://www.amazon.com/gp/product/1138079227/ref=as_li_tl?ie=UTF8&tag=apm0a-20&camp=1789&creative=9325&linkCode=as2&creativeASIN=1138079227&linkId=c801e78acfc3bc022dbed02af4851962) and [**Taylor & Francis**](https://www.crcpress.com/Feature-Engineering-and-Selection-A-Practical-Approach-for-Predictive-Models/Kuhn-Johnson/p/book/9781138079229). \n","srcMarkdownNoYaml":"\n\n![](cover.jpg) \n\nA primary goal of predictive modeling is to find a reliable and effective predictive relationship between an available set of features and an outcome. 
This book provides an extensive set of techniques for uncovering effective representations of the features for modeling the outcome and for finding an optimal subset of features to improve a model’s predictive performance.\n\nAn **HTML version** of this text can be found at [`https://bookdown.org/max/FES`](https://bookdown.org/max/FES). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/topepo/FES`](https://github.com/topepo/FES). \n\nThe physical copies are sold by [**Amazon**](https://www.amazon.com/gp/product/1138079227/ref=as_li_tl?ie=UTF8&tag=apm0a-20&camp=1789&creative=9325&linkCode=as2&creativeASIN=1138079227&linkId=c801e78acfc3bc022dbed02af4851962) and [**Taylor & Francis**](https://www.crcpress.com/Feature-Engineering-and-Selection-A-Practical-Approach-for-Predictive-Models/Kuhn-Johnson/p/book/9781138079229). \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"se
lf-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this 
document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to 
Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Feature Engineering & Selection","description":"A Practical Approach for Predictive Models","authors":["Max Kuhn","Kjell Johnson"],"weight":3},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/books/index.qmd.json b/.quarto/idx/books/index.qmd.json new file mode 100644 index 00000000..ccf17900 --- /dev/null +++ b/.quarto/idx/books/index.qmd.json @@ -0,0 +1 @@ +{"title":"Books","markdown":{"yaml":{"title":"Books","description":"Study up on statistics and modeling with our comprehensive books.","toc":false,"listing":[{"id":"book-links","categories":"unnumbered","type":"default","page-size":40,"contents":["*.qmd"]}]},"containsRefs":false,"markdown":"\n","srcMarkdownNoYaml":"\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-t
lmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":false,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional 
matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download 
Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Books","description":"Study up on statistics and modeling with our comprehensive books.","listing":[{"id":"book-links","categories":"unnumbered","type":"default","page-size":40,"contents":["*.qmd"]}]},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/books/moderndive/index.qmd.json b/.quarto/idx/books/moderndive/index.qmd.json new file mode 100644 index 00000000..f7f6e1aa --- /dev/null +++ b/.quarto/idx/books/moderndive/index.qmd.json @@ -0,0 +1 @@ +{"title":"Statistical Inference via Data Science","markdown":{"yaml":{"title":"Statistical Inference via Data Science","description":"A ModernDive into R and the Tidyverse","authors":["Chester Ismay","Albert Y. Kim"],"weight":5},"containsRefs":false,"markdown":"\n\n![](cover.png) \n\nThis book is intended to be a gentle introduction to the practice of analyzing data and answering questions using data the way data scientists, statisticians, data journalists, and other researchers would. Over the course of this book, you will develop your \"data science toolbox,\" equipping yourself with tools such as data visualization, data formatting, data wrangling, data modeling using regression, and statistical inference via hypothesis testing and confidence intervals.\n\nAn **HTML version** of this text can be found at [`https://moderndive.com/`](https://moderndive.com/). 
\n\nThe data sets and R code are available in the GitHub repository [`https://github.com/moderndive/ModernDive_book`](https://github.com/moderndive/ModernDive_book) and also the accompanying `moderndive` R package on [CRAN](https://cran.r-project.org/package=moderndive).\n\nThe physical copies are sold by [**Amazon**](https://www.amazon.com/Statistical-Inference-via-Data-Science/dp/0367409828/) and [**CRC Press**](https://www.crcpress.com/Statistical-Inference-via-Data-Science-A-ModernDive-into-R-and-the-Tidyverse/Ismay-Kim/p/book/9780367409821). \n","srcMarkdownNoYaml":"\n\n![](cover.png) \n\nThis book is intended to be a gentle introduction to the practice of analyzing data and answering questions using data the way data scientists, statisticians, data journalists, and other researchers would. Over the course of this book, you will develop your \"data science toolbox,\" equipping yourself with tools such as data visualization, data formatting, data wrangling, data modeling using regression, and statistical inference via hypothesis testing and confidence intervals.\n\nAn **HTML version** of this text can be found at [`https://moderndive.com/`](https://moderndive.com/). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/moderndive/ModernDive_book`](https://github.com/moderndive/ModernDive_book) and also the accompanying `moderndive` R package on [CRAN](https://cran.r-project.org/package=moderndive).\n\nThe physical copies are sold by [**Amazon**](https://www.amazon.com/Statistical-Inference-via-Data-Science/dp/0367409828/) and [**CRC Press**](https://www.crcpress.com/Statistical-Inference-via-Data-Science-A-ModernDive-into-R-and-the-Tidyverse/Ismay-Kim/p/book/9780367409821). 
\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Statistical Inference via Data Science","description":"A ModernDive into R and the Tidyverse","authors":["Chester Ismay","Albert Y. 
Kim"],"weight":5},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/books/smltar/index.qmd.json b/.quarto/idx/books/smltar/index.qmd.json new file mode 100644 index 00000000..90998850 --- /dev/null +++ b/.quarto/idx/books/smltar/index.qmd.json @@ -0,0 +1 @@ +{"title":"Supervised Machine Learning for Text Analysis in R","markdown":{"yaml":{"title":"Supervised Machine Learning for Text Analysis in R","description":"","authors":["Emil Hvitfeldt","Julia Silge"],"weight":2},"containsRefs":false,"markdown":"\n\n![](cover.png)\n\nThis book explains how to preprocess text data for modeling, train models, and evaluate model performance using tools from the tidyverse and tidymodels ecosystem. Models like these can be used to make predictions for new observations, to understand what natural language features or characteristics contribute to differences in the output, and more. If you are already familiar with the basics of predictive modeling, use the comprehensive, detailed examples in this book to extend your skills to the domain of natural language processing.\n\nThis book provides practical guidance and directly applicable knowledge for data scientists and analysts who want to integrate unstructured text data into their modeling pipelines. Learn how to use text data for both regression and classification tasks, and how to apply more straightforward algorithms like regularized regression or support vector machines as well as deep learning approaches. Natural language must be dramatically transformed to be ready for computation, so we explore typical text preprocessing and feature engineering steps like tokenization and word embeddings from the ground up. 
These steps influence model results in ways we can measure, both in terms of model metrics and other tangible consequences such as how fair or appropriate model results are.\n\n\nAn **HTML version** of this text can be found at [`https://smltar.com/`](https://smltar.com/). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/EmilHvitfeldt/smltar`](https://github.com/EmilHvitfeldt/smltar). \n\nThe physical copies are sold by [**Amazon**](https://amzn.to/3EwcEOK) and [**CRC Press**](https://www.routledge.com/Supervised-Machine-Learning-for-Text-Analysis-in-R/Hvitfeldt-Silge/p/book/9780367554194). \n\n","srcMarkdownNoYaml":"\n\n![](cover.png)\n\nThis book explains how to preprocess text data for modeling, train models, and evaluate model performance using tools from the tidyverse and tidymodels ecosystem. Models like these can be used to make predictions for new observations, to understand what natural language features or characteristics contribute to differences in the output, and more. If you are already familiar with the basics of predictive modeling, use the comprehensive, detailed examples in this book to extend your skills to the domain of natural language processing.\n\nThis book provides practical guidance and directly applicable knowledge for data scientists and analysts who want to integrate unstructured text data into their modeling pipelines. Learn how to use text data for both regression and classification tasks, and how to apply more straightforward algorithms like regularized regression or support vector machines as well as deep learning approaches. Natural language must be dramatically transformed to be ready for computation, so we explore typical text preprocessing and feature engineering steps like tokenization and word embeddings from the ground up. 
These steps influence model results in ways we can measure, both in terms of model metrics and other tangible consequences such as how fair or appropriate model results are.\n\n\nAn **HTML version** of this text can be found at [`https://smltar.com/`](https://smltar.com/). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/EmilHvitfeldt/smltar`](https://github.com/EmilHvitfeldt/smltar). \n\nThe physical copies are sold by [**Amazon**](https://amzn.to/3EwcEOK) and [**CRC Press**](https://www.routledge.com/Supervised-Machine-Learning-for-Text-Analysis-in-R/Hvitfeldt-Silge/p/book/9780367554194). \n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":
"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar 
navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Supervised Machine Learning for Text Analysis in R","description":"","authors":["Emil 
Hvitfeldt","Julia Silge"],"weight":2},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/books/tidytext/index.qmd.json b/.quarto/idx/books/tidytext/index.qmd.json new file mode 100644 index 00000000..5176e512 --- /dev/null +++ b/.quarto/idx/books/tidytext/index.qmd.json @@ -0,0 +1 @@ +{"title":"Text Mining with R","markdown":{"yaml":{"title":"Text Mining with R","description":"A Tidy Approach","authors":["Julia Silge","David Robinson"],"weight":4},"containsRefs":false,"markdown":"\n\n![](cover.png)\n\nThis practical book provides an introduction to text mining using tidy data principles in R, focusing on exploratory data analysis for text. Using tidy data principles can make text mining task easier and more effective; in this book, learn how to manipulate, summarize, and visualize characteristics of text using these methods and R packages from the tidy tool ecosystem.\n\nAn **HTML version** of this text can be found at [`https://www.tidytextmining.com/`](https://www.tidytextmining.com/). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/dgrtwo/tidy-text-mining`](https://github.com/dgrtwo/tidy-text-mining). \n\nThe physical copies are sold by [**Amazon**](http://amzn.to/2tZkmxG) and [**O’Reilly**](http://www.jdoqocy.com/click-4428796-11290546?url=http%3A%2F%2Fshop.oreilly.com%2Fproduct%2F0636920067153.do%3Fcmp%3Daf-strata-books-video-product_cj_0636920067153_%25zp&cjsku=0636920067153). \n","srcMarkdownNoYaml":"\n\n![](cover.png)\n\nThis practical book provides an introduction to text mining using tidy data principles in R, focusing on exploratory data analysis for text. 
Using tidy data principles can make text mining task easier and more effective; in this book, learn how to manipulate, summarize, and visualize characteristics of text using these methods and R packages from the tidy tool ecosystem.\n\nAn **HTML version** of this text can be found at [`https://www.tidytextmining.com/`](https://www.tidytextmining.com/). \n\nThe data sets and R code are available in the GitHub repository [`https://github.com/dgrtwo/tidy-text-mining`](https://github.com/dgrtwo/tidy-text-mining). \n\nThe physical copies are sold by [**Amazon**](http://amzn.to/2tZkmxG) and [**O’Reilly**](http://www.jdoqocy.com/click-4428796-11290546?url=http%3A%2F%2Fshop.oreilly.com%2Fproduct%2F0636920067153.do%3Fcmp%3Daf-strata-books-video-product_cj_0636920067153_%25zp&cjsku=0636920067153). \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true}
,"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this 
document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to 
Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Text Mining with R","description":"A Tidy Approach","authors":["Julia Silge","David Robinson"],"weight":4},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/books/tmwr/index.qmd.json b/.quarto/idx/books/tmwr/index.qmd.json new file mode 100644 index 00000000..9822a9b0 --- /dev/null +++ b/.quarto/idx/books/tmwr/index.qmd.json @@ -0,0 +1 @@ +{"title":"Tidy Modeling with R","markdown":{"yaml":{"title":"Tidy Modeling with R","description":"","authors":["Max Kuhn","Julia Silge"],"weight":1},"containsRefs":false,"markdown":"\n\n![](cover.png)\n\nModeling of data is integral to science, business, politics, and many other aspects of our lives. The goals of this book are to:\n\n- introduce and demonstrate how to use the tidymodels packages, and \n- outline good practices for the phases of the modeling process.\n\nAn **HTML version** of this text can be found at . \n\nThe sources to create the book are available in the GitHub repository . \n","srcMarkdownNoYaml":"\n\n![](cover.png)\n\nModeling of data is integral to science, business, politics, and many other aspects of our lives. The goals of this book are to:\n\n- introduce and demonstrate how to use the tidymodels packages, and \n- outline good practices for the phases of the modeling process.\n\nAn **HTML version** of this text can be found at . \n\nThe sources to create the book are available in the GitHub repository . 
\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Tidy Modeling with R","description":"","authors":["Max Kuhn","Julia Silge"],"weight":1},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git 
a/.quarto/idx/contribute/index.qmd.json b/.quarto/idx/contribute/index.qmd.json new file mode 100644 index 00000000..a11dfc2d --- /dev/null +++ b/.quarto/idx/contribute/index.qmd.json @@ -0,0 +1 @@ +{"title":"How to contribute to tidymodels","markdown":{"yaml":{"title":"How to contribute to tidymodels","toc":true,"toc-depth":0,"include-after-body":"../resources.html"},"headingText":"Design goals","containsRefs":false,"markdown":"\n\nThe ecosystem of tidymodels packages would not be possible without the contributions of the R community. No matter your current skills, it's possible to contribute back to tidymodels. Contributions are guided by our design goals.\n\n\nThe goals of tidymodels packages are to:\n\n * Encourage empirical validation and good statistical practice.\n\n * Smooth out heterogeneous interfaces.\n \n * Establish highly reusable infrastructure.\n\n * Enable a wider variety of methodologies.\n\n * Help package developers quickly build high quality model packages of their own.\n\nThese goals are guided by our [principles for creating modeling packages](https://tidymodels.github.io/model-implementation-principles/). \n\n**What are different ways _you_ can contribute?**\n\n## Answer questions\n\nYou can help others use and learn tidymodels by answering questions on the [RStudio community site](https://community.rstudio.com/tag/tidymodels), [Stack Overflow](https://stackoverflow.com/questions/tagged/tidymodels?sort=newest), and [Twitter](https://twitter.com/search?q=%23tidymodels&f=live). Many people asking for help with tidymodels don't know what a [reprex](https://www.tidyverse.org/help#reprex) is or how to craft one. Acknowledging an individual's problem, showing them how to build a reprex, and pointing them to helpful resources are all enormously beneficial, even if you don't immediately solve their problem.\n\nRemember that while you might have seen a problem a hundred times before, it's new to the person asking it. 
Be patient, polite, and empathic.\n\n## File issues\n\nIf you've found a bug, first create a minimal [reprex](https://www.tidyverse.org/help#reprex). Spend some time working to make it as minimal as possible; the more time you spend doing this, the easier it is to fix the bug. When your reprex is ready, file it on the [GitHub repo](https://github.com/tidymodels/) of the appropriate package. \n\nThe tidymodels team often focuses on one package at a time to reduce context switching and be more efficient. We may not address each issue right away, but we will use the reprex you create to understand your problem when it is time to focus on that package.\n\n## Contribute documentation\n\nDocumentation is a high priority for tidymodels, and pull requests to correct or improve documentation are welcome. The most important thing to know is that tidymodels packages use [roxygen2](https://roxygen2.r-lib.org/); this means that documentation is found in the R code close to the source of each function. There are some special tags, but most tidymodels packages now use markdown in the documentation. This makes it particularly easy to get started!\n\n\n## Contribute code\n\nIf you are a more experienced R programmer, you may have the inclination, interest, and ability to contribute directly to package development. Before you submit a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal.\n\nIn tidymodels packages, we use the [tidyverse style guide](https://style.tidyverse.org/) which will make sure that your new code and documentation matches the existing style. This makes the review process much smoother.\n\nThe tidymodels packages are explicitly built to support the creation of other modeling packages, and we would love to hear about what you build yourself! 
Check out our learning resources for [developing custom modeling tools](/learn/develop/).\n\n","srcMarkdownNoYaml":"\n\nThe ecosystem of tidymodels packages would not be possible without the contributions of the R community. No matter your current skills, it's possible to contribute back to tidymodels. Contributions are guided by our design goals.\n\n## Design goals\n\nThe goals of tidymodels packages are to:\n\n * Encourage empirical validation and good statistical practice.\n\n * Smooth out heterogeneous interfaces.\n \n * Establish highly reusable infrastructure.\n\n * Enable a wider variety of methodologies.\n\n * Help package developers quickly build high quality model packages of their own.\n\nThese goals are guided by our [principles for creating modeling packages](https://tidymodels.github.io/model-implementation-principles/). \n\n**What are different ways _you_ can contribute?**\n\n## Answer questions\n\nYou can help others use and learn tidymodels by answering questions on the [RStudio community site](https://community.rstudio.com/tag/tidymodels), [Stack Overflow](https://stackoverflow.com/questions/tagged/tidymodels?sort=newest), and [Twitter](https://twitter.com/search?q=%23tidymodels&f=live). Many people asking for help with tidymodels don't know what a [reprex](https://www.tidyverse.org/help#reprex) is or how to craft one. Acknowledging an individual's problem, showing them how to build a reprex, and pointing them to helpful resources are all enormously beneficial, even if you don't immediately solve their problem.\n\nRemember that while you might have seen a problem a hundred times before, it's new to the person asking it. Be patient, polite, and empathic.\n\n## File issues\n\nIf you've found a bug, first create a minimal [reprex](https://www.tidyverse.org/help#reprex). Spend some time working to make it as minimal as possible; the more time you spend doing this, the easier it is to fix the bug. 
When your reprex is ready, file it on the [GitHub repo](https://github.com/tidymodels/) of the appropriate package. \n\nThe tidymodels team often focuses on one package at a time to reduce context switching and be more efficient. We may not address each issue right away, but we will use the reprex you create to understand your problem when it is time to focus on that package.\n\n## Contribute documentation\n\nDocumentation is a high priority for tidymodels, and pull requests to correct or improve documentation are welcome. The most important thing to know is that tidymodels packages use [roxygen2](https://roxygen2.r-lib.org/); this means that documentation is found in the R code close to the source of each function. There are some special tags, but most tidymodels packages now use markdown in the documentation. This makes it particularly easy to get started!\n\n\n## Contribute code\n\nIf you are a more experienced R programmer, you may have the inclination, interest, and ability to contribute directly to package development. Before you submit a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal.\n\nIn tidymodels packages, we use the [tidyverse style guide](https://style.tidyverse.org/) which will make sure that your new code and documentation matches the existing style. This makes the review process much smoother.\n\nThe tidymodels packages are explicitly built to support the creation of other modeling packages, and we would love to hear about what you build yourself! 
Check out our learning resources for [developing custom modeling tools](/learn/develop/).\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this 
work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"How to contribute to tidymodels"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/find/all/index.qmd.json 
b/.quarto/idx/find/all/index.qmd.json new file mode 100644 index 00000000..7d3cf804 --- /dev/null +++ b/.quarto/idx/find/all/index.qmd.json @@ -0,0 +1 @@ +{"title":"Search all of tidymodels","markdown":{"yaml":{"title":"Search all of tidymodels","toc":true,"toc-depth":0,"include-after-body":"../../resources.html"},"containsRefs":false,"markdown":"\n\n\nHere are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation.\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"tidymodels_functions.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\ntidymodels_functions %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n","srcMarkdownNoYaml":"\n\n\nHere are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation.\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"tidymodels_functions.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\ntidymodels_functions %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 
25))\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Search all of tidymodels"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/find/broom/index.qmd.json 
b/.quarto/idx/find/broom/index.qmd.json new file mode 100644 index 00000000..3270a22e --- /dev/null +++ b/.quarto/idx/find/broom/index.qmd.json @@ -0,0 +1 @@ +{"title":"Search broom methods","markdown":{"yaml":{"subtitle":"Broom","title":"Search broom methods","weight":3,"description":"Find `tidy()`, `augment()`, and `glance()` methods for different objects.\n","toc":true,"toc-depth":0,"include-after-body":"../../resources.html"},"containsRefs":false,"markdown":"\n\nHere are all the broom functions available across CRAN packages. Click on the link in the topic column to find more information.\n\n\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"broom_functions.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\nbroom_functions %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n","srcMarkdownNoYaml":"\n\nHere are all the broom functions available across CRAN packages. 
Click on the link in the topic column to find more information.\n\n\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"broom_functions.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\nbroom_functions %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"subtitle":"Broom","title":"Search broom methods","weight":3,"description":"Find `tidy()`, `augment()`, and `glance()` methods for different 
objects.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/find/index.qmd.json b/.quarto/idx/find/index.qmd.json new file mode 100644 index 00000000..133a0638 --- /dev/null +++ b/.quarto/idx/find/index.qmd.json @@ -0,0 +1 @@ +{"title":"Explore tidymodels","markdown":{"yaml":{"title":"Explore tidymodels","description":"Explore searchable tables of all tidymodels packages and functions.","toc":true,"toc-depth":0,"include-after-body":"../resources.html"},"headingText":"Books","containsRefs":false,"markdown":"\n\nBelow you'll find searchable tables to help you explore the tidymodels packages and functions. The tables also include links to the relevant reference page to help you navigate the package documentation. Use the following categories to guide you:\n\n* [Search all of tidymodels](all/index.qmd)\n\n* [Search parsnip models](parsnip/index.qmd)\n\n* [Search recipe steps](recipes/index.qmd)\n\n* [Search broom methods](broom/index.qmd)\n\n\nIf you want to read more long form there are a number of books written about and using tidymodels\n\n* [List of books](../books/index.qmd)\n","srcMarkdownNoYaml":"\n\nBelow you'll find searchable tables to help you explore the tidymodels packages and functions. The tables also include links to the relevant reference page to help you navigate the package documentation. 
Use the following categories to guide you:\n\n* [Search all of tidymodels](all/index.qmd)\n\n* [Search parsnip models](parsnip/index.qmd)\n\n* [Search recipe steps](recipes/index.qmd)\n\n* [Search broom methods](broom/index.qmd)\n\n# Books\n\nIf you want to read more long form there are a number of books written about and using tidymodels\n\n* [List of books](../books/index.qmd)\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Explore tidymodels","description":"Explore searchable tables of all tidymodels packages and functions."},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git 
a/.quarto/idx/find/parsnip/index.qmd.json b/.quarto/idx/find/parsnip/index.qmd.json new file mode 100644 index 00000000..77117290 --- /dev/null +++ b/.quarto/idx/find/parsnip/index.qmd.json @@ -0,0 +1 @@ +{"title":"Search parsnip models","markdown":{"yaml":{"title":"Search parsnip models","weight":2,"description":"Find model types, engines, and arguments to fit and predict in the tidymodels framework.\n","toc":true,"toc-depth":0,"include-after-body":"../../resources.html"},"containsRefs":false,"markdown":"\n\nTo learn about the parsnip package, see [*Get Started: Build a Model*](/start/models/). Use the tables below to find [model types and engines](#models).\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"parsnip_models.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\nparsnip_models %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n","srcMarkdownNoYaml":"\n\nTo learn about the parsnip package, see [*Get Started: Build a Model*](/start/models/). 
Use the tables below to find [model types and engines](#models).\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"parsnip_models.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\nparsnip_models %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Search parsnip models","weight":2,"description":"Find model types, engines, and arguments to fit and predict in the tidymodels 
framework.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/find/recipes/index.qmd.json b/.quarto/idx/find/recipes/index.qmd.json new file mode 100644 index 00000000..c8012b1a --- /dev/null +++ b/.quarto/idx/find/recipes/index.qmd.json @@ -0,0 +1 @@ +{"title":"Search recipe steps","markdown":{"yaml":{"subtitle":"Recipes","title":"Search recipe steps","weight":3,"description":"Find recipe steps in the tidymodels framework to help you prep your data for modeling.\n","toc":true,"toc-depth":0,"include-after-body":"../../resources.html"},"containsRefs":false,"markdown":"\n\n\nTo learn about the recipes package, see [*Get Started: Preprocess your data with recipes*](/start/recipes/). The table below allows you to search for recipe steps across tidymodels packages.\n\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"recipe_functions.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\nrecipe_functions %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n","srcMarkdownNoYaml":"\n\n\nTo learn about the recipes package, see [*Get Started: Preprocess your data with recipes*](/start/recipes/). 
The table below allows you to search for recipe steps across tidymodels packages.\n\n\n```{r}\n#| include: false\n\nlibrary(tidymodels)\nlibrary(DT)\ntidymodels_prefer()\nload(\"recipe_functions.RData\")\n```\n\n```{r}\n#| label: table-display\n#| echo: false\nrecipe_functions %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 25))\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":0,"include-after-body":["../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"subtitle":"Recipes","title":"Search recipe steps","weight":3,"description":"Find recipe steps in the tidymodels framework to help you prep your data for 
modeling.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/help/index.qmd.json b/.quarto/idx/help/index.qmd.json new file mode 100644 index 00000000..bad9e01a --- /dev/null +++ b/.quarto/idx/help/index.qmd.json @@ -0,0 +1 @@ +{"title":"Get Help","markdown":{"yaml":{"title":"Get Help","toc":true,"toc-depth":1,"include-after-body":"../resources.html"},"headingText":"Asking for help","containsRefs":false,"markdown":"\n\n\nIf you're asking for R help, reporting a bug, or requesting a new feature, you're more likely to succeed if you include a good reproducible example, which is precisely what the [reprex](https://reprex.tidyverse.org/) package is built for. You can learn more about reprex, along with other tips on how to help others help you in the [tidyverse.org help section](https://www.tidyverse.org/help/).\n\n## Where to ask\n\n\"\"\n\nNow that you've made a reprex, you need to share it in an appropriate forum. Here are some options:\n\n* [__community.rstudio.com__](https://community.rstudio.com/c/ml/15): This is a warm\n and welcoming place to ask any questions you might have about\n tidymodels or more generally about modeling, machine learning, and deep learning. (You can also ask questions about the tidyverse and RStudio there, too!)\n \n* [__Stack Overflow__](https://stackoverflow.com/questions/tagged/tidymodels). You're probably already familiar\n with Stack Overflow from googling; it's a frequent source of answers to\n coding related questions. Asking a question on Stack Overflow can be \n intimidating, but if you've taken the time to create a reprex, you're much\n more likely to get a useful answer. Make sure to [tag your question](https://stackoverflow.com/help/tagging) with `r`\n and `tidymodels` so that the right people are more likely to see it.\n \n* [__Twitter__][twitter-rstats] and [__Mastodon__](https://fosstodon.org/tags/tidymodels). 
These sites are great places to share a link to your reprex that's hosted elsewhere! The [#rstats twitter][twitter-rstats] and [#rstats fosstodon](https://fosstodon.org/tags/tidymodels) communities are extremely friendly and active, and have great crowds to be a part of. Make sure you tag your tweet with `#rstats` and `#tidymodels`.\n\n* If you think you've found a __bug__, please follow the instructions for filing an issue on \n [contributing to tidymodels](/contribute/).\n\n[twitter-rstats]: https://twitter.com/search?q=%23rstats&src=typd\n\n## Resources\n\n- See what you need to know to [get started with tidymodels](/start/), and learn more about [using tidymodels for specific tasks](/learn/).\n\n- Each tidymodels package has its own documentation site, full of helpful information. Find [links to all package documentation sites](/packages/) and explore them!\n\n- Search [all tidymodels functions](/find/), and check out [our books on these topics](/books/).\n\n- Stay up to date with the latest news about tidymodels through our posts on the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n\n","srcMarkdownNoYaml":"\n\n## Asking for help\n\nIf you're asking for R help, reporting a bug, or requesting a new feature, you're more likely to succeed if you include a good reproducible example, which is precisely what the [reprex](https://reprex.tidyverse.org/) package is built for. You can learn more about reprex, along with other tips on how to help others help you in the [tidyverse.org help section](https://www.tidyverse.org/help/).\n\n## Where to ask\n\n\"\"\n\nNow that you've made a reprex, you need to share it in an appropriate forum. Here are some options:\n\n* [__community.rstudio.com__](https://community.rstudio.com/c/ml/15): This is a warm\n and welcoming place to ask any questions you might have about\n tidymodels or more generally about modeling, machine learning, and deep learning. 
(You can also ask questions about the tidyverse and RStudio there, too!)\n \n* [__Stack Overflow__](https://stackoverflow.com/questions/tagged/tidymodels). You're probably already familiar\n with Stack Overflow from googling; it's a frequent source of answers to\n coding related questions. Asking a question on Stack Overflow can be \n intimidating, but if you've taken the time to create a reprex, you're much\n more likely to get a useful answer. Make sure to [tag your question](https://stackoverflow.com/help/tagging) with `r`\n and `tidymodels` so that the right people are more likely to see it.\n \n* [__Twitter__][twitter-rstats] and [__Mastodon__](https://fosstodon.org/tags/tidymodels). These sites are great places to share a link to your reprex that's hosted elsewhere! The [#rstats twitter][twitter-rstats] and [#rstats fosstodon](https://fosstodon.org/tags/tidymodels) communities are extremely friendly and active, and have great crowds to be a part of. Make sure you tag your tweet with `#rstats` and `#tidymodels`.\n\n* If you think you've found a __bug__, please follow the instructions for filing an issue on \n [contributing to tidymodels](/contribute/).\n\n[twitter-rstats]: https://twitter.com/search?q=%23rstats&src=typd\n\n## Resources\n\n- See what you need to know to [get started with tidymodels](/start/), and learn more about [using tidymodels for specific tasks](/learn/).\n\n- Each tidymodels package has its own documentation site, full of helpful information. 
Find [links to all package documentation sites](/packages/) and explore them!\n\n- Search [all tidymodels functions](/find/), and check out [our books on these topics](/books/).\n\n- Stay up to date with the latest news about tidymodels through our posts on the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":1,"include-after-body":["../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Get Help"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/index.qmd.json b/.quarto/idx/index.qmd.json new file mode 100644 index 
00000000..060facb9 --- /dev/null +++ b/.quarto/idx/index.qmd.json @@ -0,0 +1 @@ +{"markdown":{"yaml":{"page-layout":"custom"},"containsRefs":false,"markdown":"\n\n::::: {#FrontPage}\n\n:::: {.band .first}\n:::: {.bandContent}\n\n::: {.hexBadges}\n\n
\n \"tidymodels\n
\n\n\n
\n \"rsample\n
\n\n\n
\n \"parsnip\n
\n \n \n\n
\n \"recipes\n
\n\n\n
\n \"tune\n
\n\n\n
\n \"yardstick\n
\n:::\n\n::: {.blurb}\n[TIDYMODELS]{.tagline}\n\nThe tidymodels framework is a collection of packages for modeling and machine learning using [tidyverse](https://www.tidyverse.org/) principles.\n\nInstall tidymodels with:\n\n```r\ninstall.packages(\"tidymodels\")\n```\n:::\n\n::::\n::::\n\n:::: {.band .second}\n:::: {.bandContent}\n::: {.blurb}\n[LEARN TIDYMODELS]{.tagline}\n\nWhether you are just starting out today or have years of experience with modeling, tidymodels offers a consistent, flexible framework for your work.\n\n![](images/cover.png){.bookCover fig-alt=\"Parsnip set your engine, illustrated by Allison Horst\"}\n:::\n\n::: {.blurb}\n
\n \n
What do you need to know to start using tidymodels? Learn what you need in 5 articles, starting with how to create a model and ending with a beginning-to-end modeling case study.
\n
\n\n
\n \n
After you are comfortable with the basics, you can learn how to go farther with tidymodels in your modeling and machine learning projects.
\n
\n\n:::\n::::\n::::\n\n:::: {.band .third}\n:::: {.bandContent}\n\n
\"\"
\n\n::: {.blurb}\n[STAY UP TO DATE]{.tagline}\n\nHear about the latest tidymodels news at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n:::\n::::\n::::\n\n:::::\n","srcMarkdownNoYaml":"\n\n::::: {#FrontPage}\n\n:::: {.band .first}\n:::: {.bandContent}\n\n::: {.hexBadges}\n\n
\n \"tidymodels\n
\n\n\n
\n \"rsample\n
\n\n\n
\n \"parsnip\n
\n \n \n\n
\n \"recipes\n
\n\n\n
\n \"tune\n
\n\n\n
\n \"yardstick\n
\n:::\n\n::: {.blurb}\n[TIDYMODELS]{.tagline}\n\nThe tidymodels framework is a collection of packages for modeling and machine learning using [tidyverse](https://www.tidyverse.org/) principles.\n\nInstall tidymodels with:\n\n```r\ninstall.packages(\"tidymodels\")\n```\n:::\n\n::::\n::::\n\n:::: {.band .second}\n:::: {.bandContent}\n::: {.blurb}\n[LEARN TIDYMODELS]{.tagline}\n\nWhether you are just starting out today or have years of experience with modeling, tidymodels offers a consistent, flexible framework for your work.\n\n![](images/cover.png){.bookCover fig-alt=\"Parsnip set your engine, illustrated by Allison Horst\"}\n:::\n\n::: {.blurb}\n
\n \n
What do you need to know to start using tidymodels? Learn what you need in 5 articles, starting with how to create a model and ending with a beginning-to-end modeling case study.
\n
\n\n
\n \n
After you are comfortable with the basics, you can learn how to go farther with tidymodels in your modeling and machine learning projects.
\n
\n\n:::\n::::\n::::\n\n:::: {.band .third}\n:::: {.bandContent}\n\n
\"\"
\n\n::: {.blurb}\n[STAY UP TO DATE]{.tagline}\n\nHear about the latest tidymodels news at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n:::\n::::\n::::\n\n:::::\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","styles.scss","styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"page-layout":"custom"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/develop/broom/index.qmd.json b/.quarto/idx/learn/develop/broom/index.qmd.json 
new file mode 100644 index 00000000..ff27317a --- /dev/null +++ b/.quarto/idx/learn/develop/broom/index.qmd.json @@ -0,0 +1 @@ +{"title":"Create your own broom tidier methods","markdown":{"yaml":{"title":"Create your own broom tidier methods","categories":["developer tools"],"type":"learn-subsection","weight":5,"description":"Write tidy(), glance(), and augment() methods for new model objects.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(tidyverse)\nlibrary(generics)\nlibrary(DT)\npkgs <- c(\"tidymodels\", \"tidyverse\", \"generics\", \"usethis\")\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThe broom package provides tools to summarize key information about models in tidy `tibble()`s. The package provides three verbs, or \"tidiers,\" to help make model objects easier to work with:\n\n* `tidy()` summarizes information about model components\n* `glance()` reports information about the entire model\n* `augment()` adds information about observations to a dataset\n\nEach of the three verbs above are _generic_, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant _method_ implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. the package that supplies the model object) rather than to broom. 
New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package.\n\nWe'd like to make implementing external tidier methods as painless as possible. The general process for doing so is:\n\n* re-export the tidier generics\n* implement tidying methods\n* document the new methods\n\nIn this article, we'll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible.\n\n## Re-export the tidier generics\n\nThe first step is to re-export the generic functions for `tidy()`, `glance()`, and/or `augment()`. You could do so from `broom` itself, but we've provided an alternative, much lighter dependency called `generics`.\n\nFirst you'll need to add the [generics](https://github.com/r-lib/generics) package to `Imports`. We recommend using the [usethis](https://github.com/r-lib/usethis) package for this:\n\n```{r}\n#| eval: false\nusethis::use_package(\"generics\", \"Imports\")\n```\n\nNext, you'll need to re-export the appropriate tidying methods. If you plan to implement a `glance()` method, for example, you can re-export the `glance()` generic by adding the following somewhere inside the `/R` folder of your package:\n\n```{r}\n#| eval: false\n#' @importFrom generics glance\n#' @export\ngenerics::glance\n```\n\nOftentimes it doesn't make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense.\n\n::: {.callout-warning}\n Please do not define `tidy()`, `glance()`, or `augment()` generics in your package. This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods. \n:::\n\n## Implement tidying methods\n\nYou'll now need to implement specific tidying methods for each of the generics you've re-exported in the above step. 
For each of `tidy()`, `glance()`, and `augment()`, we'll walk through the big picture, an example, and helpful resources.\n\nIn this article, we'll use the base R dataset `trees`, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R `lm()` function. \n\n```{r}\n# load in the trees dataset\ndata(trees)\n\n# take a look!\nstr(trees)\n\n# fit the timber volume as a function of girth and height\ntrees_model <- lm(Volume ~ Girth + Height, data = trees)\n```\n\nLet's take a look at the `summary()` of our `trees_model` fit.\n\n```{r}\nsummary(trees_model)\n```\n\nThis output gives some summary statistics on the residuals (which would be described more fully in an `augment()` output), model coefficients (which, in this case, make up the `tidy()` output), and some model-level summarizations such as RSE, $R^2$, etc. (which make up the `glance()` output.)\n\n### Implementing the `tidy()` method\n\nThe `tidy(x, ...)` method will return a tibble where each row contains information about a component of the model. The `x` input is a model object, and the dots (`...`) are an optional argument to supply additional information to any calls inside your method. New `tidy()` methods can take additional arguments, but _must_ include the `x` and `...` arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These `tidy()` methods are useful for inspecting model details and creating custom model visualizations.\n\nReturning to the example of our linear model on timber volume, we'd like to extract information on the model components. In this example, the components are the regression coefficients. 
After taking a look at the model object and its `summary()`, you might notice that you can extract the regression coefficients as follows:\n\n```{r}\nsummary(trees_model)$coefficients\n```\n\nThis object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. Converting to a tibble where the row names are contained in a column, you might write:\n\n```{r}\ntrees_model_tidy <- summary(trees_model)$coefficients %>% \n as_tibble(rownames = \"term\")\n\ntrees_model_tidy\n```\n\nThe broom package standardizes common column names used to describe coefficients. In this case, the column names are:\n\n```{r}\ncolnames(trees_model_tidy) <- c(\"term\", \"estimate\", \"std.error\", \"statistic\", \"p.value\")\n```\n\nA glossary giving the currently acceptable column names outputted by `tidy()` methods can be found [at the end of this article](#glossary). As a rule of thumb, column names resulting from `tidy()` methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions).\n\nFinally, it is common for `tidy()` methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. In this example, the `confint()` function can be used to calculate confidence intervals from a model object resulting from `lm()`:\n\n```{r}\nconfint(trees_model)\n```\n\nWith these considerations in mind, a reasonable `tidy()` method for `lm()` might look something like:\n\n```{r}\n#| eval: false\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n \n result <- summary(x)$coefficients %>%\n tibble::as_tibble(rownames = \"term\") %>%\n dplyr::rename(estimate = Estimate,\n std.error = `Std. 
Error`,\n statistic = `t value`,\n p.value = `Pr(>|t|)`)\n \n if (conf.int) {\n ci <- confint(x, level = conf.level)\n result <- dplyr::left_join(result, ci, by = \"term\")\n }\n \n result\n}\n```\n\n::: {.callout-note}\n If you're interested, the actual `tidy.lm()` source can be found [here](https://github.com/tidymodels/broom/blob/master/R/stats-lm-tidiers.R)! It's not too different from the version above except for some argument checking and additional columns. \n:::\n\nWith this method exported, then, if a user calls `tidy(fit)`, where `fit` is an output from `lm()`, the `tidy()` generic would \"redirect\" the call to the `tidy.lm()` function above.\n\nSome things to keep in mind while writing your `tidy()` method:\n\n* Sometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn't have the same interpretation, it doesn't make sense to summarize the fixed and random effects in the same table. In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of:\n\n```{r}\n#| eval: false\nmodel <- mixed_model(...)\ntidy(model, effects = \"fixed\")\ntidy(model, effects = \"random\")\n```\n\n* How are missing values encoded in the model object and its `summary()`? Ensure that rows are included even when the associated model component is missing or rank deficient.\n* Are there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to `tidy()` methods include:\n - `conf.int`: A logical indicating whether or not to calculate confidence/credible intervals. This should default to `FALSE`.\n - `conf.level`: The confidence level to use for the interval when `conf.int = TRUE`. 
Typically defaults to `.95`.\n - `exponentiate`: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression).\n\n### Implementing the `glance()` method\n\n`glance()` returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the `x` input is a model object, and the `...` is an optional argument to supply additional information to any calls inside your method. New `glance()` methods can also take additional arguments and _must_ include the `x` and `...` arguments. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).)\n\nReturning to the `trees_model` example, we could pull out the $R^2$ value with the following code:\n\n```{r}\nsummary(trees_model)$r.squared\n```\n\nSimilarly, for the adjusted $R^2$:\n\n```{r}\nsummary(trees_model)$adj.r.squared\n```\n\nUnfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a `tibble()` element-by-element by subsetting the `summary()` object repeatedly. The `with()` function, however, can help make this process a bit less tedious by evaluating expressions inside of the `summary(trees_model)` environment. To grab those those same two model elements from above using `with()`:\n\n```{r}\nwith(summary(trees_model),\n tibble::tibble(r.squared = r.squared,\n adj.r.squared = adj.r.squared))\n```\n\nA reasonable `glance()` method for `lm()`, then, might look something like:\n\n```{r}\n#| eval: false\nglance.lm <- function(x, ...) 
{\n with(\n summary(x),\n tibble::tibble(\n r.squared = r.squared,\n adj.r.squared = adj.r.squared,\n sigma = sigma,\n statistic = fstatistic[\"value\"],\n p.value = pf(\n fstatistic[\"value\"],\n fstatistic[\"numdf\"],\n fstatistic[\"dendf\"],\n lower.tail = FALSE\n ),\n df = fstatistic[\"numdf\"],\n logLik = as.numeric(stats::logLik(x)),\n AIC = stats::AIC(x),\n BIC = stats::BIC(x),\n deviance = stats::deviance(x),\n df.residual = df.residual(x),\n nobs = stats::nobs(x)\n )\n )\n}\n```\n\n::: {.callout-note}\nThis is the actual definition of `glance.lm()` provided by broom! \n:::\n\nSome things to keep in mind while writing `glance()` methods:\n* Output should not include the name of the modeling function or any arguments given to the modeling function.\n* In some cases, you may wish to provide model-level diagnostics not returned by the original object. For example, the above `glance.lm()` calculates `AIC` and `BIC` from the model fit. If these are easy to compute, feel free to add them. However, tidier methods are generally not an appropriate place to implement complex or time consuming calculations.\n* The `glance` method should always return the same columns in the same order when given an object of a given model class. If a summary metric (such as `AIC`) is not defined in certain circumstances, use `NA`.\n\n### Implementing the `augment()` method\n\n`augment()` methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a `.` prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in [the glossary](#glossary).) The `x` and `...` arguments share their meaning with the two functions described above. `augment` methods also optionally accept a `data` argument that is a `data.frame` (or `tibble`) to add observation-level information to, returning a `tibble` object with the same number of rows as `data`. 
Many `augment()` methods also accept a `newdata` argument, following the same conventions as the `data` argument, except with the underlying assumption that the model has not \"seen\" the data yet. As a result, `newdata` arguments need not contain the response columns in `data`. Only one of `data` or `newdata` should be supplied. A full glossary of acceptable arguments to `augment()` methods can be found at [the end of this article](#glossary).\n\nIf a `data` argument is not specified, `augment()` should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model.\n\nWith this is mind, we can look back to our `trees_model` example. For one, the `model` element inside of the `trees_model` object will allow us to recover the original data:\n\n```{r}\n#| rows.print = 5\ntrees_model$model\n```\n\nSimilarly, the fitted values and residuals can be accessed with the following code:\n\n```{r}\nhead(trees_model$fitted.values)\nhead(trees_model$residuals)\n```\n\nAs with `glance()` methods, it's fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. A common metric associated with linear models, for example, is the standard error of fitted values:\n\n```{r}\nse.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>%\n unname()\n\nhead(se.fit)\n```\n\nThus, a reasonable `augment()` method for `lm` might look something like this:\n\n```{r}\naugment.lm <- function(x, data = x$model, newdata = NULL, ...) 
{\n if (is.null(newdata)) {\n dplyr::bind_cols(tibble::as_tibble(data),\n tibble::tibble(.fitted = x$fitted.values,\n .se.fit = predict(x, \n newdata = data, \n se.fit = TRUE)$se.fit,\n .resid = x$residuals))\n } else {\n predictions <- predict(x, newdata = newdata, se.fit = TRUE)\n dplyr::bind_cols(tibble::as_tibble(newdata),\n tibble::tibble(.fitted = predictions$fit,\n .se.fit = predictions$se.fit))\n }\n}\n```\n\nSome other things to keep in mind while writing `augment()` methods:\n* The `newdata` argument should default to `NULL`. Users should only ever specify one of `data` or `newdata`. Providing both `data` and `newdata` should result in an error. The `newdata` argument should accept both `data.frame`s and `tibble`s.\n* Data given to the `data` argument must have both the original predictors and the original response. Data given to the `newdata` argument only needs to have the original predictors. This is important because there may be important information associated with training data that is not associated with test data. This means that the `original_data` object in `augment(model, data = original_data)` should provide `.fitted` and `.resid` columns (in most cases), whereas `test_data` in `augment(model, data = test_data)` only needs a `.fitted` column, even if the response is present in `test_data`.\n* If the `data` or `newdata` is specified as a `data.frame` with rownames, `augment` should return them in a column called `.rownames`.\n* For observations where no fitted values or summaries are available (where there's missing data, for example), return `NA`.\n* *The `augment()` method should always return as many rows as were in `data` or `newdata`*, depending on which is supplied\n\n::: {.callout-note}\nThe recommended interface and functionality for `augment()` methods may change soon. \n:::\n\n## Document the new methods\n\nThe only remaining step is to integrate the new methods into the parent package! 
To do so, just drop the methods into a `.R` file inside of the `/R` folder and document them using roxygen2. If you're unfamiliar with the process of documenting objects, you can read more about it [here](http://r-pkgs.had.co.nz/man.html). Here's an example of how our `tidy.lm()` method might be documented:\n\n```{r}\n#| eval: false\n#' Tidy a(n) lm object\n#'\n#' @param x A `lm` object.\n#' @param conf.int Logical indicating whether or not to include \n#' a confidence interval in the tidied output. Defaults to FALSE.\n#' @param conf.level The confidence level to use for the confidence \n#' interval if conf.int = TRUE. Must be strictly greater than 0 \n#' and less than 1. Defaults to 0.95, which corresponds to a \n#' 95 percent confidence interval.\n#' @param ... Unused, included for generic consistency only.\n#' @return A tidy [tibble::tibble()] summarizing component-level\n#' information about the model\n#'\n#' @examples\n#' # load the trees dataset\n#' data(trees)\n#' \n#' # fit a linear model on timber volume\n#' trees_model <- lm(Volume ~ Girth + Height, data = trees)\n#'\n#' # summarize model coefficients in a tidy tibble!\n#' tidy(trees_model)\n#'\n#' @export\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n\n # ... the rest of the function definition goes here!\n```\n\nOnce you've documented each of your new methods and executed `devtools::document()`, you're done! 
Congrats on implementing your own broom tidier methods for a new model object!\n\n## Glossaries\n\n\n```{r}\n#| include: false\n# grab the argument glossary: make a temporary file, write the\n# data to it, load it, and then delete it\nargs_url <- \"https://github.com/alexpghayes/modeltests/blob/master/data/argument_glossary.rda?raw=true\"\nargs_file <- tempfile()\nargs_get <- httr::GET(args_url)\nhttr::stop_for_status(args_get)\nwriteBin(httr::content(args_get, type = \"raw\"), args_file)\nload(args_file)\nunlink(args_file)\n\n# do the same thing for the columns\ncols_url <- \"https://github.com/alexpghayes/modeltests/blob/master/data/column_glossary.rda?raw=true\"\ncols_file <- tempfile()\ncols_get <- httr::GET(cols_url)\nhttr::stop_for_status(cols_get)\nwriteBin(httr::content(cols_get, type = \"raw\"), cols_file)\nload(cols_file)\nunlink(cols_file)\n```\n\n### Arguments\n\nTidier methods have a standardized set of acceptable argument and output column names. The currently acceptable argument names by tidier method are:\n\n```{r}\n#| echo: false\nargument_glossary %>%\n select(Method = method, Argument = argument) %>% \n mutate(Method = as.factor(Method)) %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 5))\n```\n\n### Column Names\n\nThe currently acceptable column names by tidier method are:\n\n```{r}\n#| echo: false\ncolumn_glossary %>%\n select(Method = method, Column = column) %>% \n mutate(Method = as.factor(Method)) %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 5))\n```\n\nThe [alexpghayes/modeltests](https://github.com/alexpghayes/modeltests) package provides unit testing infrastructure to check your new tidier methods. 
Please file an issue there to request new arguments/columns to be added to the glossaries!\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(tidyverse)\nlibrary(generics)\nlibrary(DT)\npkgs <- c(\"tidymodels\", \"tidyverse\", \"generics\", \"usethis\")\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThe broom package provides tools to summarize key information about models in tidy `tibble()`s. The package provides three verbs, or \"tidiers,\" to help make model objects easier to work with:\n\n* `tidy()` summarizes information about model components\n* `glance()` reports information about the entire model\n* `augment()` adds information about observations to a dataset\n\nEach of the three verbs above are _generic_, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant _method_ implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. the package that supplies the model object) rather than to broom. New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package.\n\nWe'd like to make implementing external tidier methods as painless as possible. 
The general process for doing so is:\n\n* re-export the tidier generics\n* implement tidying methods\n* document the new methods\n\nIn this article, we'll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible.\n\n## Re-export the tidier generics\n\nThe first step is to re-export the generic functions for `tidy()`, `glance()`, and/or `augment()`. You could do so from `broom` itself, but we've provided an alternative, much lighter dependency called `generics`.\n\nFirst you'll need to add the [generics](https://github.com/r-lib/generics) package to `Imports`. We recommend using the [usethis](https://github.com/r-lib/usethis) package for this:\n\n```{r}\n#| eval: false\nusethis::use_package(\"generics\", \"Imports\")\n```\n\nNext, you'll need to re-export the appropriate tidying methods. If you plan to implement a `glance()` method, for example, you can re-export the `glance()` generic by adding the following somewhere inside the `/R` folder of your package:\n\n```{r}\n#| eval: false\n#' @importFrom generics glance\n#' @export\ngenerics::glance\n```\n\nOftentimes it doesn't make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense.\n\n::: {.callout-warning}\n Please do not define `tidy()`, `glance()`, or `augment()` generics in your package. This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods. \n:::\n\n## Implement tidying methods\n\nYou'll now need to implement specific tidying methods for each of the generics you've re-exported in the above step. 
For each of `tidy()`, `glance()`, and `augment()`, we'll walk through the big picture, an example, and helpful resources.\n\nIn this article, we'll use the base R dataset `trees`, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R `lm()` function. \n\n```{r}\n# load in the trees dataset\ndata(trees)\n\n# take a look!\nstr(trees)\n\n# fit the timber volume as a function of girth and height\ntrees_model <- lm(Volume ~ Girth + Height, data = trees)\n```\n\nLet's take a look at the `summary()` of our `trees_model` fit.\n\n```{r}\nsummary(trees_model)\n```\n\nThis output gives some summary statistics on the residuals (which would be described more fully in an `augment()` output), model coefficients (which, in this case, make up the `tidy()` output), and some model-level summarizations such as RSE, $R^2$, etc. (which make up the `glance()` output.)\n\n### Implementing the `tidy()` method\n\nThe `tidy(x, ...)` method will return a tibble where each row contains information about a component of the model. The `x` input is a model object, and the dots (`...`) are an optional argument to supply additional information to any calls inside your method. New `tidy()` methods can take additional arguments, but _must_ include the `x` and `...` arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These `tidy()` methods are useful for inspecting model details and creating custom model visualizations.\n\nReturning to the example of our linear model on timber volume, we'd like to extract information on the model components. In this example, the components are the regression coefficients. 
After taking a look at the model object and its `summary()`, you might notice that you can extract the regression coefficients as follows:\n\n```{r}\nsummary(trees_model)$coefficients\n```\n\nThis object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. Converting to a tibble where the row names are contained in a column, you might write:\n\n```{r}\ntrees_model_tidy <- summary(trees_model)$coefficients %>% \n as_tibble(rownames = \"term\")\n\ntrees_model_tidy\n```\n\nThe broom package standardizes common column names used to describe coefficients. In this case, the column names are:\n\n```{r}\ncolnames(trees_model_tidy) <- c(\"term\", \"estimate\", \"std.error\", \"statistic\", \"p.value\")\n```\n\nA glossary giving the currently acceptable column names outputted by `tidy()` methods can be found [at the end of this article](#glossary). As a rule of thumb, column names resulting from `tidy()` methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions).\n\nFinally, it is common for `tidy()` methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. In this example, the `confint()` function can be used to calculate confidence intervals from a model object resulting from `lm()`:\n\n```{r}\nconfint(trees_model)\n```\n\nWith these considerations in mind, a reasonable `tidy()` method for `lm()` might look something like:\n\n```{r}\n#| eval: false\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n \n result <- summary(x)$coefficients %>%\n tibble::as_tibble(rownames = \"term\") %>%\n dplyr::rename(estimate = Estimate,\n std.error = `Std. 
Error`,\n statistic = `t value`,\n p.value = `Pr(>|t|)`)\n \n if (conf.int) {\n ci <- confint(x, level = conf.level)\n result <- dplyr::left_join(result, ci, by = \"term\")\n }\n \n result\n}\n```\n\n::: {.callout-note}\n If you're interested, the actual `tidy.lm()` source can be found [here](https://github.com/tidymodels/broom/blob/master/R/stats-lm-tidiers.R)! It's not too different from the version above except for some argument checking and additional columns. \n:::\n\nWith this method exported, then, if a user calls `tidy(fit)`, where `fit` is an output from `lm()`, the `tidy()` generic would \"redirect\" the call to the `tidy.lm()` function above.\n\nSome things to keep in mind while writing your `tidy()` method:\n\n* Sometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn't have the same interpretation, it doesn't make sense to summarize the fixed and random effects in the same table. In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of:\n\n```{r}\n#| eval: false\nmodel <- mixed_model(...)\ntidy(model, effects = \"fixed\")\ntidy(model, effects = \"random\")\n```\n\n* How are missing values encoded in the model object and its `summary()`? Ensure that rows are included even when the associated model component is missing or rank deficient.\n* Are there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to `tidy()` methods include:\n - `conf.int`: A logical indicating whether or not to calculate confidence/credible intervals. This should default to `FALSE`.\n - `conf.level`: The confidence level to use for the interval when `conf.int = TRUE`. 
Typically defaults to `.95`.\n - `exponentiate`: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression).\n\n### Implementing the `glance()` method\n\n`glance()` returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the `x` input is a model object, and the `...` is an optional argument to supply additional information to any calls inside your method. New `glance()` methods can also take additional arguments and _must_ include the `x` and `...` arguments. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).)\n\nReturning to the `trees_model` example, we could pull out the $R^2$ value with the following code:\n\n```{r}\nsummary(trees_model)$r.squared\n```\n\nSimilarly, for the adjusted $R^2$:\n\n```{r}\nsummary(trees_model)$adj.r.squared\n```\n\nUnfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a `tibble()` element-by-element by subsetting the `summary()` object repeatedly. The `with()` function, however, can help make this process a bit less tedious by evaluating expressions inside of the `summary(trees_model)` environment. To grab those those same two model elements from above using `with()`:\n\n```{r}\nwith(summary(trees_model),\n tibble::tibble(r.squared = r.squared,\n adj.r.squared = adj.r.squared))\n```\n\nA reasonable `glance()` method for `lm()`, then, might look something like:\n\n```{r}\n#| eval: false\nglance.lm <- function(x, ...) 
{\n with(\n summary(x),\n tibble::tibble(\n r.squared = r.squared,\n adj.r.squared = adj.r.squared,\n sigma = sigma,\n statistic = fstatistic[\"value\"],\n p.value = pf(\n fstatistic[\"value\"],\n fstatistic[\"numdf\"],\n fstatistic[\"dendf\"],\n lower.tail = FALSE\n ),\n df = fstatistic[\"numdf\"],\n logLik = as.numeric(stats::logLik(x)),\n AIC = stats::AIC(x),\n BIC = stats::BIC(x),\n deviance = stats::deviance(x),\n df.residual = df.residual(x),\n nobs = stats::nobs(x)\n )\n )\n}\n```\n\n::: {.callout-note}\nThis is the actual definition of `glance.lm()` provided by broom! \n:::\n\nSome things to keep in mind while writing `glance()` methods:\n* Output should not include the name of the modeling function or any arguments given to the modeling function.\n* In some cases, you may wish to provide model-level diagnostics not returned by the original object. For example, the above `glance.lm()` calculates `AIC` and `BIC` from the model fit. If these are easy to compute, feel free to add them. However, tidier methods are generally not an appropriate place to implement complex or time consuming calculations.\n* The `glance` method should always return the same columns in the same order when given an object of a given model class. If a summary metric (such as `AIC`) is not defined in certain circumstances, use `NA`.\n\n### Implementing the `augment()` method\n\n`augment()` methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a `.` prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in [the glossary](#glossary).) The `x` and `...` arguments share their meaning with the two functions described above. `augment` methods also optionally accept a `data` argument that is a `data.frame` (or `tibble`) to add observation-level information to, returning a `tibble` object with the same number of rows as `data`. 
Many `augment()` methods also accept a `newdata` argument, following the same conventions as the `data` argument, except with the underlying assumption that the model has not \"seen\" the data yet. As a result, `newdata` arguments need not contain the response columns in `data`. Only one of `data` or `newdata` should be supplied. A full glossary of acceptable arguments to `augment()` methods can be found at [the end of this article](#glossary).\n\nIf a `data` argument is not specified, `augment()` should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model.\n\nWith this is mind, we can look back to our `trees_model` example. For one, the `model` element inside of the `trees_model` object will allow us to recover the original data:\n\n```{r}\n#| rows.print = 5\ntrees_model$model\n```\n\nSimilarly, the fitted values and residuals can be accessed with the following code:\n\n```{r}\nhead(trees_model$fitted.values)\nhead(trees_model$residuals)\n```\n\nAs with `glance()` methods, it's fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. A common metric associated with linear models, for example, is the standard error of fitted values:\n\n```{r}\nse.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>%\n unname()\n\nhead(se.fit)\n```\n\nThus, a reasonable `augment()` method for `lm` might look something like this:\n\n```{r}\naugment.lm <- function(x, data = x$model, newdata = NULL, ...) 
{\n if (is.null(newdata)) {\n dplyr::bind_cols(tibble::as_tibble(data),\n tibble::tibble(.fitted = x$fitted.values,\n .se.fit = predict(x, \n newdata = data, \n se.fit = TRUE)$se.fit,\n .resid = x$residuals))\n } else {\n predictions <- predict(x, newdata = newdata, se.fit = TRUE)\n dplyr::bind_cols(tibble::as_tibble(newdata),\n tibble::tibble(.fitted = predictions$fit,\n .se.fit = predictions$se.fit))\n }\n}\n```\n\nSome other things to keep in mind while writing `augment()` methods:\n* The `newdata` argument should default to `NULL`. Users should only ever specify one of `data` or `newdata`. Providing both `data` and `newdata` should result in an error. The `newdata` argument should accept both `data.frame`s and `tibble`s.\n* Data given to the `data` argument must have both the original predictors and the original response. Data given to the `newdata` argument only needs to have the original predictors. This is important because there may be important information associated with training data that is not associated with test data. This means that the `original_data` object in `augment(model, data = original_data)` should provide `.fitted` and `.resid` columns (in most cases), whereas `test_data` in `augment(model, data = test_data)` only needs a `.fitted` column, even if the response is present in `test_data`.\n* If the `data` or `newdata` is specified as a `data.frame` with rownames, `augment` should return them in a column called `.rownames`.\n* For observations where no fitted values or summaries are available (where there's missing data, for example), return `NA`.\n* *The `augment()` method should always return as many rows as were in `data` or `newdata`*, depending on which is supplied\n\n::: {.callout-note}\nThe recommended interface and functionality for `augment()` methods may change soon. \n:::\n\n## Document the new methods\n\nThe only remaining step is to integrate the new methods into the parent package! 
To do so, just drop the methods into a `.R` file inside of the `/R` folder and document them using roxygen2. If you're unfamiliar with the process of documenting objects, you can read more about it [here](http://r-pkgs.had.co.nz/man.html). Here's an example of how our `tidy.lm()` method might be documented:\n\n```{r}\n#| eval: false\n#' Tidy a(n) lm object\n#'\n#' @param x A `lm` object.\n#' @param conf.int Logical indicating whether or not to include \n#' a confidence interval in the tidied output. Defaults to FALSE.\n#' @param conf.level The confidence level to use for the confidence \n#' interval if conf.int = TRUE. Must be strictly greater than 0 \n#' and less than 1. Defaults to 0.95, which corresponds to a \n#' 95 percent confidence interval.\n#' @param ... Unused, included for generic consistency only.\n#' @return A tidy [tibble::tibble()] summarizing component-level\n#' information about the model\n#'\n#' @examples\n#' # load the trees dataset\n#' data(trees)\n#' \n#' # fit a linear model on timber volume\n#' trees_model <- lm(Volume ~ Girth + Height, data = trees)\n#'\n#' # summarize model coefficients in a tidy tibble!\n#' tidy(trees_model)\n#'\n#' @export\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n\n # ... the rest of the function definition goes here!\n```\n\nOnce you've documented each of your new methods and executed `devtools::document()`, you're done! 
Congrats on implementing your own broom tidier methods for a new model object!\n\n## Glossaries\n\n\n```{r}\n#| include: false\n# grab the argument glossary: make a temporary file, write the\n# data to it, load it, and then delete it\nargs_url <- \"https://github.com/alexpghayes/modeltests/blob/master/data/argument_glossary.rda?raw=true\"\nargs_file <- tempfile()\nargs_get <- httr::GET(args_url)\nhttr::stop_for_status(args_get)\nwriteBin(httr::content(args_get, type = \"raw\"), args_file)\nload(args_file)\nunlink(args_file)\n\n# do the same thing for the columns\ncols_url <- \"https://github.com/alexpghayes/modeltests/blob/master/data/column_glossary.rda?raw=true\"\ncols_file <- tempfile()\ncols_get <- httr::GET(cols_url)\nhttr::stop_for_status(cols_get)\nwriteBin(httr::content(cols_get, type = \"raw\"), cols_file)\nload(cols_file)\nunlink(cols_file)\n```\n\n### Arguments\n\nTidier methods have a standardized set of acceptable argument and output column names. The currently acceptable argument names by tidier method are:\n\n```{r}\n#| echo: false\nargument_glossary %>%\n select(Method = method, Argument = argument) %>% \n mutate(Method = as.factor(Method)) %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 5))\n```\n\n### Column Names\n\nThe currently acceptable column names by tidier method are:\n\n```{r}\n#| echo: false\ncolumn_glossary %>%\n select(Method = method, Column = column) %>% \n mutate(Method = as.factor(Method)) %>% \n datatable(rownames = FALSE,\n class = 'cell-border stripe',\n filter = 'top',\n escape = FALSE,\n options = list(pageLength = 5))\n```\n\nThe [alexpghayes/modeltests](https://github.com/alexpghayes/modeltests) package provides unit testing infrastructure to check your new tidier methods. 
Please file an issue there to request new arguments/columns to be added to the glossaries!\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Create your own broom tidier methods","categories":["developer tools"],"type":"learn-subsection","weight":5,"description":"Write tidy(), glance(), and augment() methods for new model 
objects.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/develop/metrics/index.qmd.json b/.quarto/idx/learn/develop/metrics/index.qmd.json new file mode 100644 index 00000000..14200cd9 --- /dev/null +++ b/.quarto/idx/learn/develop/metrics/index.qmd.json @@ -0,0 +1 @@ +{"title":"Custom performance metrics","markdown":{"yaml":{"title":"Custom performance metrics","categories":["developer tools"],"type":"learn-subsection","weight":3,"description":"Create a new performance metric and integrate it with yardstick functions.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(rlang)\n\npkgs <- c(\"tidymodels\", \"rlang\")\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThe [yardstick](https://yardstick.tidymodels.org/) package already includes a large number of metrics, but there's obviously a chance that you might have a custom metric that hasn't been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics.\n\nWhy create custom metrics? With the infrastructure yardstick provides, you get:\n\n- Standardization between your metric and other preexisting metrics\n- Automatic error handling for types and lengths\n- Automatic selection of binary / multiclass metric implementations\n- Automatic `NA` handling\n- Support for grouped data frames\n- Support for use alongside other metrics in `metric_set()`\n\nThe implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. 
We would encourage you to look into the implementation of `roc_auc()` after reading this vignette if you want to work on a class probability metric.\n\n## Numeric example: MSE\n\nMean squared error (sometimes MSE or from here on, `mse()`) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for `mse()` is:\n\n$$ MSE = \\frac{1}{N} \\sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) $$\n\nAll metrics should have a data frame version, and a vector version. The data frame version here will be named `mse()`, and the vector version will be `mse_vec()`.\n\n### Vector implementation\n\nTo start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as `beta` in `f_meas()`). To create the vector function, you need to do two things:\n\n1) Create an internal implementation function, `mse_impl()`.\n2) Pass on that implementation function to `metric_vec_template()`.\n\nBelow, `mse_impl()` contains the actual implementation of the metric, and takes `truth` and `estimate` as arguments along with any metric specific arguments.\n\nThe yardstick function `metric_vec_template()` accepts the implementation function along with the other arguments to `mse_vec()` and actually executes `mse_impl()`. Additionally, it has a `cls` argument to specify the allowed class type of `truth` and `estimate`. If the classes are the same, a single character class can be passed, and if they are different a character vector of length 2 can be supplied.\n\nThe `metric_vec_template()` helper handles the removal of `NA` values in your metric, so your implementation function does not have to worry about them. It performs type checking using `cls` and also checks that the `estimator` is valid, the second of which is covered in the classification example. 
This way, all you have to worry about is the core implementation.\n\n```{r}\nlibrary(tidymodels)\n\nmse_vec <- function(truth, estimate, na_rm = TRUE, ...) {\n \n mse_impl <- function(truth, estimate) {\n mean((truth - estimate) ^ 2)\n }\n \n metric_vec_template(\n metric_impl = mse_impl,\n truth = truth, \n estimate = estimate,\n na_rm = na_rm,\n cls = \"numeric\",\n ...\n )\n \n}\n```\n\nAt this point, you've created the vector version of the mean squared error metric.\n\n```{r}\ndata(\"solubility_test\")\n\nmse_vec(\n truth = solubility_test$solubility, \n estimate = solubility_test$prediction\n)\n```\n\nIntelligent error handling is immediately available.\n\n```{r}\n#| error: true\nmse_vec(truth = \"apple\", estimate = 1)\n\nmse_vec(truth = 1, estimate = factor(\"xyz\"))\n```\n\n`NA` values are removed if `na_rm = TRUE` (the default). If `na_rm = FALSE` and any `NA` values are detected, then the metric automatically returns `NA`.\n\n```{r}\n# NA values removed\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5))\n\n# NA returned\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE)\n```\n\n### Data frame implementation\n\nThe data frame version of the metric should be fairly simple. It is a generic function with a `data.frame` method that calls the yardstick helper, `metric_summarizer()`, and passes along the `mse_vec()` function to it along with versions of `truth` and `estimate` that have been wrapped in `rlang::enquo()` and then unquoted with `!!` so that non-standard evaluation can be supported.\n\n```{r}\nlibrary(rlang)\n\nmse <- function(data, ...) {\n UseMethod(\"mse\")\n}\n\nmse <- new_numeric_metric(mse, direction = \"minimize\")\n\nmse.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) {\n \n metric_summarizer(\n metric_nm = \"mse\",\n metric_fn = mse_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! enquo(estimate), \n na_rm = na_rm,\n ...\n )\n \n}\n```\n\nAnd that's it. 
The yardstick package handles the rest with an internal call to `summarise()`.\n\n```{r}\n#| error: false\n#| eval: false\nmse(solubility_test, truth = solubility, estimate = prediction)\n\n# Error handling\nmse(solubility_test, truth = solubility, estimate = factor(\"xyz\"))\n```\n\nLet's test it out on a grouped data frame.\n\n```{r}\n#| message: false\nlibrary(dplyr)\n\nset.seed(1234)\nsize <- 100\ntimes <- 10\n\n# create 10 resamples\nsolubility_resampled <- bind_rows(\n replicate(\n n = times,\n expr = sample_n(solubility_test, size, replace = TRUE),\n simplify = FALSE\n ),\n .id = \"resample\"\n)\n\nsolubility_resampled %>%\n group_by(resample) %>%\n mse(solubility, prediction)\n```\n\n## Class example: miss rate\n\nMiss rate is another name for the false negative rate, and is a classification metric in the same family as `sens()` and `spec()`. It follows the formula:\n\n$$ miss\\_rate = \\frac{FN}{FN + TP} $$\n\nThis metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. As you will see in the example, you can achieve this with a call to `base::table(estimate, truth)` which correctly puts the \"correct\" result in the columns of the confusion matrix.\n\nClassification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let's start with the binary case.\n\n### Vector implementation\n\nThe vector implementation for classification metrics initially has the same setup as numeric metrics, but has an additional argument, `estimator` that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to `NULL`. 
Additionally, pass it along to `metric_vec_template()` so that it can check the provided `estimator` against the classes of `truth` and `estimate` to see if they are allowed.\n\n```{r}\n# Logic for `event_level`\nevent_col <- function(xtab, event_level) {\n if (identical(event_level, \"first\")) {\n colnames(xtab)[[1]]\n } else {\n colnames(xtab)[[2]]\n }\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n estimator <- finalize_estimator(truth, estimator)\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n```\n\nAnother change from the numeric metric is that a call to `finalize_estimator()` is made. This is the infrastructure that auto-selects the type of estimator to use.\n\n```{r}\ndata(\"two_class_example\")\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n```\n\nWhat happens if you try and pass in a multiclass result?\n\n```{r}\ndata(\"hpc_cv\")\nfold1 <- filter(hpc_cv, Resample == \"Fold01\")\nmiss_rate_vec(fold1$obs, fold1$pred)\n```\n\nThis isn't great, as currently multiclass `miss_rate()` isn't supported and it would have been better to throw an error if the `estimator` was not `\"binary\"`. Currently, `finalize_estimator()` uses its default implementation which selected `\"macro\"` as the `estimator` since `truth` was a factor with more than 2 classes. 
When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this.\n\nTo fix this, a generic counterpart to `finalize_estimator()`, called `finalize_estimator_internal()`, exists that helps you restrict the input types. If you provide a method to `finalize_estimator_internal()` where the method name is the same as your metric name, and then set the `metric_class` argument in `finalize_estimator()` to be the same thing, you can control how the auto-selection of the `estimator` is handled.\n\nDon't worry about the `metric_dispatcher` argument. This is handled for you and just exists as a dummy argument to dispatch off of.\n\nIt is also good practice to call `validate_estimator()` which handles the case where a user passed in the estimator themselves. This validates that the supplied `estimator` is one of the allowed types and error otherwise.\n\n```{r}\n#| error: false\nfinalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator) {\n \n validate_estimator(estimator, estimator_override = \"binary\")\n if (!is.null(estimator)) {\n return(estimator)\n }\n \n lvls <- levels(x)\n if (length(lvls) > 2) {\n stop(\"A multiclass `truth` input was provided, but only `binary` is supported.\")\n } \n \"binary\"\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) 
{\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n \n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n# Error thrown by our custom handler\n# miss_rate_vec(fold1$obs, fold1$pred)\n\n# Error thrown by validate_estimator()\n# miss_rate_vec(fold1$obs, fold1$pred, estimator = \"macro\")\n```\n\n### Supporting multiclass miss rate\n\nLike many other classification metrics such as `precision()` or `recall()`, miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read `vignette(\"multiclass\", \"yardstick\")` for more information about how these methods work.\n\nGenerally, they require more effort to get right than the binary case, especially if you want to have a performant version. Luckily, a somewhat standard template is used in yardstick and can be used here as well.\n\nLet's first remove the \"binary\" restriction we created earlier.\n\n```{r}\nrm(finalize_estimator_internal.miss_rate)\n```\n\nThe main changes below are:\n\n- The binary implementation is moved to `miss_rate_binary()`.\n\n- `miss_rate_estimator_impl()` is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from `miss_rate_impl()` and also accepts the `estimator` argument using R's function scoping rules.\n\n- `miss_rate_multiclass()` provides the implementation for the multiclass case. 
It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in `miss_rate_estimator_impl()` depending on whether or not it was macro or weighted macro.\n\n```{r}\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n xtab <- table(estimate, truth)\n # Rather than implement the actual method here, we rely on\n # an *_estimator_impl() function that can handle binary\n # and multiclass cases\n miss_rate_estimator_impl(xtab, estimator, event_level)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n\n# This function switches between binary and multiclass implementations\nmiss_rate_estimator_impl <- function(data, estimator, event_level) {\n if(estimator == \"binary\") {\n miss_rate_binary(data, event_level)\n } else {\n # Encapsulates the macro, macro weighted, and micro cases\n wt <- get_weights(data, estimator)\n res <- miss_rate_multiclass(data, estimator)\n weighted.mean(res, wt)\n }\n}\n\n\nmiss_rate_binary <- function(data, event_level) {\n col <- event_col(data, event_level)\n col2 <- setdiff(colnames(data), col)\n \n tp <- data[col, col]\n fn <- data[col2, col]\n \n fn / (fn + tp)\n}\n\nmiss_rate_multiclass <- function(data, estimator) {\n \n # We need tp and fn for all classes individually\n # we can get this by taking advantage of the fact\n # that tp + fn = colSums(data)\n tp <- diag(data)\n tpfn <- colSums(data)\n fn <- tpfn - 
tp\n \n # If using a micro estimator, we sum the individual\n # pieces before performing the miss rate calculation\n if (estimator == \"micro\") {\n tp <- sum(tp)\n fn <- sum(fn)\n }\n \n # return the vector \n tp / (tp + fn)\n}\n```\n\nFor the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. For example, `f_meas()` is a combination of `recall()` and `precision()`. When calculating a macro averaged `f_meas()`, the weighting must be applied 1 time, at the very end of the calculation. `recall_multiclass()` and `precision_multiclass()` are defined similarly to how `miss_rate_multiclass()` is defined and returns the unweighted vector of calculations. This means we can directly use this in `f_meas()`, and then weight everything once at the end of that calculation.\n\nLet's try it out now:\n\n```{r}\n# two class\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n\n# multiclass\nmiss_rate_vec(fold1$obs, fold1$pred)\n```\n\n#### Data frame implementation\n\nLuckily, the data frame implementation is as simple as the numeric case, we just need to add an extra `estimator` argument and pass that through.\n\n```{r}\nmiss_rate <- function(data, ...) {\n UseMethod(\"miss_rate\")\n}\n\nmiss_rate <- new_class_metric(miss_rate, direction = \"minimize\")\n\nmiss_rate.data.frame <- function(data, \n truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n metric_summarizer(\n metric_nm = \"miss_rate\",\n metric_fn = miss_rate_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! 
enquo(estimate), \n estimator = estimator,\n na_rm = na_rm,\n event_level = event_level,\n ...\n )\n}\n```\n\n```{r}\n#| error: false\n#| eval: false\n# Macro weighted automatically selected\nfold1 %>%\n miss_rate(obs, pred)\n\n# Switch to micro\nfold1 %>%\n miss_rate(obs, pred, estimator = \"micro\")\n\n# Macro weighted by resample\nhpc_cv %>%\n group_by(Resample) %>%\n miss_rate(obs, pred, estimator = \"macro_weighted\")\n\n# Error handling\nmiss_rate(hpc_cv, obs, VF)\n```\n\n## Using custom metrics\n\nThe `metric_set()` function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, `metric_set()` fails. By using `new_numeric_metric()` and `new_class_metric()` in the above custom metrics, they work out of the box without any additional adjustments.\n\n```{r}\nnumeric_mets <- metric_set(mse, rmse)\n\nnumeric_mets(solubility_test, solubility, prediction)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(\"yardstick\")\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(rlang)\n\npkgs <- c(\"tidymodels\", \"rlang\")\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThe [yardstick](https://yardstick.tidymodels.org/) package already includes a large number of metrics, but there's obviously a chance that you might have a custom metric that hasn't been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics.\n\nWhy create custom metrics? 
With the infrastructure yardstick provides, you get:\n\n- Standardization between your metric and other preexisting metrics\n- Automatic error handling for types and lengths\n- Automatic selection of binary / multiclass metric implementations\n- Automatic `NA` handling\n- Support for grouped data frames\n- Support for use alongside other metrics in `metric_set()`\n\nThe implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of `roc_auc()` after reading this vignette if you want to work on a class probability metric.\n\n## Numeric example: MSE\n\nMean squared error (sometimes MSE or from here on, `mse()`) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for `mse()` is:\n\n$$ MSE = \\frac{1}{N} \\sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) $$\n\nAll metrics should have a data frame version, and a vector version. The data frame version here will be named `mse()`, and the vector version will be `mse_vec()`.\n\n### Vector implementation\n\nTo start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as `beta` in `f_meas()`). 
To create the vector function, you need to do two things:\n\n1) Create an internal implementation function, `mse_impl()`.\n2) Pass on that implementation function to `metric_vec_template()`.\n\nBelow, `mse_impl()` contains the actual implementation of the metric, and takes `truth` and `estimate` as arguments along with any metric specific arguments.\n\nThe yardstick function `metric_vec_template()` accepts the implementation function along with the other arguments to `mse_vec()` and actually executes `mse_impl()`. Additionally, it has a `cls` argument to specify the allowed class type of `truth` and `estimate`. If the classes are the same, a single character class can be passed, and if they are different a character vector of length 2 can be supplied.\n\nThe `metric_vec_template()` helper handles the removal of `NA` values in your metric, so your implementation function does not have to worry about them. It performs type checking using `cls` and also checks that the `estimator` is valid, the second of which is covered in the classification example. This way, all you have to worry about is the core implementation.\n\n```{r}\nlibrary(tidymodels)\n\nmse_vec <- function(truth, estimate, na_rm = TRUE, ...) {\n \n mse_impl <- function(truth, estimate) {\n mean((truth - estimate) ^ 2)\n }\n \n metric_vec_template(\n metric_impl = mse_impl,\n truth = truth, \n estimate = estimate,\n na_rm = na_rm,\n cls = \"numeric\",\n ...\n )\n \n}\n```\n\nAt this point, you've created the vector version of the mean squared error metric.\n\n```{r}\ndata(\"solubility_test\")\n\nmse_vec(\n truth = solubility_test$solubility, \n estimate = solubility_test$prediction\n)\n```\n\nIntelligent error handling is immediately available.\n\n```{r}\n#| error: true\nmse_vec(truth = \"apple\", estimate = 1)\n\nmse_vec(truth = 1, estimate = factor(\"xyz\"))\n```\n\n`NA` values are removed if `na_rm = TRUE` (the default). 
If `na_rm = FALSE` and any `NA` values are detected, then the metric automatically returns `NA`.\n\n```{r}\n# NA values removed\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5))\n\n# NA returned\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE)\n```\n\n### Data frame implementation\n\nThe data frame version of the metric should be fairly simple. It is a generic function with a `data.frame` method that calls the yardstick helper, `metric_summarizer()`, and passes along the `mse_vec()` function to it along with versions of `truth` and `estimate` that have been wrapped in `rlang::enquo()` and then unquoted with `!!` so that non-standard evaluation can be supported.\n\n```{r}\nlibrary(rlang)\n\nmse <- function(data, ...) {\n UseMethod(\"mse\")\n}\n\nmse <- new_numeric_metric(mse, direction = \"minimize\")\n\nmse.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) {\n \n metric_summarizer(\n metric_nm = \"mse\",\n metric_fn = mse_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! enquo(estimate), \n na_rm = na_rm,\n ...\n )\n \n}\n```\n\nAnd that's it. The yardstick package handles the rest with an internal call to `summarise()`.\n\n```{r}\n#| error: false\n#| eval: false\nmse(solubility_test, truth = solubility, estimate = prediction)\n\n# Error handling\nmse(solubility_test, truth = solubility, estimate = factor(\"xyz\"))\n```\n\nLet's test it out on a grouped data frame.\n\n```{r}\n#| message: false\nlibrary(dplyr)\n\nset.seed(1234)\nsize <- 100\ntimes <- 10\n\n# create 10 resamples\nsolubility_resampled <- bind_rows(\n replicate(\n n = times,\n expr = sample_n(solubility_test, size, replace = TRUE),\n simplify = FALSE\n ),\n .id = \"resample\"\n)\n\nsolubility_resampled %>%\n group_by(resample) %>%\n mse(solubility, prediction)\n```\n\n## Class example: miss rate\n\nMiss rate is another name for the false negative rate, and is a classification metric in the same family as `sens()` and `spec()`. 
It follows the formula:\n\n$$ miss\\_rate = \\frac{FN}{FN + TP} $$\n\nThis metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. As you will see in the example, you can achieve this with a call to `base::table(estimate, truth)` which correctly puts the \"correct\" result in the columns of the confusion matrix.\n\nClassification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let's start with the binary case.\n\n### Vector implementation\n\nThe vector implementation for classification metrics initially has the same setup as numeric metrics, but has an additional argument, `estimator` that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to `NULL`. Additionally, pass it along to `metric_vec_template()` so that it can check the provided `estimator` against the classes of `truth` and `estimate` to see if they are allowed.\n\n```{r}\n# Logic for `event_level`\nevent_col <- function(xtab, event_level) {\n if (identical(event_level, \"first\")) {\n colnames(xtab)[[1]]\n } else {\n colnames(xtab)[[2]]\n }\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n estimator <- finalize_estimator(truth, estimator)\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n```\n\nAnother change from the numeric metric is that a call to `finalize_estimator()` is made. 
This is the infrastructure that auto-selects the type of estimator to use.\n\n```{r}\ndata(\"two_class_example\")\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n```\n\nWhat happens if you try and pass in a multiclass result?\n\n```{r}\ndata(\"hpc_cv\")\nfold1 <- filter(hpc_cv, Resample == \"Fold01\")\nmiss_rate_vec(fold1$obs, fold1$pred)\n```\n\nThis isn't great, as currently multiclass `miss_rate()` isn't supported and it would have been better to throw an error if the `estimator` was not `\"binary\"`. Currently, `finalize_estimator()` uses its default implementation which selected `\"macro\"` as the `estimator` since `truth` was a factor with more than 2 classes. When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this.\n\nTo fix this, a generic counterpart to `finalize_estimator()`, called `finalize_estimator_internal()`, exists that helps you restrict the input types. If you provide a method to `finalize_estimator_internal()` where the method name is the same as your metric name, and then set the `metric_class` argument in `finalize_estimator()` to be the same thing, you can control how the auto-selection of the `estimator` is handled.\n\nDon't worry about the `metric_dispatcher` argument. This is handled for you and just exists as a dummy argument to dispatch off of.\n\nIt is also good practice to call `validate_estimator()` which handles the case where a user passed in the estimator themselves. 
This validates that the supplied `estimator` is one of the allowed types and error otherwise.\n\n```{r}\n#| error: false\nfinalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator) {\n \n validate_estimator(estimator, estimator_override = \"binary\")\n if (!is.null(estimator)) {\n return(estimator)\n }\n \n lvls <- levels(x)\n if (length(lvls) > 2) {\n stop(\"A multiclass `truth` input was provided, but only `binary` is supported.\")\n } \n \"binary\"\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n \n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n# Error thrown by our custom handler\n# miss_rate_vec(fold1$obs, fold1$pred)\n\n# Error thrown by validate_estimator()\n# miss_rate_vec(fold1$obs, fold1$pred, estimator = \"macro\")\n```\n\n### Supporting multiclass miss rate\n\nLike many other classification metrics such as `precision()` or `recall()`, miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read `vignette(\"multiclass\", \"yardstick\")` for more information about how these methods work.\n\nGenerally, they require more effort to get right than the binary case, especially if you want to have a performant version. 
Luckily, a somewhat standard template is used in yardstick and can be used here as well.\n\nLet's first remove the \"binary\" restriction we created earlier.\n\n```{r}\nrm(finalize_estimator_internal.miss_rate)\n```\n\nThe main changes below are:\n\n- The binary implementation is moved to `miss_rate_binary()`.\n\n- `miss_rate_estimator_impl()` is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from `miss_rate_impl()` and also accepts the `estimator` argument using R's function scoping rules.\n\n- `miss_rate_multiclass()` provides the implementation for the multiclass case. It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in `miss_rate_estimator_impl()` depending on whether or not it was macro or weighted macro.\n\n```{r}\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) 
{\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n xtab <- table(estimate, truth)\n # Rather than implement the actual method here, we rely on\n # an *_estimator_impl() function that can handle binary\n # and multiclass cases\n miss_rate_estimator_impl(xtab, estimator, event_level)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n\n# This function switches between binary and multiclass implementations\nmiss_rate_estimator_impl <- function(data, estimator, event_level) {\n if(estimator == \"binary\") {\n miss_rate_binary(data, event_level)\n } else {\n # Encapsulates the macro, macro weighted, and micro cases\n wt <- get_weights(data, estimator)\n res <- miss_rate_multiclass(data, estimator)\n weighted.mean(res, wt)\n }\n}\n\n\nmiss_rate_binary <- function(data, event_level) {\n col <- event_col(data, event_level)\n col2 <- setdiff(colnames(data), col)\n \n tp <- data[col, col]\n fn <- data[col2, col]\n \n fn / (fn + tp)\n}\n\nmiss_rate_multiclass <- function(data, estimator) {\n \n # We need tp and fn for all classes individually\n # we can get this by taking advantage of the fact\n # that tp + fn = colSums(data)\n tp <- diag(data)\n tpfn <- colSums(data)\n fn <- tpfn - tp\n \n # If using a micro estimator, we sum the individual\n # pieces before performing the miss rate calculation\n if (estimator == \"micro\") {\n tp <- sum(tp)\n fn <- sum(fn)\n }\n \n # return the vector \n tp / (tp + fn)\n}\n```\n\nFor the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. 
For example, `f_meas()` is a combination of `recall()` and `precision()`. When calculating a macro averaged `f_meas()`, the weighting must be applied 1 time, at the very end of the calculation. `recall_multiclass()` and `precision_multiclass()` are defined similarly to how `miss_rate_multiclass()` is defined and returns the unweighted vector of calculations. This means we can directly use this in `f_meas()`, and then weight everything once at the end of that calculation.\n\nLet's try it out now:\n\n```{r}\n# two class\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n\n# multiclass\nmiss_rate_vec(fold1$obs, fold1$pred)\n```\n\n#### Data frame implementation\n\nLuckily, the data frame implementation is as simple as the numeric case, we just need to add an extra `estimator` argument and pass that through.\n\n```{r}\nmiss_rate <- function(data, ...) {\n UseMethod(\"miss_rate\")\n}\n\nmiss_rate <- new_class_metric(miss_rate, direction = \"minimize\")\n\nmiss_rate.data.frame <- function(data, \n truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n metric_summarizer(\n metric_nm = \"miss_rate\",\n metric_fn = miss_rate_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! enquo(estimate), \n estimator = estimator,\n na_rm = na_rm,\n event_level = event_level,\n ...\n )\n}\n```\n\n```{r}\n#| error: false\n#| eval: false\n# Macro weighted automatically selected\nfold1 %>%\n miss_rate(obs, pred)\n\n# Switch to micro\nfold1 %>%\n miss_rate(obs, pred, estimator = \"micro\")\n\n# Macro weighted by resample\nhpc_cv %>%\n group_by(Resample) %>%\n miss_rate(obs, pred, estimator = \"macro_weighted\")\n\n# Error handling\nmiss_rate(hpc_cv, obs, VF)\n```\n\n## Using custom metrics\n\nThe `metric_set()` function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, `metric_set()` fails. 
By using `new_numeric_metric()` and `new_class_metric()` in the above custom metrics, they work out of the box without any additional adjustments.\n\n```{r}\nnumeric_mets <- metric_set(mse, rmse)\n\nnumeric_mets(solubility_test, solubility, prediction)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(\"yardstick\")\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Custom performance metrics","categories":["developer tools"],"type":"learn-subsection","weight":3,"description":"Create a new performance metric and integrate it with yardstick 
functions.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/develop/models/index.qmd.json b/.quarto/idx/learn/develop/models/index.qmd.json new file mode 100644 index 00000000..099b6915 --- /dev/null +++ b/.quarto/idx/learn/develop/models/index.qmd.json @@ -0,0 +1 @@ +{"title":"How to build a parsnip model","markdown":{"yaml":{"title":"How to build a parsnip model","categories":["developer tools"],"type":"learn-subsection","weight":2,"description":"Create a parsnip model function from an existing model implementation.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(mda)\nlibrary(tidymodels)\n\npkgs <- c(\"tidymodels\", \"mda\", \"modeldata\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThe parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this:\n\n * It eliminates a lot of duplicate code.\n * Since the expressions are not evaluated until fitting, it eliminates many package dependencies.\n\nA parsnip model function is itself very general. For example, the `logistic_reg()` function itself doesn't have any model code within it. Instead, each model function is associated with one or more computational _engines_. These might be different R packages or some function in another language (that can be evaluated by R). \n\nThis article describes the process of creating a new model function. 
Before proceeding, take a minute and read our [guidelines on creating modeling packages](https://tidymodels.github.io/model-implementation-principles/) to understand the general themes and conventions that we use. \n\n## An example model\n\nAs an example, we'll create a function for _mixture discriminant analysis_. There are [a few packages](http://search.r-project.org/cgi-bin/namazu.cgi?query=%22mixture+discriminant%22&max=100&result=normal&sort=score&idxname=functions) that implement this but we'll focus on `mda::mda`:\n\n```{r}\n#| label: \"mda-str\"\nstr(mda::mda)\n```\n\nThe main hyperparameter is the number of subclasses. We'll name our function `discrim_mixture`. \n\n## Aspects of models\n\nBefore proceeding, it helps to to review how parsnip categorizes models:\n\n* The model _type_ is related to the structural aspect of the model. For example, the model type `linear_reg` represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are `nearest_neighbor`, `decision_tree`, and so on. \n\n* Within a model type is the _mode_, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both models (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression). \n\n* The computation _engine_ is a combination of the estimation method and the implementation. For example, for linear regression, one engine is `\"lm\"` which uses ordinary least squares analysis via the `lm()` function. Another engine is `\"stan\"` which uses the Stan infrastructure to estimate parameters using Bayes rule. \n\nWhen adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model. \n\n## The general process\n\nThe parsnip package stores information about the models in an internal environment object. 
The environment can be accessed via the function `get_model_env()`. The package includes a variety of functions that can get or set the different aspects of the models. \n\nIf you are adding a new model from your own package, you can use these functions to add new entries into the model environment. \n\n### Step 1. Register the model, modes, and arguments\n\nWe will add the MDA model using the model type `discrim_mixture`. Since this is a classification method, we only have to register a single mode:\n\n```{r}\n#| label: \"mda-reg\"\nlibrary(tidymodels)\nset_new_model(\"discrim_mixture\")\nset_model_mode(model = \"discrim_mixture\", mode = \"classification\")\nset_model_engine(\n \"discrim_mixture\", \n mode = \"classification\", \n eng = \"mda\"\n)\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mda\")\n```\n\nThese functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip: \n\n```{r}\n#| label: \"mda-show-1\"\nshow_model_info(\"discrim_mixture\")\n```\n\nThe next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in:\n\n * The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. \"penalty\" instead of \"lambda\" for regularized regression). We recommend consulting [the model argument table available here](/find/parsnip/) to see if an existing argument name can be used before creating a new one. \n \n * The argument name that is used by the underlying modeling function. \n \n * A function reference for a _constructor_ that will be used to generate tuning parameter values. This should be a character vector with a named element called `fun` that is the constructor function. There is an optional element `pkg` that can be used to call the function using its namespace. 
If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for `trans` and `range` while qualitative parameters can pass `values` via this list. \n \n * A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. In general this is not the case so one would use `has_submodels = FALSE`. \n \nFor `mda::mda()`, the main tuning parameter is `subclasses` which we will rewrite as `sub_classes`. \n\n```{r}\n#| label: \"mda-args\"\nset_model_arg(\n model = \"discrim_mixture\",\n eng = \"mda\",\n parsnip = \"sub_classes\",\n original = \"subclasses\",\n func = list(pkg = \"foo\", fun = \"bar\"),\n has_submodel = FALSE\n)\nshow_model_info(\"discrim_mixture\")\n```\n\n### Step 2. Create the model function\n\nThis is a fairly simple function that can follow a basic template. The main arguments to our function will be:\n\n * The mode. If the model can do more than one mode, you might default this to \"unknown\". In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won't have to specify it. \n \n * The argument names (`sub_classes` here). 
These should be defaulted to `NULL`.\n\nA basic version of the function is:\n\n```{r}\n#| label: \"model-fun\"\ndiscrim_mixture <-\n function(mode = \"classification\", sub_classes = NULL) {\n # Check for correct mode\n if (mode != \"classification\") {\n rlang::abort(\"`mode` should be 'classification'\")\n }\n \n # Capture the arguments in quosures\n args <- list(sub_classes = rlang::enquo(sub_classes))\n \n # Save some empty slots for future parts of the specification\n new_model_spec(\n \"discrim_mixture\",\n args = args,\n eng_args = NULL,\n mode = mode,\n method = NULL,\n engine = NULL\n )\n }\n```\n\nThis is pretty simple since the data are not exposed to this function. \n\n::: {.callout-warning}\n We strongly suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former return better traceback results and have safer defaults for handling call objects. \n:::\n\n### Step 3. Add a fit module\n\nNow that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are:\n\n * `interface` is a single character value that could be \"formula\", \"data.frame\", or \"matrix\". This defines the type of interface used by the underlying fit function (`mda::mda`, in this case). This helps the translation of the data to be in an appropriate format for the that function. \n \n * `protect` is an optional list of function arguments that **should not be changeable** by the user. In this case, we probably don't want users to pass data values to these arguments (until the `fit()` function is called).\n \n * `func` is the package and name of the function that will be called. If you are using a locally defined function, only `fun` is required. \n \n * `defaults` is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. 
This isn't needed in this case, but is described later in this document.\n\nFor the first engine:\n\n```{r}\n#| label: \"fit-mod\"\nset_fit(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\"),\n func = c(pkg = \"mda\", fun = \"mda\"),\n defaults = list()\n )\n)\n\nshow_model_info(\"discrim_mixture\")\n```\n\nWe also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly.\n\n * `predictor_indicators` describes whether and how to create indicator/dummy variables from factor predictors. There are three options: `\"none\"` (do not expand factor predictors), `\"traditional\"` (apply the standard `model.matrix()` encodings), and `\"one_hot\"` (create the complete set including the baseline level for all factors). \n \n * `compute_intercept` controls whether `model.matrix()` should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, `model.matrix()` computes dummy variables for all but one factor level. Without an intercept, `model.matrix()` computes a full set of indicators for the first factor variable, but an incomplete set for the remainder.\n \n * `remove_intercept` removes the intercept column *after* `model.matrix()` is finished. This can be useful if the model function (e.g. `lm()`) automatically generates an intercept.\n\n* `allow_sparse_x` specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning.\n\n```{r}\nset_encoding(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n```\n\n\n### Step 4. 
Add modules for prediction\n\nSimilar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the `class` object contains the details. The elements of the list are:\n\n * `pre` and `post` are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won't be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a `pre` function with the `args` below. \n * `func` is the prediction function (in the same format as above). In many cases, packages have a predict method for their model's class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to `predict()` with no associated package. \n * `args` is a list of arguments to pass to the prediction function. These will most likely be wrapped in `rlang::expr()` so that they are not evaluated when defining the method. For mda, the code would be `predict(object, newdata, type = \"class\")`. What is actually given to the function is the parsnip model fit object, which includes a sub-object called `fit()` that houses the mda model object. If the data need to be a matrix or data frame, you could also use `newdata = quote(as.data.frame(newdata))` or similar. \n\nThe parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data. 
\n\nTo add this method to the model environment, a similar `set()` function is used:\n\n```{r}\n#| label: \"mds-class\"\nclass_info <- \n list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n # These lists should be of the form:\n # {predict.mda argument name} = {values provided from parsnip objects}\n list(\n # We don't want the first two arguments evaluated right now\n # since they don't exist yet. `type` is a simple object that\n # doesn't need to have its evaluation deferred. \n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"class\"\n )\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"class\",\n value = class_info\n)\n``` \n\nA similar call can be used to define the class probability module (if they can be computed). The format is identical to the `class` module but the output is expected to be a tibble with columns for each factor level. \n\nAs an example of the `post` function, the data frame created by `mda:::predict.mda()` will be converted to a tibble. The arguments are `x` (the raw results coming from the predict method) and `object` (the parsnip model fit object). The latter has a sub-object called `lvl` which is a character string of the outcome's factor levels (if any). \n\nWe register the probability module. There is a template function that makes this slightly easier to format the objects:\n\n```{r}\n#| label: \"mda-prob\"\nprob_info <-\n pred_value_template(\n post = function(x, object) {\n tibble::as_tibble(x)\n },\n func = c(fun = \"predict\"),\n # Now everything else is put into the `args` slot\n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"posterior\"\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"prob\",\n value = prob_info\n)\n\nshow_model_info(\"discrim_mixture\")\n```\n\nIf this model could be used for regression situations, we could also add a \"numeric\" module. 
For `pred`, the model requires an unnamed numeric vector output (usually). \n\nExamples are [here](https://github.com/tidymodels/parsnip/blob/master/R/linear_reg_data.R) and [here](https://github.com/tidymodels/parsnip/blob/master/R/rand_forest_data.R). \n\n\n### Does it work? \n\nAs a developer, one thing that may come in handy is the `translate()` function. This will tell you what the model's eventual syntax will be. \n\nFor example:\n\n```{r}\n#| label: \"mda-code\"\ndiscrim_mixture(sub_classes = 2) %>%\n translate(engine = \"mda\")\n```\n\nLet's try it on a data set from the modeldata package:\n\n```{r}\n#| label: \"mda-data\"\ndata(\"two_class_dat\", package = \"modeldata\")\nset.seed(4622)\nexample_split <- initial_split(two_class_dat, prop = 0.99)\nexample_train <- training(example_split)\nexample_test <- testing(example_split)\n\nmda_spec <- discrim_mixture(sub_classes = 2) %>% \n set_engine(\"mda\")\n\nmda_fit <- mda_spec %>%\n fit(Class ~ ., data = example_train, engine = \"mda\")\nmda_fit\n\npredict(mda_fit, new_data = example_test, type = \"prob\") %>%\n bind_cols(example_test %>% select(Class))\n\npredict(mda_fit, new_data = example_test) %>% \n bind_cols(example_test %>% select(Class))\n```\n\n\n## Add an engine\n\nThe process for adding an engine to an existing model is _almost_ the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. 
The code for the `rlm()` function in MASS is pretty similar to `lm()`, so we can copy that code and change the package/function names:\n\n```{r}\n#| label: \"rlm\"\nset_model_engine(\"linear_reg\", \"regression\", eng = \"rlm\")\nset_dependency(\"linear_reg\", eng = \"rlm\", pkg = \"MASS\")\n\nset_fit(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\", \"weights\"),\n func = c(pkg = \"MASS\", fun = \"rlm\"),\n defaults = list()\n )\n)\n\nset_encoding(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n\nset_pred(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n type = \"numeric\",\n value = list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n list(\n object = expr(object$fit),\n newdata = expr(new_data),\n type = \"response\"\n )\n )\n)\n\n# testing:\nlinear_reg() %>% \n set_engine(\"rlm\") %>% \n fit(mpg ~ ., data = mtcars)\n```\n\n## Add parsnip models to another package\n\nThe process here is almost the same. All of the previous functions are still required but their execution is a little different. \n\nFor parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the \"Depends\" category. \n\nThe first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. 
For our example here, this might look like: \n\n```{r}\n#| eval: false\nmake_discrim_mixture_mda <- function() {\n parsnip::set_new_model(\"discrim_mixture\")\n\n parsnip::set_model_mode(\"discrim_mixture\", \"classification\")\n\n # and so on...\n}\n```\n\nThis function is then executed when your package is loaded: \n\n```{r}\n#| eval: false\n.onLoad <- function(libname, pkgname) {\n # This defines discrim_mixture in the model database\n make_discrim_mixture_mda()\n}\n```\n\nFor an example package that uses parsnip definitions, take a look at the [discrim](https://github.com/tidymodels/discrim) package.\n\n::: {.callout-warning}\n To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. `set_new_model()` and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing. \n:::\n\nIt is also important for parallel processing support to **list the home package as a dependency**. If the `discrim_mixture()` function lived in a package called `mixedup`, include the line:\n\n```r\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mixedup\")\n```\n\nParallel processing requires this explicit dependency setting. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip's internal database (and produce an error). \n\n\n## Your model, tuning parameters, and you\n\nThe tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. 
`discrim_mixture()` has one main tuning parameter: `sub_classes`. To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. \n\n`tunable()` takes the model specification as its argument and returns a tibble with columns: \n\n* `name`: The name of the argument. \n\n* `call_info`: A list that describes how to call a function that returns a dials parameter object. \n\n* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `\"model_spec\"`. \n\n* `component`: A character string with more information about the source. For models, this is just the name of the function (e.g. `\"discrim_mixture\"`). \n\n* `component_id`: A character string to indicate where a unique identifier is for the object. For a model, this indicates the type of model argument (e.g. \"main\"). \n\nThe main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. \n\nFor example, for a nearest-neighbors `neighbors` parameter, this value is just: \n\n```{r}\n#| label: \"mtry\"\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n```\n\nFor `discrim_mixture()`, a dials object is needed that returns an integer that is the number of sub-classes that should be created. 
We can create a dials parameter function for this:\n\n```{r}\n#| label: \"sub-classes\"\nsub_classes <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(sub_classes = \"# Sub-Classes\"),\n finalize = NULL\n )\n}\n```\n\nIf this were in the dials package, we could use: \n\n```{r}\n#| label: \"tunable\"\ntunable.discrim_mixture <- function(x, ...) {\n tibble::tibble(\n name = c(\"sub_classes\"),\n call_info = list(list(pkg = NULL, fun = \"sub_classes\")),\n source = \"model_spec\",\n component = \"discrim_mixture\",\n component_id = \"main\"\n )\n}\n```\n\nOnce this method is in place, the tuning functions can be used: \n\n```{r}\n#| label: \"tune-mda\"\n#| message: false\nmda_spec <- \n discrim_mixture(sub_classes = tune()) %>% \n set_engine(\"mda\")\n\nset.seed(452)\ncv <- vfold_cv(example_train)\nmda_tune_res <- mda_spec %>%\n tune_grid(Class ~ ., cv, grid = 4)\nshow_best(mda_tune_res, metric = \"roc_auc\")\n```\n\n\n\n## Pro-tips, what-ifs, exceptions, FAQ, and minutiae\n\nThere are various things that came to mind while developing this resource.\n\n**Do I have to return a simple vector for `predict` and `predict_class`?**\n\nPreviously, when discussing the `pred` information:\n\n> For `pred`, the model requires an unnamed numeric vector output **(usually)**.\n\nThere are some models (e.g. `glmnet`, `plsr`, `Cubist`, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called `values` and have extra columns for any parameters that define the different sub-models. 
\n\nFor example, if I fit a linear regression model via `glmnet` and get four values of the regularization parameter (`lambda`):\n\n```{r}\n#| label: \"glmnet\"\n#| eval: false\nlinear_reg() %>%\n set_engine(\"glmnet\", nlambda = 4) %>% \n fit(mpg ~ ., data = mtcars) %>%\n multi_predict(new_data = mtcars[1:3, -1])\n```\n\n_However_, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (`lambda`, in this case). \n\n**What do I do about how my model handles factors or categorical data?**\n\nSome modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using `model.matrix()`), and some do not. Some examples of models that do _not_ create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a `model.matrix()`-like default encoding for categorical data used in a model formula, but you can change this encoding using `set_encoding()`. 
For example, you can set predictor encodings that say, \"leave my data alone,\" and keep factors as is:\n\n```{r}\n#| label: \"encodinginfo\"\n#| eval: false\nset_encoding(\n model = \"decision_tree\",\n eng = \"rpart\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"none\",\n compute_intercept = FALSE,\n remove_intercept = FALSE\n )\n)\n```\n\n::: {.callout-note}\nThere are three options for `predictor_indicators`: \n- \"none\" (do not expand factor predictors)\n- \"traditional\" (apply the standard `model.matrix()` encoding)\n- \"one_hot\" (create the complete set including the baseline level for all factors) \n:::\n\nTo learn more about encoding categorical predictors, check out [this blog post](https://www.tidyverse.org/blog/2020/07/parsnip-0-1-2/#predictor-encoding-consistency).\n\n**What is the `defaults` slot and why do I need it?**\n\nYou might want to set defaults that can be overridden by the user. For example, for logistic regression with `glm`, it makes sense to default `family = binomial`. However, if someone wants to use a different link function, they should be able to do that. For that model/engine definition, it has:\n\n```{r}\n#| label: \"glm-alt\"\n#| eval: false\ndefaults = list(family = expr(binomial))\n```\n\nSo that is the default:\n\n```{r}\n#| label: \"glm-alt-show\"\n#| eval: false\nlogistic_reg() %>% translate(engine = \"glm\")\n\n# but you can change it:\n\nlogistic_reg() %>%\n set_engine(\"glm\", family = expr(binomial(link = \"probit\"))) %>% \n translate()\n```\n\nThat's what `defaults` are for. \n\nNote that we wrapped `binomial` inside of `expr()`. If we didn't, it would substitute the results of executing `binomial()` inside of the expression (and that's a mess). \n\n**What if I want more complex defaults?**\n\nThe `translate` function can be used to check values or set defaults once the model's mode is known. 
To do this, you can create a model-specific S3 method that first calls the general method (`translate.model_spec()`) and then makes modifications or conducts error traps. \n\nFor example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check:\n\n```{r}\n#| label: \"rf-trans\"\n#| eval: false\n# Simplified version\ntranslate.rand_forest <- function (x, engine, ...){\n # Run the general method to get the real arguments in place\n x <- translate.default(x, engine, ...)\n \n # Check and see if they make sense for the engine and/or mode:\n if (x$engine == \"ranger\") {\n if (any(names(x$method$fit$args) == \"importance\")) \n if (is.logical(x$method$fit$args$importance)) \n rlang::abort(\"`importance` should be a character value. See ?ranger::ranger.\")\n }\n x\n}\n```\n\nAs another example, `nnet::nnet()` has an option for the final layer to be linear (called `linout`). If `mode = \"regression\"`, that should probably be set to `TRUE`. You couldn't do this with the `args` (described above) since you need the function translated first. \n\n\n**My model fit requires more than one function call. So....?**\n\nThe best course of action is to write a wrapper so that it can be one call. This was the case with xgboost and keras. \n\n**Why would I preprocess my data?**\n\nThere might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.)\n\nThis would **not** include making dummy variables and `model.matrix` stuff. The parsnip infrastructure already does that for you. \n\n\n**Why would I post-process my predictions?**\n\nWhat comes back from some R functions may be somewhat... arcane or problematic. 
As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (*narrator: they don't*). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set. \n\nAnother example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure. \n\nThese are the types of problems that the post-processor will solve. \n\n**Are there other modes?**\n\nNot yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don't make direct predictions of the outcome. \n\nIf you have a suggestion, please add a [GitHub issue](https://github.com/tidymodels/parsnip/issues) to discuss it. \n\n \n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(mda)\nlibrary(tidymodels)\n\npkgs <- c(\"tidymodels\", \"mda\", \"modeldata\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThe parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this:\n\n * It eliminates a lot of duplicate code.\n * Since the expressions are not evaluated until fitting, it eliminates many package dependencies.\n\nA parsnip model function is itself very general. For example, the `logistic_reg()` function itself doesn't have any model code within it. 
Instead, each model function is associated with one or more computational _engines_. These might be different R packages or some function in another language (that can be evaluated by R). \n\nThis article describes the process of creating a new model function. Before proceeding, take a minute and read our [guidelines on creating modeling packages](https://tidymodels.github.io/model-implementation-principles/) to understand the general themes and conventions that we use. \n\n## An example model\n\nAs an example, we'll create a function for _mixture discriminant analysis_. There are [a few packages](http://search.r-project.org/cgi-bin/namazu.cgi?query=%22mixture+discriminant%22&max=100&result=normal&sort=score&idxname=functions) that implement this but we'll focus on `mda::mda`:\n\n```{r}\n#| label: \"mda-str\"\nstr(mda::mda)\n```\n\nThe main hyperparameter is the number of subclasses. We'll name our function `discrim_mixture`. \n\n## Aspects of models\n\nBefore proceeding, it helps to review how parsnip categorizes models:\n\n* The model _type_ is related to the structural aspect of the model. For example, the model type `linear_reg` represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are `nearest_neighbor`, `decision_tree`, and so on. \n\n* Within a model type is the _mode_, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both modes (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression). \n\n* The computation _engine_ is a combination of the estimation method and the implementation. For example, for linear regression, one engine is `\"lm\"` which uses ordinary least squares analysis via the `lm()` function. Another engine is `\"stan\"` which uses the Stan infrastructure to estimate parameters using Bayes rule. 
\n\nWhen adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model. \n\n## The general process\n\nThe parsnip package stores information about the models in an internal environment object. The environment can be accessed via the function `get_model_env()`. The package includes a variety of functions that can get or set the different aspects of the models. \n\nIf you are adding a new model from your own package, you can use these functions to add new entries into the model environment. \n\n### Step 1. Register the model, modes, and arguments\n\nWe will add the MDA model using the model type `discrim_mixture`. Since this is a classification method, we only have to register a single mode:\n\n```{r}\n#| label: \"mda-reg\"\nlibrary(tidymodels)\nset_new_model(\"discrim_mixture\")\nset_model_mode(model = \"discrim_mixture\", mode = \"classification\")\nset_model_engine(\n \"discrim_mixture\", \n mode = \"classification\", \n eng = \"mda\"\n)\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mda\")\n```\n\nThese functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip: \n\n```{r}\n#| label: \"mda-show-1\"\nshow_model_info(\"discrim_mixture\")\n```\n\nThe next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in:\n\n * The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. \"penalty\" instead of \"lambda\" for regularized regression). We recommend consulting [the model argument table available here](/find/parsnip/) to see if an existing argument name can be used before creating a new one. \n \n * The argument name that is used by the underlying modeling function. 
\n \n * A function reference for a _constructor_ that will be used to generate tuning parameter values. This should be a character vector with a named element called `fun` that is the constructor function. There is an optional element `pkg` that can be used to call the function using its namespace. If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for `trans` and `range` while qualitative parameters can pass `values` via this list. \n \n * A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. In general this is not the case so one would use `has_submodel = FALSE`. \n \nFor `mda::mda()`, the main tuning parameter is `subclasses` which we will rewrite as `sub_classes`. \n\n```{r}\n#| label: \"mda-args\"\nset_model_arg(\n model = \"discrim_mixture\",\n eng = \"mda\",\n parsnip = \"sub_classes\",\n original = \"subclasses\",\n func = list(pkg = \"foo\", fun = \"bar\"),\n has_submodel = FALSE\n)\nshow_model_info(\"discrim_mixture\")\n```\n\n### Step 2. Create the model function\n\nThis is a fairly simple function that can follow a basic template. The main arguments to our function will be:\n\n * The mode. If the model can do more than one mode, you might default this to \"unknown\". In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won't have to specify it. \n \n * The argument names (`sub_classes` here). 
These should be defaulted to `NULL`.\n\nA basic version of the function is:\n\n```{r}\n#| label: \"model-fun\"\ndiscrim_mixture <-\n function(mode = \"classification\", sub_classes = NULL) {\n # Check for correct mode\n if (mode != \"classification\") {\n rlang::abort(\"`mode` should be 'classification'\")\n }\n \n # Capture the arguments in quosures\n args <- list(sub_classes = rlang::enquo(sub_classes))\n \n # Save some empty slots for future parts of the specification\n new_model_spec(\n \"discrim_mixture\",\n args = args,\n eng_args = NULL,\n mode = mode,\n method = NULL,\n engine = NULL\n )\n }\n```\n\nThis is pretty simple since the data are not exposed to this function. \n\n::: {.callout-warning}\n We strongly suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former return better traceback results and have safer defaults for handling call objects. \n:::\n\n### Step 3. Add a fit module\n\nNow that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are:\n\n * `interface` is a single character value that could be \"formula\", \"data.frame\", or \"matrix\". This defines the type of interface used by the underlying fit function (`mda::mda`, in this case). This helps the translation of the data to be in an appropriate format for that function. \n \n * `protect` is an optional list of function arguments that **should not be changeable** by the user. In this case, we probably don't want users to pass data values to these arguments (until the `fit()` function is called).\n \n * `func` is the package and name of the function that will be called. If you are using a locally defined function, only `fun` is required. \n \n * `defaults` is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. 
This isn't needed in this case, but is described later in this document.\n\nFor the first engine:\n\n```{r}\n#| label: \"fit-mod\"\nset_fit(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\"),\n func = c(pkg = \"mda\", fun = \"mda\"),\n defaults = list()\n )\n)\n\nshow_model_info(\"discrim_mixture\")\n```\n\nWe also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly.\n\n * `predictor_indicators` describes whether and how to create indicator/dummy variables from factor predictors. There are three options: `\"none\"` (do not expand factor predictors), `\"traditional\"` (apply the standard `model.matrix()` encodings), and `\"one_hot\"` (create the complete set including the baseline level for all factors). \n \n * `compute_intercept` controls whether `model.matrix()` should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, `model.matrix()` computes dummy variables for all but one factor level. Without an intercept, `model.matrix()` computes a full set of indicators for the first factor variable, but an incomplete set for the remainder.\n \n * `remove_intercept` removes the intercept column *after* `model.matrix()` is finished. This can be useful if the model function (e.g. `lm()`) automatically generates an intercept.\n\n* `allow_sparse_x` specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning.\n\n```{r}\nset_encoding(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n```\n\n\n### Step 4. 
Add modules for prediction\n\nSimilar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the `class` object contains the details. The elements of the list are:\n\n * `pre` and `post` are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won't be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a `pre` function with the `args` below. \n * `func` is the prediction function (in the same format as above). In many cases, packages have a predict method for their model's class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to `predict()` with no associated package. \n * `args` is a list of arguments to pass to the prediction function. These will most likely be wrapped in `rlang::expr()` so that they are not evaluated when defining the method. For mda, the code would be `predict(object, newdata, type = \"class\")`. What is actually given to the function is the parsnip model fit object, which includes a sub-object called `fit()` that houses the mda model object. If the data need to be a matrix or data frame, you could also use `newdata = quote(as.data.frame(newdata))` or similar. \n\nThe parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data. 
\n\nTo add this method to the model environment, a similar `set()` function is used:\n\n```{r}\n#| label: \"mds-class\"\nclass_info <- \n list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n # These lists should be of the form:\n # {predict.mda argument name} = {values provided from parsnip objects}\n list(\n # We don't want the first two arguments evaluated right now\n # since they don't exist yet. `type` is a simple object that\n # doesn't need to have its evaluation deferred. \n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"class\"\n )\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"class\",\n value = class_info\n)\n``` \n\nA similar call can be used to define the class probability module (if they can be computed). The format is identical to the `class` module but the output is expected to be a tibble with columns for each factor level. \n\nAs an example of the `post` function, the data frame created by `mda:::predict.mda()` will be converted to a tibble. The arguments are `x` (the raw results coming from the predict method) and `object` (the parsnip model fit object). The latter has a sub-object called `lvl` which is a character string of the outcome's factor levels (if any). \n\nWe register the probability module. There is a template function that makes this slightly easier to format the objects:\n\n```{r}\n#| label: \"mda-prob\"\nprob_info <-\n pred_value_template(\n post = function(x, object) {\n tibble::as_tibble(x)\n },\n func = c(fun = \"predict\"),\n # Now everything else is put into the `args` slot\n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"posterior\"\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"prob\",\n value = prob_info\n)\n\nshow_model_info(\"discrim_mixture\")\n```\n\nIf this model could be used for regression situations, we could also add a \"numeric\" module. 
For `pred`, the model requires an unnamed numeric vector output (usually). \n\nExamples are [here](https://github.com/tidymodels/parsnip/blob/master/R/linear_reg_data.R) and [here](https://github.com/tidymodels/parsnip/blob/master/R/rand_forest_data.R). \n\n\n### Does it work? \n\nAs a developer, one thing that may come in handy is the `translate()` function. This will tell you what the model's eventual syntax will be. \n\nFor example:\n\n```{r}\n#| label: \"mda-code\"\ndiscrim_mixture(sub_classes = 2) %>%\n translate(engine = \"mda\")\n```\n\nLet's try it on a data set from the modeldata package:\n\n```{r}\n#| label: \"mda-data\"\ndata(\"two_class_dat\", package = \"modeldata\")\nset.seed(4622)\nexample_split <- initial_split(two_class_dat, prop = 0.99)\nexample_train <- training(example_split)\nexample_test <- testing(example_split)\n\nmda_spec <- discrim_mixture(sub_classes = 2) %>% \n set_engine(\"mda\")\n\nmda_fit <- mda_spec %>%\n fit(Class ~ ., data = example_train, engine = \"mda\")\nmda_fit\n\npredict(mda_fit, new_data = example_test, type = \"prob\") %>%\n bind_cols(example_test %>% select(Class))\n\npredict(mda_fit, new_data = example_test) %>% \n bind_cols(example_test %>% select(Class))\n```\n\n\n## Add an engine\n\nThe process for adding an engine to an existing model is _almost_ the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. 
The code for the `rlm()` function in MASS is pretty similar to `lm()`, so we can copy that code and change the package/function names:\n\n```{r}\n#| label: \"rlm\"\nset_model_engine(\"linear_reg\", \"regression\", eng = \"rlm\")\nset_dependency(\"linear_reg\", eng = \"rlm\", pkg = \"MASS\")\n\nset_fit(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\", \"weights\"),\n func = c(pkg = \"MASS\", fun = \"rlm\"),\n defaults = list()\n )\n)\n\nset_encoding(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n\nset_pred(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n type = \"numeric\",\n value = list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n list(\n object = expr(object$fit),\n newdata = expr(new_data),\n type = \"response\"\n )\n )\n)\n\n# testing:\nlinear_reg() %>% \n set_engine(\"rlm\") %>% \n fit(mpg ~ ., data = mtcars)\n```\n\n## Add parsnip models to another package\n\nThe process here is almost the same. All of the previous functions are still required but their execution is a little different. \n\nFor parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the \"Depends\" category. \n\nThe first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. 
For our example here, this might look like: \n\n```{r}\n#| eval: false\nmake_discrim_mixture_mda <- function() {\n parsnip::set_new_model(\"discrim_mixture\")\n\n parsnip::set_model_mode(\"discrim_mixture\", \"classification\")\n\n # and so one...\n}\n```\n\nThis function is then executed when your package is loaded: \n\n```{r}\n#| eval: false\n.onLoad <- function(libname, pkgname) {\n # This defines discrim_mixture in the model database\n make_discrim_mixture_mda()\n}\n```\n\nFor an example package that uses parsnip definitions, take a look at the [discrim](https://github.com/tidymodels/discrim) package.\n\n::: {.callout-warning}\n To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. `set_new_model()` and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing. \n:::\n\nIt is also important for parallel processing support to **list the home package as a dependency**. If the `discrim_mixture()` function lived in a package called `mixedup`, include the line:\n\n```r\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mixedup\")\n```\n\nParallel processing requires this explicit dependency setting. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip's internal database (and produce an error). \n\n\n## Your model, tuning parameters, and you\n\nThe tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. 
`discrim_mixture()` has one main tuning parameter: `sub_classes`. To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. \n\n`tunable()` takes the model specification as its argument and returns a tibble with columns: \n\n* `name`: The name of the argument. \n\n* `call_info`: A list that describes how to call a function that returns a dials parameter object. \n\n* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `\"model_spec\"`. \n\n* `component`: A character string with more information about the source. For models, this is just the name of the function (e.g. `\"discrim_mixture\"`). \n\n* `component_id`: A character string to indicate where a unique identifier is for the object. For a model, this is indicates the type of model argument (e.g. \"main\"). \n\nThe main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. \n\nFor example, for a nearest-neighbors `neighbors` parameter, this value is just: \n\n```{r}\n#| label: \"mtry\"\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n```\n\nFor `discrim_mixture()`, a dials object is needed that returns an integer that is the number of sub-classes that should be create. 
We can create a dials parameter function for this:\n\n```{r}\n#| label: \"sub-classes\"\nsub_classes <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(sub_classes = \"# Sub-Classes\"),\n finalize = NULL\n )\n}\n```\n\nIf this were in the dials package, we could use: \n\n```{r}\n#| label: \"tunable\"\ntunable.discrim_mixture <- function(x, ...) {\n tibble::tibble(\n name = c(\"sub_classes\"),\n call_info = list(list(pkg = NULL, fun = \"sub_classes\")),\n source = \"model_spec\",\n component = \"discrim_mixture\",\n component_id = \"main\"\n )\n}\n```\n\nOnce this method is in place, the tuning functions can be used: \n\n```{r}\n#| label: \"tune-mda\"\n#| message: false\nmda_spec <- \n discrim_mixture(sub_classes = tune()) %>% \n set_engine(\"mda\")\n\nset.seed(452)\ncv <- vfold_cv(example_train)\nmda_tune_res <- mda_spec %>%\n tune_grid(Class ~ ., cv, grid = 4)\nshow_best(mda_tune_res, metric = \"roc_auc\")\n```\n\n\n\n## Pro-tips, what-ifs, exceptions, FAQ, and minutiae\n\nThere are various things that came to mind while developing this resource.\n\n**Do I have to return a simple vector for `predict` and `predict_class`?**\n\nPreviously, when discussing the `pred` information:\n\n> For `pred`, the model requires an unnamed numeric vector output **(usually)**.\n\nThere are some models (e.g. `glmnet`, `plsr`, `Cubist`, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called `values` and have extra columns for any parameters that define the different sub-models. 
\n\nFor example, if I fit a linear regression model via `glmnet` and get four values of the regularization parameter (`lambda`):\n\n```{r}\n#| label: \"glmnet\"\n#| eval: false\nlinear_reg() %>%\n set_engine(\"glmnet\", nlambda = 4) %>% \n fit(mpg ~ ., data = mtcars) %>%\n multi_predict(new_data = mtcars[1:3, -1])\n```\n\n_However_, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (`lambda`, in this case). \n\n**What do I do about how my model handles factors or categorical data?**\n\nSome modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using `model.matrix()`), and some do not. Some examples of models that do _not_ create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a `model.matrix()`-like default encoding for categorical data used in a model formula, but you can change this encoding using `set_encoding()`. 
For example, you can set predictor encodings that say, \"leave my data alone,\" and keep factors as is:\n\n```{r}\n#| label: \"encodinginfo\"\n#| eval: false\nset_encoding(\n model = \"decision_tree\",\n eng = \"rpart\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"none\",\n compute_intercept = FALSE,\n remove_intercept = FALSE\n )\n)\n```\n\n::: {.callout-note}\nThere are three options for `predictor_indicators`: \n- \"none\" (do not expand factor predictors)\n- \"traditional\" (apply the standard `model.matrix()` encoding)\n- \"one_hot\" (create the complete set including the baseline level for all factors) \n:::\n\nTo learn more about encoding categorical predictors, check out [this blog post](https://www.tidyverse.org/blog/2020/07/parsnip-0-1-2/#predictor-encoding-consistency).\n\n**What is the `defaults` slot and why do I need it?**\n\nYou might want to set defaults that can be overridden by the user. For example, for logistic regression with `glm`, it make sense to default `family = binomial`. However, if someone wants to use a different link function, they should be able to do that. For that model/engine definition, it has:\n\n```{r}\n#| label: \"glm-alt\"\n#| eval: false\ndefaults = list(family = expr(binomial))\n```\n\nSo that is the default:\n\n```{r}\n#| label: \"glm-alt-show\"\n#| eval: false\nlogistic_reg() %>% translate(engine = \"glm\")\n\n# but you can change it:\n\nlogistic_reg() %>%\n set_engine(\"glm\", family = expr(binomial(link = \"probit\"))) %>% \n translate()\n```\n\nThat's what `defaults` are for. \n\nNote that we wrapped `binomial` inside of `expr()`. If we didn't, it would substitute the results of executing `binomial()` inside of the expression (and that's a mess). \n\n**What if I want more complex defaults?**\n\nThe `translate` function can be used to check values or set defaults once the model's mode is known. 
To do this, you can create a model-specific S3 method that first calls the general method (`translate.model_spec()`) and then makes modifications or conducts error traps. \n\nFor example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check:\n\n```{r}\n#| label: \"rf-trans\"\n#| eval: false\n# Simplified version\ntranslate.rand_forest <- function (x, engine, ...){\n # Run the general method to get the real arguments in place\n x <- translate.default(x, engine, ...)\n \n # Check and see if they make sense for the engine and/or mode:\n if (x$engine == \"ranger\") {\n if (any(names(x$method$fit$args) == \"importance\")) \n if (is.logical(x$method$fit$args$importance)) \n rlang::abort(\"`importance` should be a character value. See ?ranger::ranger.\")\n }\n x\n}\n```\n\nAs another example, `nnet::nnet()` has an option for the final layer to be linear (called `linout`). If `mode = \"regression\"`, that should probably be set to `TRUE`. You couldn't do this with the `args` (described above) since you need the function translated first. \n\n\n**My model fit requires more than one function call. So....?**\n\nThe best course of action is to write wrapper so that it can be one call. This was the case with xgboost and keras. \n\n**Why would I preprocess my data?**\n\nThere might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.)\n\nThis would **not** include making dummy variables and `model.matrix` stuff. The parsnip infrastructure already does that for you. \n\n\n**Why would I post-process my predictions?**\n\nWhat comes back from some R functions may be somewhat... arcane or problematic. 
As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (*narrator: they don't*). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set. \n\nAnother example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure. \n\nThese are the types of problems that the post-processor will solve. \n\n**Are there other modes?**\n\nNot yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don't make direct predictions of the outcome. \n\nIf you have a suggestion, please add a [GitHub issue](https://github.com/tidymodels/parsnip/issues) to discuss it. \n\n \n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n\n 
\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"How to build a parsnip model","categories":["developer tools"],"type":"learn-subsection","weight":2,"description":"Create a parsnip model function from an existing model 
implementation.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/develop/parameters/index.qmd.json b/.quarto/idx/learn/develop/parameters/index.qmd.json new file mode 100644 index 00000000..cad499b2 --- /dev/null +++ b/.quarto/idx/learn/develop/parameters/index.qmd.json @@ -0,0 +1 @@ +{"title":"How to create a tuning parameter function","markdown":{"yaml":{"title":"How to create a tuning parameter function","categories":["developer tools"],"type":"learn-subsection","weight":4,"description":"Build functions to use in tuning both quantitative and qualitative parameters.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\n\npkgs <- c(\"dials\", \"scales\")\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nSome models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using `new_quant_param()` or `new_qual_param()`, respectively. This article is a guide to creating new parameters.\n\n## Quantitative parameters\n\nAs an example, let's consider the multivariate adaptive regression spline ([MARS](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline)) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method.\n\nMARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by `earth::earth()` or the number of final terms can be set by the user. 
The parsnip function `mars()` has a parameter called `num_terms` that defines this.\n\nWhat if we want to create a parameter for the number of *initial terms* included in the model. There is no argument in `parsnip::mars()` for this but we will make one now. The argument name in `earth::earth()` is `nk`, which is not very descriptive. Our parameter will be called `num_initial_terms`.\n\nWe use the `new_quant_param()` function since this is a numeric parameter. The main two arguments to a numeric parameter function are `range` and `trans`.\n\nThe `range` specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is\n\n```{r}\n#| label: \"eart\"\n#| eval: false\nmin(200, max(20, 2 * ncol(x))) + 1\n```\n\nwhere `x` is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let's specify an upper limit of 10 but this will be discussed more in the next section.\n\nThe other argument is `trans`, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a `penalty` parameter that tends to range between zero and some upper bound (let's say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units.\n\nThe `trans` parameter accepts a transformation object from the scales package. 
For example:\n\n```{r}\n#| label: \"scales\"\nlibrary(scales)\nlsf.str(\"package:scales\", pattern = \"_trans$\")\nscales::log10_trans()\n```\n\nA value of `NULL` means that no transformation should be used.\n\nA quantitative parameter function should have these two arguments and, in the function body, a call `new_quant_param()`. There are a few arguments to this function:\n\n```{r}\n#| label: \"new_quant_param\"\nlibrary(tidymodels)\nargs(new_quant_param)\n```\n\n- Possible types are double precision and integers. The value of `type` should agree with the values of `range` in the function definition.\n\n- It's OK for our tuning to include the minimum or maximum, so we'll use `c(TRUE, TRUE)` for `inclusive`. If the value cannot include one end of the range, set one or both of these values to `FALSE`.\n\n- The `label` should be a named character string where the name is the parameter name and the value represents what will be printed automatically.\n\n- `finalize` is an argument that can set parts of the range. This is discussed more below.\n\nHere's an example of a basic quantitative parameter object:\n\n```{r}\n#| label: \"num-initial-terms\"\nnum_initial_terms <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\n\nnum_initial_terms()\n\n# Sample from the parameter:\nset.seed(4832856)\nnum_initial_terms() %>% value_sample(5)\n```\n\n### Finalizing parameters\n\nIt might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of `unknown()` can be added. This will force the user to \"finalize\" the parameter object for their particular data set. 
Let's redefine our function with an `unknown()` value:\n\n```{r}\n#| label: \"num-initial-terms-unk\"\n#| error: false\n#| eval: false\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\nnum_initial_terms()\n\n# Can we sample? \nnum_initial_terms() %>% value_sample(5)\n```\n\nThe `finalize` argument of `num_initial_terms()` can take a function that uses data to set the range. For example, the package already includes a few functions for finalization:\n\n```{r}\n#| label: \"dials-final-funcs\"\nlsf.str(\"package:dials\", pattern = \"^get_\")\n```\n\nThese functions generally take a data frame of predictors (in an argument called `x`) and add the range of the parameter object. Using the formula in the earth package, we might use:\n\n```{r}\n#| label: \"earth-range\"\nget_initial_mars_terms <- function(object, x) {\n upper_bound <- min(200, max(20, 2 * ncol(x))) + 1\n upper_bound <- as.integer(upper_bound)\n bounds <- range_get(object)\n bounds$upper <- upper_bound\n range_set(object, bounds)\n}\n\n# Use the mtcars are the finalize the upper bound: \nnum_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1])\n```\n\nOnce we add this function to the object, the general `finalize()` method can be used:\n\n```{r}\n#| label: \"final-obj\"\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = get_initial_mars_terms\n )\n}\n\nnum_initial_terms() %>% finalize(x = mtcars[, -1])\n```\n\n## Qualitative parameters\n\nNow let's look at an example of a qualitative parameter. 
If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be `min`, `max`, `mean`, `sum`, or to not aggregate at all (\"none\"). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We'll take \"character\" input (not \"logical\"), and we must specify the allowed values. By default we won't aggregate.\n\n```{r}\n#| label: \"aggregation\"\naggregation <- function(values = c(\"none\", \"min\", \"max\", \"mean\", \"sum\")) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n```\n\nWithin the dials package, the convention is to have the values contained in a separate vector whose name starts with `values_`. For example:\n\n```{r}\n#| label: \"aggregation-vec\"\nvalues_aggregation <- c(\"none\", \"min\", \"max\", \"mean\", \"sum\")\naggregation <- function(values = values_aggregation) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. 
We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n```\n\nThis step may not make sense if you are using the function in a script and not keeping it within a package.\n\nWe can use our `aggregation` parameters with dials functions.\n\n```{r}\n#| label: \"aggregation-use\"\naggregation()\naggregation() %>% value_sample(3)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\n\npkgs <- c(\"dials\", \"scales\")\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nSome models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using `new_quant_param()` or `new_qual_param()`, respectively. This article is a guide to creating new parameters.\n\n## Quantitative parameters\n\nAs an example, let's consider the multivariate adaptive regression spline ([MARS](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline)) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method.\n\nMARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by `earth::earth()` or the number of final terms can be set by the user. The parsnip function `mars()` has a parameter called `num_terms` that defines this.\n\nWhat if we want to create a parameter for the number of *initial terms* included in the model. There is no argument in `parsnip::mars()` for this but we will make one now. 
The argument name in `earth::earth()` is `nk`, which is not very descriptive. Our parameter will be called `num_initial_terms`.\n\nWe use the `new_quant_param()` function since this is a numeric parameter. The main two arguments to a numeric parameter function are `range` and `trans`.\n\nThe `range` specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is\n\n```{r}\n#| label: \"eart\"\n#| eval: false\nmin(200, max(20, 2 * ncol(x))) + 1\n```\n\nwhere `x` is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let's specify an upper limit of 10 but this will be discussed more in the next section.\n\nThe other argument is `trans`, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a `penalty` parameter that tends to range between zero and some upper bound (let's say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units.\n\nThe `trans` parameter accepts a transformation object from the scales package. For example:\n\n```{r}\n#| label: \"scales\"\nlibrary(scales)\nlsf.str(\"package:scales\", pattern = \"_trans$\")\nscales::log10_trans()\n```\n\nA value of `NULL` means that no transformation should be used.\n\nA quantitative parameter function should have these two arguments and, in the function body, a call `new_quant_param()`. 
There are a few arguments to this function:\n\n```{r}\n#| label: \"new_quant_param\"\nlibrary(tidymodels)\nargs(new_quant_param)\n```\n\n- Possible types are double precision and integers. The value of `type` should agree with the values of `range` in the function definition.\n\n- It's OK for our tuning to include the minimum or maximum, so we'll use `c(TRUE, TRUE)` for `inclusive`. If the value cannot include one end of the range, set one or both of these values to `FALSE`.\n\n- The `label` should be a named character string where the name is the parameter name and the value represents what will be printed automatically.\n\n- `finalize` is an argument that can set parts of the range. This is discussed more below.\n\nHere's an example of a basic quantitative parameter object:\n\n```{r}\n#| label: \"num-initial-terms\"\nnum_initial_terms <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\n\nnum_initial_terms()\n\n# Sample from the parameter:\nset.seed(4832856)\nnum_initial_terms() %>% value_sample(5)\n```\n\n### Finalizing parameters\n\nIt might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of `unknown()` can be added. This will force the user to \"finalize\" the parameter object for their particular data set. 
Let's redefine our function with an `unknown()` value:\n\n```{r}\n#| label: \"num-initial-terms-unk\"\n#| error: false\n#| eval: false\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\nnum_initial_terms()\n\n# Can we sample? \nnum_initial_terms() %>% value_sample(5)\n```\n\nThe `finalize` argument of `num_initial_terms()` can take a function that uses data to set the range. For example, the package already includes a few functions for finalization:\n\n```{r}\n#| label: \"dials-final-funcs\"\nlsf.str(\"package:dials\", pattern = \"^get_\")\n```\n\nThese functions generally take a data frame of predictors (in an argument called `x`) and add the range of the parameter object. Using the formula in the earth package, we might use:\n\n```{r}\n#| label: \"earth-range\"\nget_initial_mars_terms <- function(object, x) {\n upper_bound <- min(200, max(20, 2 * ncol(x))) + 1\n upper_bound <- as.integer(upper_bound)\n bounds <- range_get(object)\n bounds$upper <- upper_bound\n range_set(object, bounds)\n}\n\n# Use the mtcars are the finalize the upper bound: \nnum_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1])\n```\n\nOnce we add this function to the object, the general `finalize()` method can be used:\n\n```{r}\n#| label: \"final-obj\"\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = get_initial_mars_terms\n )\n}\n\nnum_initial_terms() %>% finalize(x = mtcars[, -1])\n```\n\n## Qualitative parameters\n\nNow let's look at an example of a qualitative parameter. 
If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be `min`, `max`, `mean`, `sum`, or to not aggregate at all (\"none\"). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We'll take \"character\" input (not \"logical\"), and we must specify the allowed values. By default we won't aggregate.\n\n```{r}\n#| label: \"aggregation\"\naggregation <- function(values = c(\"none\", \"min\", \"max\", \"mean\", \"sum\")) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n```\n\nWithin the dials package, the convention is to have the values contained in a separate vector whose name starts with `values_`. For example:\n\n```{r}\n#| label: \"aggregation-vec\"\nvalues_aggregation <- c(\"none\", \"min\", \"max\", \"mean\", \"sum\")\naggregation <- function(values = values_aggregation) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. 
We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n```\n\nThis step may not make sense if you are using the function in a script and not keeping it within a package.\n\nWe can use our `aggregation` parameters with dials functions.\n\n```{r}\n#| label: \"aggregation-use\"\naggregation()\naggregation() %>% value_sample(3)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this 
page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle 
reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"How to create a tuning parameter function","categories":["developer tools"],"type":"learn-subsection","weight":4,"description":"Build functions to use in tuning 
both quantitative and qualitative parameters.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/develop/recipes/index.qmd.json b/.quarto/idx/learn/develop/recipes/index.qmd.json new file mode 100644 index 00000000..e10edfcb --- /dev/null +++ b/.quarto/idx/learn/develop/recipes/index.qmd.json @@ -0,0 +1 @@ +{"title":"Create your own recipe step function","markdown":{"yaml":{"title":"Create your own recipe step function","categories":["developer tools"],"type":"learn-subsection","weight":1,"description":"Write a new recipe step for data preprocessing.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"ex_setup\"\n#| include: false\nlibrary(tidymodels)\nlibrary(modeldata)\npkgs <- c(\"tidymodels\", \"modeldata\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThere are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages [can be found here](/find/recipes/). However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for [centering](https://github.com/tidymodels/recipes/blob/master/R/center.R) or [PCA](https://github.com/tidymodels/recipes/blob/master/R/pca.R) to start. \n\nFor check operations (e.g. `check_class()`), the process is very similar. Notes on this are available at the end of this article. \n\nThe general process to follow is to:\n\n1. Define a step constructor function.\n\n2. Create the minimal S3 methods for `prep()`, `bake()`, and `print()`. \n\n3. 
Optionally add some extra methods to work with other tidymodels packages, such as `tunable()` and `tidy()`. \n\nAs an example, we will create a step for converting data into percentiles. \n\n## A new step definition\n\nLet's create a step that replaces the value of a variable with its percentile from the training set. The example data we'll use is from the modeldata package:\n\n```{r}\n#| label: \"initial\"\nlibrary(modeldata)\ndata(biomass)\nstr(biomass)\n\nbiomass_tr <- biomass[biomass$dataset == \"Training\",]\nbiomass_te <- biomass[biomass$dataset == \"Testing\",]\n```\n\nTo illustrate the transformation with the `carbon` variable, note the training set distribution of this variable with a vertical line below for the first value of the test set. \n\n```{r}\n#| label: \"carbon_dist\"\n#| fig-width: 6\n#| fig-height: 4.25\n#| out-width: \"100%\"\nlibrary(ggplot2)\ntheme_set(theme_bw())\nggplot(biomass_tr, aes(x = carbon)) + \n geom_histogram(binwidth = 5, col = \"blue\", fill = \"blue\", alpha = .5) + \n geom_vline(xintercept = biomass_te$carbon[1], lty = 2)\n```\n\nBased on the training set, `r round(mean(biomass_tr$carbon <= biomass_te$carbon[1])*100, 1)`% of the data are less than a value of `r biomass_te$carbon[1]`. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values. \n\nOur new step will do this computation for any numeric variables of interest. We will call this new recipe step `step_percentile()`. The code below is designed for illustration and not speed or best practices. We've left out a lot of error trapping that we would want in a real implementation. \n\n## Create the function\n\nTo start, there is a _user-facing_ function. Let's call that `step_percentile()`. This is just a simple wrapper around a _constructor function_, which defines the rules for any step object that defines a percentile transformation. We'll call this constructor `step_percentile_new()`. 
\n\nThe function `step_percentile()` takes the same arguments as your function and simply adds it to a new recipe. The `...` signifies the variable selectors that can be used.\n\n```{r}\n#| label: \"initial_def\"\nstep_percentile <- function(\n recipe, \n ..., \n role = NA, \n trained = FALSE, \n ref_dist = NULL,\n options = list(probs = (0:100)/100, names = TRUE),\n skip = FALSE,\n id = rand_id(\"percentile\")\n ) {\n\n ## The variable selectors are not immediately evaluated by using\n ## the `quos()` function in `rlang`. `ellipse_check()` captures \n ## the values and also checks to make sure that they are not empty. \n terms <- ellipse_check(...) \n\n add_step(\n recipe, \n step_percentile_new(\n terms = terms, \n trained = trained,\n role = role, \n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n )\n}\n```\n\nYou should always keep the first four arguments (`recipe` though `trained`) the same as listed above. Some notes:\n\n * the `role` argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using `role = NA` will leave the existing role intact. \n * `trained` is set by the package when the estimation step has been run. You should default your function definition's argument to `FALSE`. \n * `skip` is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to `bake()` is used. For example, if a step is applied to the variables with roles of \"outcomes\", these data would not be available for new samples. \n * `id` is a character string that can be used to identify steps in package code. `rand_id()` will create an ID that has the prefix and a random character sequence. \n\nWe can estimate the percentiles of new data points based on the percentiles from the training set with `approx()`. 
Our `step_percentile` contains a `ref_dist` object to store these percentiles (pre-computed from the training set in `prep()`) for later use in `bake()`.\n\nWe will use `stats::quantile()` to compute the grid. However, we might also want to have control over the granularity of this grid, so the `options` argument will be used to define how that calculation is done. We could use the ellipses (aka `...`) so that any options passed to `step_percentile()` that are not one of its arguments will then be passed to `stats::quantile()`. However, we recommend making a separate list object with the options and use these inside the function because `...` is already used to define the variable selection. \n\nIt is also important to consider if there are any _main arguments_ to the step. For example, for spline-related steps such as `step_ns()`, users typically want to adjust the argument for the degrees of freedom in the spline (e.g. `splines::ns(x, df)`). Rather than letting users add `df` to the `options` argument: \n\n* Allow the important arguments to be main arguments to the step function. \n\n* Follow the tidymodels [conventions for naming arguments](https://tidymodels.github.io/model-implementation-principles/standardized-argument-names.html). Whenever possible, avoid jargon and keep common argument names. \n\nThere are benefits to following these principles (as shown below). \n\n## Initialize a new object\n\nNow, the constructor function can be created.\n\nThe function cascade is: \n\n```\nstep_percentile() calls recipes::add_step()\n└──> recipes::add_step() calls step_percentile_new()\n └──> step_percentile_new() calls recipes::step()\n```\n\n`step()` is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using `subclass = \"percentile\"` will set the class of new objects to `\"step_percentile\"`. 
\n\n```{r}\n#| label: \"initialize\"\nstep_percentile_new <- \n function(terms, role, trained, ref_dist, options, skip, id) {\n step(\n subclass = \"percentile\", \n terms = terms,\n role = role,\n trained = trained,\n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n }\n```\n\nThis constructor function should have no default argument values. Defaults should be set in the user-facing step object. \n\n## Create the `prep` method\n\nYou will need to create a new `prep()` method for your step's class. To do this, three arguments that the method should have are:\n\n```r\nfunction(x, training, info = NULL)\n```\n\nwhere\n\n * `x` will be the `step_percentile` object,\n * `training` will be a _tibble_ that has the training set data, and\n * `info` will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific `prep()` method so it may not have the variables from the original data. The columns in this tibble are `variable` (the variable name), `type` (currently either \"numeric\" or \"nominal\"), `role` (defining the variable's role), and `source` (either \"original\" or \"derived\" depending on where it originated).\n\nYou can define other arguments as well. \n\nThe first thing that you might want to do in the `prep()` function is to translate the specification listed in the `terms` argument to column names in the current data. There is a function called `recipes_eval_select()` that can be used to obtain this. \n\n::: {.callout-warning}\n The `recipes_eval_select()` function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps. \n:::\n\n```{r}\n#| label: \"prep_1\"\n#| eval: false\nprep.step_percentile <- function(x, training, info = NULL, ...) 
{\n col_names <- recipes_eval_select(x$terms, training, info) \n # TODO finish the rest of the function\n}\n```\n\nAfter this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See `recipes::check_type()` to do this for basic types. \n\nOnce we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run `rlang::exec()` to splice in any extra arguments contained in the `options` list to the call to `quantile()`: \n\n```{r}\n#| label: \"splice\"\nget_train_pctl <- function(x, args = NULL) {\n res <- rlang::exec(\"quantile\", x = x, !!!args)\n # Remove duplicate percentile values\n res[!duplicated(res)]\n}\n\n# For example:\nget_train_pctl(biomass_tr$carbon, list(probs = 0:1))\nget_train_pctl(biomass_tr$carbon)\n```\n\nNow, the `prep()` method can be created: \n\n```{r}\n#| label: \"prep-2\"\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info)\n ## You can add error trapping for non-numeric data here and so on. \n \n ## We'll use the names later so make sure they are available\n if (x$options$names == FALSE) {\n rlang::abort(\"`names` should be set to TRUE\")\n }\n \n if (!any(names(x$options) == \"probs\")) {\n x$options$probs <- (0:100)/100\n } else {\n x$options$probs <- sort(unique(x$options$probs))\n }\n \n # Compute percentile grid\n ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options)\n\n ## Use the constructor function to return the updated object. \n ## Note that `trained` is now set to TRUE\n \n step_percentile_new(\n terms = x$terms, \n trained = TRUE,\n role = x$role, \n ref_dist = ref_dist,\n options = x$options,\n skip = x$skip,\n id = x$id\n )\n}\n```\n\nWe suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. 
The former can be used for better traceback results.\n\n\n## Create the `bake` method\n\nRemember that the `prep()` function does not _apply_ the step to the data; it only estimates any required values such as `ref_dist`. We will need to create a new method for our `step_percentile()` class. The minimum arguments for this are\n\n```r\nfunction(object, new_data, ...)\n```\n\nwhere `object` is the updated step function that has been through the corresponding `prep()` code and `new_data` is a tibble of data to be processed. \n\nHere is the code to convert the new data to percentiles. The input data (`x` below) comes in as a numeric vector and the output is a vector of approximate percentiles: \n\n```{r}\n#| label: \"bake-helpers\"\npctl_by_approx <- function(x, ref) {\n # In case duplicates were removed, get the percentiles from\n # the names of the reference object\n grid <- as.numeric(gsub(\"%$\", \"\", names(ref))) \n approx(x = ref, y = grid, xout = x)$y/100\n}\n```\n\nThese computations are done column-wise using `purrr::map2_dfc()` to modify the new data in-place:\n\n```{r}\n#| label: \"bake-method\"\nbake.step_percentile <- function(object, new_data, ...) {\n ## For illustration (and not speed), we will loop through the affected variables\n ## and do the computations\n vars <- names(object$ref_dist)\n \n new_data[, vars] <-\n purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx)\n \n ## Always convert to tibbles on the way out\n tibble::as_tibble(new_data)\n}\n```\n\n::: {.callout-note}\nYou need to import `recipes::prep()` and `recipes::bake()` to create your own step function in a package. 
\n:::\n\n## Run the example\n\nLet's use the example data to make sure that it works: \n\n```{r}\n#| label: \"example\"\n#| eval: false\nrec_obj <- \n recipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\")) %>%\n prep(training = biomass_tr)\n\nbiomass_te %>% select(ends_with(\"gen\")) %>% slice(1:2)\nbake(rec_obj, biomass_te %>% slice(1:2), ends_with(\"gen\"))\n\n# Checking to get approximate result: \nmean(biomass_tr$hydrogen <= biomass_te$hydrogen[1])\nmean(biomass_tr$oxygen <= biomass_te$oxygen[1])\n```\n\nThe plot below shows how the original hydrogen percentiles line up with the estimated values:\n\n```{r}\n#| label: \"cdf_plot\"\n#| eval: false\nhydrogen_values <- \n bake(rec_obj, biomass_te, hydrogen) %>% \n bind_cols(biomass_te %>% select(original = hydrogen))\n\nggplot(biomass_tr, aes(x = hydrogen)) + \n # Plot the empirical distribution function of the \n # hydrogen training set values as a black line\n stat_ecdf() + \n # Overlay the estimated percentiles for the new data: \n geom_point(data = hydrogen_values, \n aes(x = original, y = hydrogen), \n col = \"red\", alpha = .5, cex = 2) + \n labs(x = \"New Hydrogen Values\", y = \"Percentile Based on Training Set\")\n```\n\nThese line up very nicely! \n\n## Custom check operations \n\nThe process here is exactly the same as steps; the internal functions have a similar naming convention: \n\n * `add_check()` instead of `add_step()`\n * `check()` instead of `step()`, and so on. \n \nIt is strongly recommended that:\n \n 1. The operations start with `check_` (i.e. `check_range()` and `check_range_new()`)\n 1. The check uses `rlang::abort(paste0(...))` when the conditions are not met\n 1. The original data are returned (unaltered) by the check when the conditions are satisfied. \n\n## Other step methods\n\nThere are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set. 
\n\n### A print method\n\nIf you don't add a print method for `step_percentile`, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called `printer()` that should be useful in most cases. We are using it here for the custom print method for `step_percentile`. It requires the original terms specification and the column names this specification is evaluated to by `prep()`. For the former, our step object is structured so that the list object `ref_dist` has the names of the selected variables: \n\n```{r}\n#| label: \"print-method\"\n#| eval: false\nprint.step_percentile <-\n function(x, width = max(20, options()$width - 35), ...) {\n cat(\"Percentile transformation on \", sep = \"\")\n printer(\n # Names before prep (could be selectors)\n untr_obj = x$terms,\n # Names after prep:\n tr_obj = names(x$ref_dist),\n # Has it been prepped? \n trained = x$trained,\n # An estimate of how many characters to print on a line: \n width = width\n )\n invisible(x)\n }\n\n# Results before `prep()`:\nrecipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\"))\n\n# Results after `prep()`: \nrec_obj\n```\n \n### Methods for declaring required packages\n\nSome recipe steps use functions from other packages. When this is the case, the `step_*()` function should check to see if the package is installed. The function `recipes::recipes_pkg_check()` will do this. For example: \n\n```\n> recipes::recipes_pkg_check(\"some_package\")\n1 package is needed for this step and is not installed. (some_package). Start \na clean R session then run: install.packages(\"some_package\")\n```\n\nThere is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the `hypothetical` package, this might look like: \n\n```{r}\n#| eval: false\nrequired_pkgs.step_hypothetical <- function(x, ...) 
{\n c(\"hypothetical\", \"myrecipespkg\")\n}\n```\n\nIn this example, `myrecipespkg` is the package where the step resides (if it is in a package).\n\nThe reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the `prep()` methods cannot be found and an error occurs. \n\nIf this S3 method is used for your step, you can rely on this for checking the installation: \n \n```{r}\n#| eval: false\nrecipes::recipes_pkg_check(required_pkgs.step_hypothetical())\n``` \n\nIf you'd like an example of this in a package, please take a look at the [embed](https://github.com/tidymodels/embed/) or [themis](https://github.com/tidymodels/themis/) package.\n\n### A tidy method\n\nThe `broom::tidy()` method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values. \n\nWhen the recipe has been prepped, those data are in the list `ref_dist`. A small function can be used to reformat that data into a tibble. It is customary to return the main values as `value`:\n\n```{r}\n#| label: \"tidy-calcs\"\n#| eval: false\nformat_pctl <- function(x) {\n tibble::tibble(\n value = unname(x),\n percentile = as.numeric(gsub(\"%$\", \"\", names(x))) \n )\n}\n\n# For example: \npctl_step_object <- rec_obj$steps[[1]]\npctl_step_object\nformat_pctl(pctl_step_object$ref_dist[[\"hydrogen\"]])\n```\n\nThe tidy method could return these values for each selected column. Before `prep()`, missing values can be used as placeholders. \n\n```{r}\n#| label: \"tidy\"\n#| eval: false\ntidy.step_percentile <- function(x, ...) 
{\n if (is_trained(x)) {\n res <- map_dfr(x$ref_dist, format_pctl, .id = \"term\")\n }\n else {\n term_names <- sel2char(x$terms)\n res <-\n tibble(\n terms = term_names,\n value = rlang::na_dbl,\n percentile = rlang::na_dbl\n )\n }\n # Always return the step id: \n res$id <- x$id\n res\n}\n\ntidy(rec_obj, number = 1)\n```\n\n### Methods for tuning parameters\n\nThe tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn't really have any tunable parameters, so we will demonstrate using `step_poly()`, which returns a polynomial expansion of selected columns. Its function definition has the arguments: \n\n```{r}\n#| label: \"poly-args\"\n#| eval: false\nargs(step_poly)\n```\n\nThe argument `degree` is tunable.\n\nTo work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. \n\n`tunable()` takes the step object as its argument and returns a tibble with columns: \n\n* `name`: The name of the argument. \n\n* `call_info`: A list that describes how to call a function that returns a dials parameter object. \n\n* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `\"recipe\"`. \n\n* `component`: A character string with more information about the source. For recipes, this is just the name of the step (e.g. `\"step_poly\"`). \n\n* `component_id`: A character string to indicate where a unique identifier is for the object. For recipes, this is just the `id` value of the step object. \n\nThe main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. 
\n\nFor example, for a nearest-neighbors `neighbors` parameter, this value is just: \n\n```{r}\n#| label: \"mtry\"\n#| eval: false\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n```\n\nFor `step_poly()`, a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree: \n\n```r\n> lsf.str(\"package:dials\", pattern = \"degree\")\ndegree : function (range = c(1, 3), trans = NULL) \ndegree_int : function (range = c(1L, 3L), trans = NULL) \nprod_degree : function (range = c(1L, 2L), trans = NULL) \nspline_degree : function (range = c(3L, 10L), trans = NULL) \n```\n\nLooking at the `range` values, some return doubles and others return integers. For our problem, `degree_int()` would be a good choice. \n\nFor `step_poly()` the `tunable()` S3 method could be: \n\n```{r}\n#| label: \"tunable\"\n#| eval: false\ntunable.step_poly <- function (x, ...) {\n tibble::tibble(\n name = c(\"degree\"),\n call_info = list(list(pkg = \"dials\", fun = \"degree_int\")),\n source = \"recipe\",\n component = \"step_poly\",\n component_id = x$id\n )\n}\n```\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"ex_setup\"\n#| include: false\nlibrary(tidymodels)\nlibrary(modeldata)\npkgs <- c(\"tidymodels\", \"modeldata\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThere are many existing recipe steps in packages like recipes, themis, textrecipes, and others. 
A full list of steps in CRAN packages [can be found here](/find/recipes/). However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for [centering](https://github.com/tidymodels/recipes/blob/master/R/center.R) or [PCA](https://github.com/tidymodels/recipes/blob/master/R/pca.R) to start. \n\nFor check operations (e.g. `check_class()`), the process is very similar. Notes on this are available at the end of this article. \n\nThe general process to follow is to:\n\n1. Define a step constructor function.\n\n2. Create the minimal S3 methods for `prep()`, `bake()`, and `print()`. \n\n3. Optionally add some extra methods to work with other tidymodels packages, such as `tunable()` and `tidy()`. \n\nAs an example, we will create a step for converting data into percentiles. \n\n## A new step definition\n\nLet's create a step that replaces the value of a variable with its percentile from the training set. The example data we'll use is from the modeldata package:\n\n```{r}\n#| label: \"initial\"\nlibrary(modeldata)\ndata(biomass)\nstr(biomass)\n\nbiomass_tr <- biomass[biomass$dataset == \"Training\",]\nbiomass_te <- biomass[biomass$dataset == \"Testing\",]\n```\n\nTo illustrate the transformation with the `carbon` variable, note the training set distribution of this variable with a vertical line below for the first value of the test set. \n\n```{r}\n#| label: \"carbon_dist\"\n#| fig-width: 6\n#| fig-height: 4.25\n#| out-width: \"100%\"\nlibrary(ggplot2)\ntheme_set(theme_bw())\nggplot(biomass_tr, aes(x = carbon)) + \n geom_histogram(binwidth = 5, col = \"blue\", fill = \"blue\", alpha = .5) + \n geom_vline(xintercept = biomass_te$carbon[1], lty = 2)\n```\n\nBased on the training set, `r round(mean(biomass_tr$carbon <= biomass_te$carbon[1])*100, 1)`% of the data are less than a value of `r biomass_te$carbon[1]`. 
There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values. \n\nOur new step will do this computation for any numeric variables of interest. We will call this new recipe step `step_percentile()`. The code below is designed for illustration and not speed or best practices. We've left out a lot of error trapping that we would want in a real implementation. \n\n## Create the function\n\nTo start, there is a _user-facing_ function. Let's call that `step_percentile()`. This is just a simple wrapper around a _constructor function_, which defines the rules for any step object that defines a percentile transformation. We'll call this constructor `step_percentile_new()`. \n\nThe function `step_percentile()` takes the same arguments as your function and simply adds it to a new recipe. The `...` signifies the variable selectors that can be used.\n\n```{r}\n#| label: \"initial_def\"\nstep_percentile <- function(\n recipe, \n ..., \n role = NA, \n trained = FALSE, \n ref_dist = NULL,\n options = list(probs = (0:100)/100, names = TRUE),\n skip = FALSE,\n id = rand_id(\"percentile\")\n ) {\n\n ## The variable selectors are not immediately evaluated by using\n ## the `quos()` function in `rlang`. `ellipse_check()` captures \n ## the values and also checks to make sure that they are not empty. \n terms <- ellipse_check(...) \n\n add_step(\n recipe, \n step_percentile_new(\n terms = terms, \n trained = trained,\n role = role, \n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n )\n}\n```\n\nYou should always keep the first four arguments (`recipe` though `trained`) the same as listed above. Some notes:\n\n * the `role` argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using `role = NA` will leave the existing role intact. 
\n * `trained` is set by the package when the estimation step has been run. You should default your function definition's argument to `FALSE`. \n * `skip` is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to `bake()` is used. For example, if a step is applied to the variables with roles of \"outcomes\", these data would not be available for new samples. \n * `id` is a character string that can be used to identify steps in package code. `rand_id()` will create an ID that has the prefix and a random character sequence. \n\nWe can estimate the percentiles of new data points based on the percentiles from the training set with `approx()`. Our `step_percentile` contains a `ref_dist` object to store these percentiles (pre-computed from the training set in `prep()`) for later use in `bake()`.\n\nWe will use `stats::quantile()` to compute the grid. However, we might also want to have control over the granularity of this grid, so the `options` argument will be used to define how that calculation is done. We could use the ellipses (aka `...`) so that any options passed to `step_percentile()` that are not one of its arguments will then be passed to `stats::quantile()`. However, we recommend making a separate list object with the options and use these inside the function because `...` is already used to define the variable selection. \n\nIt is also important to consider if there are any _main arguments_ to the step. For example, for spline-related steps such as `step_ns()`, users typically want to adjust the argument for the degrees of freedom in the spline (e.g. `splines::ns(x, df)`). Rather than letting users add `df` to the `options` argument: \n\n* Allow the important arguments to be main arguments to the step function. \n\n* Follow the tidymodels [conventions for naming arguments](https://tidymodels.github.io/model-implementation-principles/standardized-argument-names.html). 
Whenever possible, avoid jargon and keep common argument names. \n\nThere are benefits to following these principles (as shown below). \n\n## Initialize a new object\n\nNow, the constructor function can be created.\n\nThe function cascade is: \n\n```\nstep_percentile() calls recipes::add_step()\n└──> recipes::add_step() calls step_percentile_new()\n └──> step_percentile_new() calls recipes::step()\n```\n\n`step()` is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using `subclass = \"percentile\"` will set the class of new objects to `\"step_percentile\"`. \n\n```{r}\n#| label: \"initialize\"\nstep_percentile_new <- \n function(terms, role, trained, ref_dist, options, skip, id) {\n step(\n subclass = \"percentile\", \n terms = terms,\n role = role,\n trained = trained,\n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n }\n```\n\nThis constructor function should have no default argument values. Defaults should be set in the user-facing step object. \n\n## Create the `prep` method\n\nYou will need to create a new `prep()` method for your step's class. To do this, three arguments that the method should have are:\n\n```r\nfunction(x, training, info = NULL)\n```\n\nwhere\n\n * `x` will be the `step_percentile` object,\n * `training` will be a _tibble_ that has the training set data, and\n * `info` will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific `prep()` method so it may not have the variables from the original data. The columns in this tibble are `variable` (the variable name), `type` (currently either \"numeric\" or \"nominal\"), `role` (defining the variable's role), and `source` (either \"original\" or \"derived\" depending on where it originated).\n\nYou can define other arguments as well. 
\n\nThe first thing that you might want to do in the `prep()` function is to translate the specification listed in the `terms` argument to column names in the current data. There is a function called `recipes_eval_select()` that can be used to obtain this. \n\n::: {.callout-warning}\n The `recipes_eval_select()` function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps. \n:::\n\n```{r}\n#| label: \"prep_1\"\n#| eval: false\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info) \n # TODO finish the rest of the function\n}\n```\n\nAfter this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See `recipes::check_type()` to do this for basic types. \n\nOnce we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run `rlang::exec()` to splice in any extra arguments contained in the `options` list to the call to `quantile()`: \n\n```{r}\n#| label: \"splice\"\nget_train_pctl <- function(x, args = NULL) {\n res <- rlang::exec(\"quantile\", x = x, !!!args)\n # Remove duplicate percentile values\n res[!duplicated(res)]\n}\n\n# For example:\nget_train_pctl(biomass_tr$carbon, list(probs = 0:1))\nget_train_pctl(biomass_tr$carbon)\n```\n\nNow, the `prep()` method can be created: \n\n```{r}\n#| label: \"prep-2\"\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info)\n ## You can add error trapping for non-numeric data here and so on. 
\n \n ## We'll use the names later so make sure they are available\n if (x$options$names == FALSE) {\n rlang::abort(\"`names` should be set to TRUE\")\n }\n \n if (!any(names(x$options) == \"probs\")) {\n x$options$probs <- (0:100)/100\n } else {\n x$options$probs <- sort(unique(x$options$probs))\n }\n \n # Compute percentile grid\n ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options)\n\n ## Use the constructor function to return the updated object. \n ## Note that `trained` is now set to TRUE\n \n step_percentile_new(\n terms = x$terms, \n trained = TRUE,\n role = x$role, \n ref_dist = ref_dist,\n options = x$options,\n skip = x$skip,\n id = x$id\n )\n}\n```\n\nWe suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former can be used for better traceback results.\n\n\n## Create the `bake` method\n\nRemember that the `prep()` function does not _apply_ the step to the data; it only estimates any required values such as `ref_dist`. We will need to create a new method for our `step_percentile()` class. The minimum arguments for this are\n\n```r\nfunction(object, new_data, ...)\n```\n\nwhere `object` is the updated step function that has been through the corresponding `prep()` code and `new_data` is a tibble of data to be processed. \n\nHere is the code to convert the new data to percentiles. The input data (`x` below) comes in as a numeric vector and the output is a vector of approximate percentiles: \n\n```{r}\n#| label: \"bake-helpers\"\npctl_by_approx <- function(x, ref) {\n # In case duplicates were removed, get the percentiles from\n # the names of the reference object\n grid <- as.numeric(gsub(\"%$\", \"\", names(ref))) \n approx(x = ref, y = grid, xout = x)$y/100\n}\n```\n\nThese computations are done column-wise using `purrr::map2_dfc()` to modify the new data in-place:\n\n```{r}\n#| label: \"bake-method\"\nbake.step_percentile <- function(object, new_data, ...) 
{\n ## For illustration (and not speed), we will loop through the affected variables\n ## and do the computations\n vars <- names(object$ref_dist)\n \n new_data[, vars] <-\n purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx)\n \n ## Always convert to tibbles on the way out\n tibble::as_tibble(new_data)\n}\n```\n\n::: {.callout-note}\nYou need to import `recipes::prep()` and `recipes::bake()` to create your own step function in a package. \n:::\n\n## Run the example\n\nLet's use the example data to make sure that it works: \n\n```{r}\n#| label: \"example\"\n#| eval: false\nrec_obj <- \n recipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\")) %>%\n prep(training = biomass_tr)\n\nbiomass_te %>% select(ends_with(\"gen\")) %>% slice(1:2)\nbake(rec_obj, biomass_te %>% slice(1:2), ends_with(\"gen\"))\n\n# Checking to get approximate result: \nmean(biomass_tr$hydrogen <= biomass_te$hydrogen[1])\nmean(biomass_tr$oxygen <= biomass_te$oxygen[1])\n```\n\nThe plot below shows how the original hydrogen percentiles line up with the estimated values:\n\n```{r}\n#| label: \"cdf_plot\"\n#| eval: false\nhydrogen_values <- \n bake(rec_obj, biomass_te, hydrogen) %>% \n bind_cols(biomass_te %>% select(original = hydrogen))\n\nggplot(biomass_tr, aes(x = hydrogen)) + \n # Plot the empirical distribution function of the \n # hydrogen training set values as a black line\n stat_ecdf() + \n # Overlay the estimated percentiles for the new data: \n geom_point(data = hydrogen_values, \n aes(x = original, y = hydrogen), \n col = \"red\", alpha = .5, cex = 2) + \n labs(x = \"New Hydrogen Values\", y = \"Percentile Based on Training Set\")\n```\n\nThese line up very nicely! \n\n## Custom check operations \n\nThe process here is exactly the same as steps; the internal functions have a similar naming convention: \n\n * `add_check()` instead of `add_step()`\n * `check()` instead of `step()`, and so on. \n \nIt is strongly recommended that:\n \n 1. 
The operations start with `check_` (i.e. `check_range()` and `check_range_new()`)\n 1. The check uses `rlang::abort(paste0(...))` when the conditions are not met\n 1. The original data are returned (unaltered) by the check when the conditions are satisfied. \n\n## Other step methods\n\nThere are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set. \n\n### A print method\n\nIf you don't add a print method for `step_percentile`, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called `printer()` that should be useful in most cases. We are using it here for the custom print method for `step_percentile`. It requires the original terms specification and the column names this specification is evaluated to by `prep()`. For the former, our step object is structured so that the list object `ref_dist` has the names of the selected variables: \n\n```{r}\n#| label: \"print-method\"\n#| eval: false\nprint.step_percentile <-\n function(x, width = max(20, options()$width - 35), ...) {\n cat(\"Percentile transformation on \", sep = \"\")\n printer(\n # Names before prep (could be selectors)\n untr_obj = x$terms,\n # Names after prep:\n tr_obj = names(x$ref_dist),\n # Has it been prepped? \n trained = x$trained,\n # An estimate of how many characters to print on a line: \n width = width\n )\n invisible(x)\n }\n\n# Results before `prep()`:\nrecipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\"))\n\n# Results after `prep()`: \nrec_obj\n```\n \n### Methods for declaring required packages\n\nSome recipe steps use functions from other packages. When this is the case, the `step_*()` function should check to see if the package is installed. The function `recipes::recipes_pkg_check()` will do this. 
For example: \n\n```\n> recipes::recipes_pkg_check(\"some_package\")\n1 package is needed for this step and is not installed. (some_package). Start \na clean R session then run: install.packages(\"some_package\")\n```\n\nThere is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the `hypothetical` package, this might look like: \n\n```{r}\n#| eval: false\nrequired_pkgs.step_hypothetical <- function(x, ...) {\n c(\"hypothetical\", \"myrecipespkg\")\n}\n```\n\nIn this example, `myrecipespkg` is the package where the step resides (if it is in a package).\n\nThe reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the `prep()` methods cannot be found and an error occurs. \n\nIf this S3 method is used for your step, you can rely on this for checking the installation: \n \n```{r}\n#| eval: false\nrecipes::recipes_pkg_check(required_pkgs.step_hypothetical())\n``` \n\nIf you'd like an example of this in a package, please take a look at the [embed](https://github.com/tidymodels/embed/) or [themis](https://github.com/tidymodels/themis/) package.\n\n### A tidy method\n\nThe `broom::tidy()` method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values. \n\nWhen the recipe has been prepped, those data are in the list `ref_dist`. A small function can be used to reformat that data into a tibble. 
It is customary to return the main values as `value`:\n\n```{r}\n#| label: \"tidy-calcs\"\n#| eval: false\nformat_pctl <- function(x) {\n tibble::tibble(\n value = unname(x),\n percentile = as.numeric(gsub(\"%$\", \"\", names(x))) \n )\n}\n\n# For example: \npctl_step_object <- rec_obj$steps[[1]]\npctl_step_object\nformat_pctl(pctl_step_object$ref_dist[[\"hydrogen\"]])\n```\n\nThe tidy method could return these values for each selected column. Before `prep()`, missing values can be used as placeholders. \n\n```{r}\n#| label: \"tidy\"\n#| eval: false\ntidy.step_percentile <- function(x, ...) {\n if (is_trained(x)) {\n res <- map_dfr(x$ref_dist, format_pctl, .id = \"term\")\n }\n else {\n term_names <- sel2char(x$terms)\n res <-\n tibble(\n terms = term_names,\n value = rlang::na_dbl,\n percentile = rlang::na_dbl\n )\n }\n # Always return the step id: \n res$id <- x$id\n res\n}\n\ntidy(rec_obj, number = 1)\n```\n\n### Methods for tuning parameters\n\nThe tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn't really have any tunable parameters, so we will demonstrate using `step_poly()`, which returns a polynomial expansion of selected columns. Its function definition has the arguments: \n\n```{r}\n#| label: \"poly-args\"\n#| eval: false\nargs(step_poly)\n```\n\nThe argument `degree` is tunable.\n\nTo work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. \n\n`tunable()` takes the step object as its argument and returns a tibble with columns: \n\n* `name`: The name of the argument. \n\n* `call_info`: A list that describes how to call a function that returns a dials parameter object. \n\n* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). 
Here, it is just `\"recipe\"`. \n\n* `component`: A character string with more information about the source. For recipes, this is just the name of the step (e.g. `\"step_poly\"`). \n\n* `component_id`: A character string to indicate where a unique identifier is for the object. For recipes, this is just the `id` value of the step object. \n\nThe main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. \n\nFor example, for a nearest-neighbors `neighbors` parameter, this value is just: \n\n```{r}\n#| label: \"mtry\"\n#| eval: false\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n```\n\nFor `step_poly()`, a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree: \n\n```r\n> lsf.str(\"package:dials\", pattern = \"degree\")\ndegree : function (range = c(1, 3), trans = NULL) \ndegree_int : function (range = c(1L, 3L), trans = NULL) \nprod_degree : function (range = c(1L, 2L), trans = NULL) \nspline_degree : function (range = c(3L, 10L), trans = NULL) \n```\n\nLooking at the `range` values, some return doubles and others return integers. For our problem, `degree_int()` would be a good choice. \n\nFor `step_poly()` the `tunable()` S3 method could be: \n\n```{r}\n#| label: \"tunable\"\n#| eval: false\ntunable.step_poly <- function (x, ...) 
{\n tibble::tibble(\n name = c(\"degree\"),\n call_info = list(list(pkg = \"dials\", fun = \"degree_int\")),\n source = \"recipe\",\n component = \"step_poly\",\n component_id = x$id\n )\n}\n```\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Create your own recipe step function","categories":["developer tools"],"type":"learn-subsection","weight":1,"description":"Write a new recipe step for data 
preprocessing.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/index.qmd.json b/.quarto/idx/learn/index.qmd.json new file mode 100644 index 00000000..5177826a --- /dev/null +++ b/.quarto/idx/learn/index.qmd.json @@ -0,0 +1 @@ +{"title":"Learn","markdown":{"yaml":{"title":"Learn","description":"Learn how to go farther with tidymodels in your modeling and machine learning projects.","icon_attribution":"Icons made by [Becris](https://www.flaticon.com/authors/becris) from www.flaticon.com","listing":[{"id":"articles-links","categories":"unnumbered","type":"grid","page-size":40,"contents":["*.qmd","../start/models/index.qmd","../start/recipes/index.qmd","../start/resampling/index.qmd","../start/tuning/index.qmd","../start/case-study/index.qmd"]}]},"containsRefs":false,"markdown":"\n\nAfter you know [what you need to get started](/start/) with tidymodels, you can learn more and go further. Find articles here to help you solve specific problems using the tidymodels framework. \n \n","srcMarkdownNoYaml":"\n\nAfter you know [what you need to get started](/start/) with tidymodels, you can learn more and go further. Find articles here to help you solve specific problems using the tidymodels framework. 
\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Learn","description":"Learn how to go farther with tidymodels in your modeling and machine learning projects.","icon_attribution":"Icons made by [Becris](https://www.flaticon.com/authors/becris) from 
www.flaticon.com","listing":[{"id":"articles-links","categories":"unnumbered","type":"grid","page-size":40,"contents":["*.qmd","../start/models/index.qmd","../start/recipes/index.qmd","../start/resampling/index.qmd","../start/tuning/index.qmd","../start/case-study/index.qmd"]}]},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/models/coefficients/index.qmd.json b/.quarto/idx/learn/models/coefficients/index.qmd.json new file mode 100644 index 00000000..4b300ecb --- /dev/null +++ b/.quarto/idx/learn/models/coefficients/index.qmd.json @@ -0,0 +1 @@ +{"title":"Working with model coefficients","markdown":{"yaml":{"title":"Working with model coefficients","categories":["model fitting","tidying results","linear regression","model tuning"],"type":"learn-subsection","weight":5,"description":"Create models that use coefficients, extract them from fitted models, and visualize them.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\npkgs <- c(\"tidymodels\", \"glmnet\")\nlibrary(Matrix)\nlibrary(glmnet)\n```\n\n\nThere are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients. \n\nThis article describes how to retrieve the estimated coefficients from models fit using tidymodels. 
`r article_req_pkgs(pkgs)`\n\n## Linear regression\n\nLet's start with a linear regression model: \n\n$$\\hat{y} = \\hat{\\beta}_0 + \\hat{\\beta}_1x_1 + \\ldots + \\hat{\\beta}_px_p$$ \n\nThe $\\beta$ values are the coefficients and the $x_j$ are model predictors, or features. \n\nLet's use the [Chicago train data](https://bookdown.org/max/FES/chicago-intro.html) where we predict the ridership at the Clark and Lake station (column name: `ridership`) with the previous ridership data 14 days prior at three of the stations. \n\nThe data are in the modeldata package: \n\n```{r}\n#| label: \"setup-tm\"\n#| message: false\n#| warning: false\nlibrary(tidymodels)\ntidymodels_prefer()\ntheme_set(theme_bw())\n\ndata(Chicago)\n\nChicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem)\n```\n\n### A single model\n\nLet's start by fitting only a single parsnip model object. We'll create a model specification using `linear_reg()`. \n\n::: {.callout-note}\nThe default engine is `\"lm\"` so no call to `set_engine()` is required. \n:::\n\nThe `fit()` function estimates the model coefficients, given a formula and data set. \n\n\n```{r}\n#| label: \"lm-single\"\nlm_spec <- linear_reg()\nlm_fit <- fit(lm_spec, ridership ~ ., data = Chicago)\nlm_fit\n```\n\nThe best way to retrieve the fitted parameters is to use the `tidy()` method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names: \n\n```{r}\n#| label: \"lm-tidy\"\ntidy(lm_fit)\n```\n\nWe'll use this function in subsequent sections. \n\n### Resampled or tuned models\n\nThe tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the [bootstrap](https://www.tmwr.org/resampling.html#bootstrap) to resample the data. 
This is a standard resampling approach when evaluating the uncertainty in statistical estimates. \n\nWe'll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates).\n\n```{r}\n#| label: \"bootstraps\"\nset.seed(123)\nbt <- bootstraps(Chicago, times = 5)\n```\n\nWith resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function [`fit_resamples()`](https://www.tmwr.org/resampling.html#resampling-performance) is the recommended approach for doing so. \n\n::: {.callout-warning}\n The `fit_resamples()` function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to `fit_resamples()` that _can_ save the model object or any other aspect of the fit. \n:::\n\nThis function takes a single argument that represents the fitted [workflow object](https://www.tmwr.org/workflows.html) (even if you don't give `fit_resamples()` a workflow).\n\nFrom this, we can extract the model fit. There are two \"levels\" of model objects that are available: \n\n* The parsnip model object, which wraps the underlying model object. We retrieve this using the `extract_fit_parsnip()` function. \n\n* The underlying model object (a.k.a. the engine fit) via the `extract_fit_engine()`. \n\nWe'll use the latter option and then tidy this model object as we did in the previous section. Let's add this to the control function so that we can re-use it. 
\n\n```{r}\n#| label: \"lm-ctrl\"\nget_lm_coefs <- function(x) {\n x %>% \n # get the lm model object\n extract_fit_engine() %>% \n # transform its format\n tidy()\n}\ntidy_ctrl <- control_grid(extract = get_lm_coefs)\n```\n\nThis argument is then passed to `fit_resamples()`:\n\n```{r}\n#| label: \"lm-resampled\"\nlm_res <- \n lm_spec %>% \n fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl)\nlm_res\n```\n\nNote that there is a `.extracts` column in our resampling results. This object contains the output of our `get_lm_coefs()` function for each resample. The structure of the elements of this column is a little complex. Let's start by looking at the first element (which corresponds to the first resample): \n\n\n```{r}\n#| label: \"lm-extract-ex\"\nlm_res$.extracts[[1]]\n```\n\nThere is _another_ column in this element called `.extracts` that has the results of the `tidy()` function call: \n\n```{r}\n#| label: \"lm-extract-again\"\nlm_res$.extracts[[1]]$.extracts[[1]]\n```\n\nThese nested columns can be flattened via the purrr `unnest()` function: \n\n```{r}\n#| label: \"lm-extract-almost\"\nlm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) \n```\n\nWe still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format: \n\n```{r}\n#| label: \"lm-extract-final\"\nlm_coefs <- \n lm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n unnest(.extracts)\n\nlm_coefs %>% select(id, term, estimate, p.value)\n```\n\nThat's better! 
Now, let's plot the model coefficients for each resample: \n\n```{r}\n#| label: \"lm-plot\"\nlm_coefs %>%\n filter(term != \"(Intercept)\") %>% \n ggplot(aes(x = term, y = estimate, group = id, col = id)) + \n geom_hline(yintercept = 0, lty = 3) + \n geom_line(alpha = 0.3, lwd = 1.2) + \n labs(y = \"Coefficient\", x = NULL) +\n theme(legend.position = \"top\")\n```\n\nThere seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two. \n\nLooking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases. \n\n## More complex: a glmnet model\n\nThe glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set. \n\nThere are two types of penalization that this model uses: \n\n* Lasso (a.k.a. $L_1$) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model). \n\n* Weight decay (a.k.a ridge regression or $L_2$) uses a different type of penalty that is most useful for highly correlated predictors. \n\nThe glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification:\n\n```{r}\n#| label: \"glmnet-spec\"\nglmnet_spec <- \n linear_reg(penalty = 0.1, mixture = 0.95) %>% \n set_engine(\"glmnet\")\n```\n\nhas a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high). 
\n\n::: {.callout-note}\nModels with regularization require that predictors are all on the same scale. The ridership at our three stations are very different, but glmnet [automatically centers and scales the data](https://parsnip.tidymodels.org/reference/details_linear_reg_glmnet.html). You can use recipes to [center and scale your data yourself](https://recipes.tidymodels.org/reference/step_normalize.html). \n:::\n\nLet's combine the model specification with a formula in a model `workflow()` and then fit the model to the data:\n\n```{r}\n#| label: \"glmnet-wflow\"\nglmnet_wflow <- \n workflow() %>% \n add_model(glmnet_spec) %>% \n add_formula(ridership ~ .)\n\nglmnet_fit <- fit(glmnet_wflow, Chicago)\nglmnet_fit\n```\n\nIn this output, the term `lambda` is used to represent the penalty. \n\nNote that the output shows many values of the penalty despite our specification of `penalty = 0.1`. It turns out that this model fits a \"path\" of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object. \n\nLet's look at two different approaches to obtaining the coefficients. Both will use the `tidy()` method. One will tidy a glmnet object and the other will tidy a tidymodels object. \n\n### Using glmnet penalty values\n\nThis glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are `r length(extract_fit_engine(glmnet_fit)$lambda)` penalties available. 
To get the set of penalties produced for this data set, we can extract the engine fit and tidy: \n\n```{r}\n#| label: \"glmnet-tidy\"\nglmnet_fit %>% \n extract_fit_engine() %>% \n tidy() %>% \n rename(penalty = lambda) %>% # <- for consistent naming\n filter(term != \"(Intercept)\")\n```\n\nThis works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! The underlying package has functions that use interpolation to produce coefficients for this specific value, but the `tidy()` method for glmnet objects does not use it. \n\n### Using specific penalty values\n\nIf we run the `tidy()` method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified: \n\n```{r}\n#| label: \"glmnet-tidy-parsnip\"\ntidy(glmnet_fit)\n```\n\nFor any another (single) penalty, we can use an additional argument:\n\n```{r}\n#| label: \"glmnet-tidy-parsnip-alt\"\ntidy(glmnet_fit, penalty = 5.5620) # A value from above\n```\n\nThe reason for having two `tidy()` methods is that, with tidymodels, the focus is on using a specific penalty value. \n\n\n### Tuning a glmnet model\n\nIf we know a priori acceptable values for penalty and mixture, we can use the `fit_resamples()` function as we did before with linear regression. Otherwise, we can tune those parameters with the tidymodels `tune_*()` functions. \n\nLet's tune our glmnet model over both parameters with this grid: \n\n```{r}\n#| label: \"glmnet-grid\"\npen_vals <- 10^seq(-3, 0, length.out = 10)\ngrid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0))\n```\n\nHere is where more glmnet-related complexity comes in: we know that each resample and each value of `mixture` will probably produce a different set of penalty values contained in the model object. 
_How can we look at the coefficients at the specific penalty values that we are using to tune?_\n\nThe approach that we suggest is to use the special `path_values` option for glmnet. Details are described in the [technical documentation about glmnet and tidymodels](https://parsnip.tidymodels.org/reference/glmnet-details.html#arguments) but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture). \n\nWe can pass these as an engine argument and then update our previous workflow object:\n\n```{r}\n#| label: \"glmnet-tune\"\nglmnet_tune_spec <- \n linear_reg(penalty = tune(), mixture = tune()) %>% \n set_engine(\"glmnet\", path_values = pen_vals)\n\nglmnet_wflow <- \n glmnet_wflow %>% \n update_model(glmnet_tune_spec)\n```\n\nNow we will use an extraction function similar to when we used ordinary least squares. We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty: \n\n```{r}\n#| label: \"glmnet-tuning\"\nget_glmnet_coefs <- function(x) {\n x %>% \n extract_fit_engine() %>% \n tidy(return_zeros = TRUE) %>% \n rename(penalty = lambda)\n}\nparsnip_ctrl <- control_grid(extract = get_glmnet_coefs)\n\nglmnet_res <- \n glmnet_wflow %>% \n tune_grid(\n resamples = bt,\n grid = grid,\n control = parsnip_ctrl\n )\nglmnet_res\n```\n\nAs noted before, the elements of the main `.extracts` column have an embedded list column with the results of `get_glmnet_coefs()`: \n\n```{r}\n#| label: \"glmnet-extract-single\"\nglmnet_res$.extracts[[1]] %>% head()\n\nglmnet_res$.extracts[[1]]$.extracts[[1]] %>% head()\n```\n\nAs before, we'll have to use a double `unnest()`. 
Since the penalty value is in both the top-level and lower-level `.extracts`, we'll use `select()` to get rid of the first version (but keep `mixture`):\n\n```{r}\n#| label: \"glmnet-extract-1\"\n#| eval: false\nglmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% # <- removes the first penalty column\n unnest(.extracts)\n```\n\nBut wait! We know that each glmnet fit contains all of the coefficients. This means, for a specific resample and value of `mixture`, the results are the same: \n\n```{r}\n#| label: \"glmnet-extract-dups\"\nall.equal(\n # First bootstrap, first `mixture`, first `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[1]],\n # First bootstrap, first `mixture`, second `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[2]]\n)\n```\n\nFor this reason, we'll add a `slice(1)` when grouping by `id` and `mixture`. This will get rid of the replicated results. \n\n```{r}\n#| label: \"glmnet-extract-final\"\nglmnet_coefs <- \n glmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% \n group_by(id, mixture) %>% # ┐\n slice(1) %>% # │ Remove the redundant results\n ungroup() %>% # ┘\n unnest(.extracts)\n\nglmnet_coefs %>% \n select(id, penalty, mixture, term, estimate) %>% \n filter(term != \"(Intercept)\")\n```\n\nNow we have the coefficients. 
Let's look at how they behave as more regularization is used: \n\n```{r}\n#| label: \"glmnet-plot\"\n#| fig-height: 4\n#| fig-width: 8.5\nglmnet_coefs %>% \n filter(term != \"(Intercept)\") %>% \n mutate(mixture = format(mixture)) %>% \n ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + \n geom_hline(yintercept = 0, lty = 3) +\n geom_line(alpha = 0.5, lwd = 1.2) + \n facet_wrap(~ term) + \n scale_x_log10() +\n scale_color_brewer(palette = \"Accent\") +\n labs(y = \"coefficient\") +\n theme(legend.position = \"top\")\n```\n\nNotice a couple of things: \n\n* With a pure lasso model (i.e., `mixture = 1`), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases. \n\n* The Harlem predictor is either quickly selected out of the model or goes from negative to positive. \n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\npkgs <- c(\"tidymodels\", \"glmnet\")\nlibrary(Matrix)\nlibrary(glmnet)\n```\n\n## Introduction \n\nThere are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients. \n\nThis article describes how to retrieve the estimated coefficients from models fit using tidymodels. 
`r article_req_pkgs(pkgs)`\n\n## Linear regression\n\nLet's start with a linear regression model: \n\n$$\\hat{y} = \\hat{\\beta}_0 + \\hat{\\beta}_1x_1 + \\ldots + \\hat{\\beta}_px_p$$ \n\nThe $\\beta$ values are the coefficients and the $x_j$ are model predictors, or features. \n\nLet's use the [Chicago train data](https://bookdown.org/max/FES/chicago-intro.html) where we predict the ridership at the Clark and Lake station (column name: `ridership`) with the previous ridership data 14 days prior at three of the stations. \n\nThe data are in the modeldata package: \n\n```{r}\n#| label: \"setup-tm\"\n#| message: false\n#| warning: false\nlibrary(tidymodels)\ntidymodels_prefer()\ntheme_set(theme_bw())\n\ndata(Chicago)\n\nChicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem)\n```\n\n### A single model\n\nLet's start by fitting only a single parsnip model object. We'll create a model specification using `linear_reg()`. \n\n::: {.callout-note}\nThe default engine is `\"lm\"` so no call to `set_engine()` is required. \n:::\n\nThe `fit()` function estimates the model coefficients, given a formula and data set. \n\n\n```{r}\n#| label: \"lm-single\"\nlm_spec <- linear_reg()\nlm_fit <- fit(lm_spec, ridership ~ ., data = Chicago)\nlm_fit\n```\n\nThe best way to retrieve the fitted parameters is to use the `tidy()` method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names: \n\n```{r}\n#| label: \"lm-tidy\"\ntidy(lm_fit)\n```\n\nWe'll use this function in subsequent sections. \n\n### Resampled or tuned models\n\nThe tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the [bootstrap](https://www.tmwr.org/resampling.html#bootstrap) to resample the data. 
This is a standard resampling approach when evaluating the uncertainty in statistical estimates. \n\nWe'll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates).\n\n```{r}\n#| label: \"bootstraps\"\nset.seed(123)\nbt <- bootstraps(Chicago, times = 5)\n```\n\nWith resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function [`fit_resamples()`](https://www.tmwr.org/resampling.html#resampling-performance) is the recommended approach for doing so. \n\n::: {.callout-warning}\n The `fit_resamples()` function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to `fit_resamples()` that _can_ save the model object or any other aspect of the fit. \n:::\n\nThis function takes a single argument that represents the fitted [workflow object](https://www.tmwr.org/workflows.html) (even if you don't give `fit_resamples()` a workflow).\n\nFrom this, we can extract the model fit. There are two \"levels\" of model objects that are available: \n\n* The parsnip model object, which wraps the underlying model object. We retrieve this using the `extract_fit_parsnip()` function. \n\n* The underlying model object (a.k.a. the engine fit) via the `extract_fit_engine()`. \n\nWe'll use the latter option and then tidy this model object as we did in the previous section. Let's add this to the control function so that we can re-use it. 
\n\n```{r}\n#| label: \"lm-ctrl\"\nget_lm_coefs <- function(x) {\n x %>% \n # get the lm model object\n extract_fit_engine() %>% \n # transform its format\n tidy()\n}\ntidy_ctrl <- control_grid(extract = get_lm_coefs)\n```\n\nThis argument is then passed to `fit_resamples()`:\n\n```{r}\n#| label: \"lm-resampled\"\nlm_res <- \n lm_spec %>% \n fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl)\nlm_res\n```\n\nNote that there is a `.extracts` column in our resampling results. This object contains the output of our `get_lm_coefs()` function for each resample. The structure of the elements of this column is a little complex. Let's start by looking at the first element (which corresponds to the first resample): \n\n\n```{r}\n#| label: \"lm-extract-ex\"\nlm_res$.extracts[[1]]\n```\n\nThere is _another_ column in this element called `.extracts` that has the results of the `tidy()` function call: \n\n```{r}\n#| label: \"lm-extract-again\"\nlm_res$.extracts[[1]]$.extracts[[1]]\n```\n\nThese nested columns can be flattened via the purrr `unnest()` function: \n\n```{r}\n#| label: \"lm-extract-almost\"\nlm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) \n```\n\nWe still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format: \n\n```{r}\n#| label: \"lm-extract-final\"\nlm_coefs <- \n lm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n unnest(.extracts)\n\nlm_coefs %>% select(id, term, estimate, p.value)\n```\n\nThat's better! 
Now, let's plot the model coefficients for each resample: \n\n```{r}\n#| label: \"lm-plot\"\nlm_coefs %>%\n filter(term != \"(Intercept)\") %>% \n ggplot(aes(x = term, y = estimate, group = id, col = id)) + \n geom_hline(yintercept = 0, lty = 3) + \n geom_line(alpha = 0.3, lwd = 1.2) + \n labs(y = \"Coefficient\", x = NULL) +\n theme(legend.position = \"top\")\n```\n\nThere seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two. \n\nLooking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases. \n\n## More complex: a glmnet model\n\nThe glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set. \n\nThere are two types of penalization that this model uses: \n\n* Lasso (a.k.a. $L_1$) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model). \n\n* Weight decay (a.k.a ridge regression or $L_2$) uses a different type of penalty that is most useful for highly correlated predictors. \n\nThe glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification:\n\n```{r}\n#| label: \"glmnet-spec\"\nglmnet_spec <- \n linear_reg(penalty = 0.1, mixture = 0.95) %>% \n set_engine(\"glmnet\")\n```\n\nhas a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high). 
\n\n::: {.callout-note}\nModels with regularization require that predictors are all on the same scale. The ridership at our three stations are very different, but glmnet [automatically centers and scales the data](https://parsnip.tidymodels.org/reference/details_linear_reg_glmnet.html). You can use recipes to [center and scale your data yourself](https://recipes.tidymodels.org/reference/step_normalize.html). \n:::\n\nLet's combine the model specification with a formula in a model `workflow()` and then fit the model to the data:\n\n```{r}\n#| label: \"glmnet-wflow\"\nglmnet_wflow <- \n workflow() %>% \n add_model(glmnet_spec) %>% \n add_formula(ridership ~ .)\n\nglmnet_fit <- fit(glmnet_wflow, Chicago)\nglmnet_fit\n```\n\nIn this output, the term `lambda` is used to represent the penalty. \n\nNote that the output shows many values of the penalty despite our specification of `penalty = 0.1`. It turns out that this model fits a \"path\" of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object. \n\nLet's look at two different approaches to obtaining the coefficients. Both will use the `tidy()` method. One will tidy a glmnet object and the other will tidy a tidymodels object. \n\n### Using glmnet penalty values\n\nThis glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are `r length(extract_fit_engine(glmnet_fit)$lambda)` penalties available. 
To get the set of penalties produced for this data set, we can extract the engine fit and tidy: \n\n```{r}\n#| label: \"glmnet-tidy\"\nglmnet_fit %>% \n extract_fit_engine() %>% \n tidy() %>% \n rename(penalty = lambda) %>% # <- for consistent naming\n filter(term != \"(Intercept)\")\n```\n\nThis works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! The underlying package has functions that use interpolation to produce coefficients for this specific value, but the `tidy()` method for glmnet objects does not use it. \n\n### Using specific penalty values\n\nIf we run the `tidy()` method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified: \n\n```{r}\n#| label: \"glmnet-tidy-parsnip\"\ntidy(glmnet_fit)\n```\n\nFor any another (single) penalty, we can use an additional argument:\n\n```{r}\n#| label: \"glmnet-tidy-parsnip-alt\"\ntidy(glmnet_fit, penalty = 5.5620) # A value from above\n```\n\nThe reason for having two `tidy()` methods is that, with tidymodels, the focus is on using a specific penalty value. \n\n\n### Tuning a glmnet model\n\nIf we know a priori acceptable values for penalty and mixture, we can use the `fit_resamples()` function as we did before with linear regression. Otherwise, we can tune those parameters with the tidymodels `tune_*()` functions. \n\nLet's tune our glmnet model over both parameters with this grid: \n\n```{r}\n#| label: \"glmnet-grid\"\npen_vals <- 10^seq(-3, 0, length.out = 10)\ngrid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0))\n```\n\nHere is where more glmnet-related complexity comes in: we know that each resample and each value of `mixture` will probably produce a different set of penalty values contained in the model object. 
_How can we look at the coefficients at the specific penalty values that we are using to tune?_\n\nThe approach that we suggest is to use the special `path_values` option for glmnet. Details are described in the [technical documentation about glmnet and tidymodels](https://parsnip.tidymodels.org/reference/glmnet-details.html#arguments) but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture). \n\nWe can pass these as an engine argument and then update our previous workflow object:\n\n```{r}\n#| label: \"glmnet-tune\"\nglmnet_tune_spec <- \n linear_reg(penalty = tune(), mixture = tune()) %>% \n set_engine(\"glmnet\", path_values = pen_vals)\n\nglmnet_wflow <- \n glmnet_wflow %>% \n update_model(glmnet_tune_spec)\n```\n\nNow we will use an extraction function similar to when we used ordinary least squares. We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty: \n\n```{r}\n#| label: \"glmnet-tuning\"\nget_glmnet_coefs <- function(x) {\n x %>% \n extract_fit_engine() %>% \n tidy(return_zeros = TRUE) %>% \n rename(penalty = lambda)\n}\nparsnip_ctrl <- control_grid(extract = get_glmnet_coefs)\n\nglmnet_res <- \n glmnet_wflow %>% \n tune_grid(\n resamples = bt,\n grid = grid,\n control = parsnip_ctrl\n )\nglmnet_res\n```\n\nAs noted before, the elements of the main `.extracts` column have an embedded list column with the results of `get_glmnet_coefs()`: \n\n```{r}\n#| label: \"glmnet-extract-single\"\nglmnet_res$.extracts[[1]] %>% head()\n\nglmnet_res$.extracts[[1]]$.extracts[[1]] %>% head()\n```\n\nAs before, we'll have to use a double `unnest()`. 
Since the penalty value is in both the top-level and lower-level `.extracts`, we'll use `select()` to get rid of the first version (but keep `mixture`):\n\n```{r}\n#| label: \"glmnet-extract-1\"\n#| eval: false\nglmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% # <- removes the first penalty column\n unnest(.extracts)\n```\n\nBut wait! We know that each glmnet fit contains all of the coefficients. This means, for a specific resample and value of `mixture`, the results are the same: \n\n```{r}\n#| label: \"glmnet-extract-dups\"\nall.equal(\n # First bootstrap, first `mixture`, first `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[1]],\n # First bootstrap, first `mixture`, second `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[2]]\n)\n```\n\nFor this reason, we'll add a `slice(1)` when grouping by `id` and `mixture`. This will get rid of the replicated results. \n\n```{r}\n#| label: \"glmnet-extract-final\"\nglmnet_coefs <- \n glmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% \n group_by(id, mixture) %>% # ┐\n slice(1) %>% # │ Remove the redundant results\n ungroup() %>% # ┘\n unnest(.extracts)\n\nglmnet_coefs %>% \n select(id, penalty, mixture, term, estimate) %>% \n filter(term != \"(Intercept)\")\n```\n\nNow we have the coefficients. 
Let's look at how they behave as more regularization is used: \n\n```{r}\n#| label: \"glmnet-plot\"\n#| fig-height: 4\n#| fig-width: 8.5\nglmnet_coefs %>% \n filter(term != \"(Intercept)\") %>% \n mutate(mixture = format(mixture)) %>% \n ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + \n geom_hline(yintercept = 0, lty = 3) +\n geom_line(alpha = 0.5, lwd = 1.2) + \n facet_wrap(~ term) + \n scale_x_log10() +\n scale_color_brewer(palette = \"Accent\") +\n labs(y = \"coefficient\") +\n theme(legend.position = \"top\")\n```\n\nNotice a couple of things: \n\n* With a pure lasso model (i.e., `mixture = 1`), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases. \n\n* The Harlem predictor is either quickly selected out of the model or goes from negative to positive. \n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: 
false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Working with model coefficients","categories":["model fitting","tidying results","linear regression","model tuning"],"type":"learn-subsection","weight":5,"description":"Create models that use 
coefficients, extract them from fitted models, and visualize them.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/models/parsnip-nnet/index.qmd.json b/.quarto/idx/learn/models/parsnip-nnet/index.qmd.json new file mode 100644 index 00000000..38f3cb76 --- /dev/null +++ b/.quarto/idx/learn/models/parsnip-nnet/index.qmd.json @@ -0,0 +1 @@ +{"title":"Classification models using a neural network","markdown":{"yaml":{"title":"Classification models using a neural network","categories":["model fitting","torch","neural networks"],"type":"learn-subsection","weight":2,"description":"Train a classification model and evaluate its performance.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\", \"brulee\", \"AppliedPredictiveModeling\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n`r article_req_pkgs(pkgs)` You will also need the python torch library installed (see `?torch::install_torch()`).\n\nWe can create classification models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict categorical quantities or class labels. Here, let's fit a single classification model using a neural network and evaluate using a validation set. While the [tune](https://tune.tidymodels.org/) package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage. \n\n## Fitting a neural network\n\n\nLet's fit a model to a small, two predictor classification data set. 
The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered. \n\n\n```{r}\n#| label: \"biv--split\"\nlibrary(AppliedPredictiveModeling)\n\nset.seed(321)\ncls_train <- quadBoundaryFunc(2000) %>% select(A = X1, B = X2, class)\ncls_val <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\ncls_test <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\n```\n\nA plot of the data shows two right-skewed predictors: \n\n```{r}\n#| label: \"biv-plot\"\n#| fig-width: 6\n#| fig-height: 6.1\nggplot(cls_train, aes(x = A, y = B, col = class)) + \n geom_point(alpha = 1 / 4, cex = 3) + \n coord_fixed()\n```\n\nLet's use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be more symmetric (via the `step_BoxCox()` function) and on a common scale (using `step_normalize()`). We can use [recipes](https://recipes.tidymodels.org/) to do so:\n\n```{r}\n#| label: \"biv--proc\"\nbiv_rec <- \n recipe(class ~ ., data = cls_train) %>%\n step_normalize(all_predictors())\n```\n\nThis recipe is not directly executed; the steps will be estimated when the model is fit. 
\n\nWe can use the brulee package to fit a model with 5 hidden units and a 10% dropout rate, to regularize the model:\n\n```{r}\n#| label: \"biv-nnet\"\nnnet_spec <- \n mlp(epochs = 1000, hidden_units = 10, penalty = 0.01, learn_rate = 0.1) %>% \n set_engine(\"brulee\", validation = 0) %>% \n set_mode(\"classification\")\n\nnnet_wflow <- \n biv_rec %>% \n workflow(nnet_spec)\n\nset.seed(987)\nnnet_fit <- fit(nnet_wflow, cls_train)\nnnet_fit %>% extract_fit_engine()\n```\n\n## Model performance\n\nIn parsnip, the `predict()` function can be used to characterize performance on the validation set. Since parsnip always produces tibble outputs, these can just be column bound to the original data: \n\n```{r}\n#| label: \"biv--perf\"\nval_results <- \n cls_val %>%\n bind_cols(\n predict(nnet_fit, new_data = cls_val),\n predict(nnet_fit, new_data = cls_val, type = \"prob\")\n )\nval_results %>% slice(1:5)\n\nval_results %>% roc_auc(truth = class, .pred_Class1)\n\nval_results %>% accuracy(truth = class, .pred_class)\n\nval_results %>% conf_mat(truth = class, .pred_class)\n```\n\nLet's also create a grid to get a visual sense of the class boundary for the test set.\n\n```{r}\n#| label: \"biv-boundary\"\n#| fig-width: 6\n#| fig-height: 6.1\na_rng <- range(cls_train$A)\nb_rng <- range(cls_train$B)\nx_grid <-\n expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100),\n B = seq(b_rng[1], b_rng[2], length.out = 100))\n\n\n# Make predictions using the transformed predictors but \n# attach them to the predictors in the original units: \nx_grid <- \n x_grid %>% \n bind_cols(predict(nnet_fit, x_grid, type = \"prob\"))\n\nggplot(x_grid, aes(x = A, y = B)) + \n geom_point(data = cls_test, aes(col = class), alpha = 1 / 2, cex = 3) +\n geom_contour(aes(z = .pred_Class1), breaks = .5, col = \"black\", linewidth = 1) + \n coord_fixed()\n```\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: 
false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\", \"brulee\", \"AppliedPredictiveModeling\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\n`r article_req_pkgs(pkgs)` You will also need the python torch library installed (see `?torch::install_torch()`).\n\nWe can create classification models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict categorical quantities or class labels. Here, let's fit a single classification model using a neural network and evaluate using a validation set. While the [tune](https://tune.tidymodels.org/) package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage. \n\n## Fitting a neural network\n\n\nLet's fit a model to a small, two predictor classification data set. The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered. 
\n\n\n```{r}\n#| label: \"biv--split\"\nlibrary(AppliedPredictiveModeling)\n\nset.seed(321)\ncls_train <- quadBoundaryFunc(2000) %>% select(A = X1, B = X2, class)\ncls_val <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\ncls_test <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\n```\n\nA plot of the data shows two right-skewed predictors: \n\n```{r}\n#| label: \"biv-plot\"\n#| fig-width: 6\n#| fig-height: 6.1\nggplot(cls_train, aes(x = A, y = B, col = class)) + \n geom_point(alpha = 1 / 4, cex = 3) + \n coord_fixed()\n```\n\nLet's use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be more symmetric (via the `step_BoxCox()` function) and on a common scale (using `step_normalize()`). We can use [recipes](https://recipes.tidymodels.org/) to do so:\n\n```{r}\n#| label: \"biv--proc\"\nbiv_rec <- \n recipe(class ~ ., data = cls_train) %>%\n step_normalize(all_predictors())\n```\n\nThis recipe is not directly executed; the steps will be estimated when the model is fit. \n\nWe can use the brulee package to fit a model with 5 hidden units and a 10% dropout rate, to regularize the model:\n\n```{r}\n#| label: \"biv-nnet\"\nnnet_spec <- \n mlp(epochs = 1000, hidden_units = 10, penalty = 0.01, learn_rate = 0.1) %>% \n set_engine(\"brulee\", validation = 0) %>% \n set_mode(\"classification\")\n\nnnet_wflow <- \n biv_rec %>% \n workflow(nnet_spec)\n\nset.seed(987)\nnnet_fit <- fit(nnet_wflow, cls_train)\nnnet_fit %>% extract_fit_engine()\n```\n\n## Model performance\n\nIn parsnip, the `predict()` function can be used to characterize performance on the validation set. 
Since parsnip always produces tibble outputs, these can just be column bound to the original data: \n\n```{r}\n#| label: \"biv--perf\"\nval_results <- \n cls_val %>%\n bind_cols(\n predict(nnet_fit, new_data = cls_val),\n predict(nnet_fit, new_data = cls_val, type = \"prob\")\n )\nval_results %>% slice(1:5)\n\nval_results %>% roc_auc(truth = class, .pred_Class1)\n\nval_results %>% accuracy(truth = class, .pred_class)\n\nval_results %>% conf_mat(truth = class, .pred_class)\n```\n\nLet's also create a grid to get a visual sense of the class boundary for the test set.\n\n```{r}\n#| label: \"biv-boundary\"\n#| fig-width: 6\n#| fig-height: 6.1\na_rng <- range(cls_train$A)\nb_rng <- range(cls_train$B)\nx_grid <-\n expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100),\n B = seq(b_rng[1], b_rng[2], length.out = 100))\n\n\n# Make predictions using the transformed predictors but \n# attach them to the predictors in the original units: \nx_grid <- \n x_grid %>% \n bind_cols(predict(nnet_fit, x_grid, type = \"prob\"))\n\nggplot(x_grid, aes(x = A, y = B)) + \n geom_point(data = cls_test, aes(col = class), alpha = 1 / 2, cex = 3) +\n geom_contour(aes(z = .pred_Class1), breaks = .5, col = \"black\", linewidth = 1) + \n coord_fixed()\n```\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: 
false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Classification models using a neural network","categories":["model fitting","torch","neural networks"],"type":"learn-subsection","weight":2,"description":"Train a classification model and evaluate its 
performance.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/models/parsnip-ranger-glmnet/index.qmd.json b/.quarto/idx/learn/models/parsnip-ranger-glmnet/index.qmd.json new file mode 100644 index 00000000..3e8b2081 --- /dev/null +++ b/.quarto/idx/learn/models/parsnip-ranger-glmnet/index.qmd.json @@ -0,0 +1 @@ +{"title":"Regression models two ways","markdown":{"yaml":{"title":"Regression models two ways","categories":["model fitting","random forests","linear regression"],"type":"learn-subsection","weight":1,"description":"Create and train different kinds of regression models with different computational engines.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(ranger)\nlibrary(randomForest)\nlibrary(glmnet)\n\npreds <- c(\"Longitude\", \"Latitude\", \"Lot_Area\", \"Neighborhood\", \"Year_Sold\")\npred_names <- paste0(\"`\", preds, \"`\")\n\npkgs <- c(\"tidymodels\", \"ranger\", \"randomForest\", \"glmnet\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n`r article_req_pkgs(pkgs)`\n\nWe can create regression models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict continuous or numeric quantities. Here, let's first fit a random forest model, which does _not_ require all numeric input (see discussion [here](https://bookdown.org/max/FES/categorical-trees.html)) and discuss how to use `fit()` and `fit_xy()`, as well as _data descriptors_. \n\nSecond, let's fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip. 
\n\n## The Ames housing data\n\nWe'll use the Ames housing data set to demonstrate how to create regression models using parsnip. First, set up the data set and create a simple training/test set split:\n\n```{r}\n#| label: \"ames-split\"\nlibrary(tidymodels)\n\ndata(ames)\n\nset.seed(4595)\ndata_split <- initial_split(ames, strata = \"Sale_Price\", prop = 0.75)\n\names_train <- training(data_split)\names_test <- testing(data_split)\n```\n\nThe use of the test set here is _only for illustration_; normally in a data analysis these data would be saved to the very end after many models have been evaluated. \n\n## Random forest\n\nWe'll start by fitting a random forest model to a small set of parameters. Let's create a model with the predictors `r knitr::combine_words(pred_names)`. A simple random forest model can be specified via:\n\n```{r}\n#| label: \"rf-basic\"\nrf_defaults <- rand_forest(mode = \"regression\")\nrf_defaults\n```\n\nThe model will be fit with the ranger package by default. Since we didn't add any extra arguments to `fit`, _many_ of the arguments will be set to their defaults from the function `ranger::ranger()`. The help pages for the model function describe the default parameters and you can also use the `translate()` function to check out such details. \n\nThe parsnip package provides two different interfaces to fit a model: \n\n- the formula interface (`fit()`), and\n- the non-formula interface (`fit_xy()`).\n\nLet's start with the non-formula interface:\n\n\n```{r}\n#| label: \"rf-basic-xy\"\npreds <- c(\"Longitude\", \"Latitude\", \"Lot_Area\", \"Neighborhood\", \"Year_Sold\")\n\nrf_xy_fit <- \n rf_defaults %>%\n set_engine(\"ranger\") %>%\n fit_xy(\n x = ames_train[, preds],\n y = log10(ames_train$Sale_Price)\n )\n\nrf_xy_fit\n```\n\nThe non-formula interface doesn't do anything to the predictors before passing them to the underlying model function. 
This particular model does _not_ require indicator variables (sometimes called \"dummy variables\") to be created prior to fitting the model. Note that the output shows \"Number of independent variables: 5\".\n\nFor regression models, we can use the basic `predict()` method, which returns a tibble with a column named `.pred`:\n\n```{r}\n#| label: \"rf-basic-xy-pred\"\ntest_results <- \n ames_test %>%\n select(Sale_Price) %>%\n mutate(Sale_Price = log10(Sale_Price)) %>%\n bind_cols(\n predict(rf_xy_fit, new_data = ames_test[, preds])\n )\ntest_results %>% slice(1:5)\n\n# summarize performance\ntest_results %>% metrics(truth = Sale_Price, estimate = .pred) \n```\n\nNote that: \n\n * If the model required indicator variables, we would have to create them manually prior to using `fit()` (perhaps using the recipes package).\n * We had to manually log the outcome prior to modeling. \n\nNow, for illustration, let's use the formula method using some new parameter values:\n\n```{r}\n#| label: \"rf-basic-form\"\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n```\n \nSuppose that we would like to use the randomForest package instead of ranger. To do so, the only part of the syntax that needs to change is the `set_engine()` argument:\n\n\n```{r}\n#| label: \"rf-rf\"\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"randomForest\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n```\n\nLook at the formula code that was printed out; one function uses the argument name `ntree` and the other uses `num.trees`. The parsnip models don't require you to know the specific names of the main arguments. \n\nNow suppose that we want to modify the value of `mtry` based on the number of predictors in the data. 
Usually, a good default value is `floor(sqrt(num_predictors))` but a pure bagging model requires an `mtry` value equal to the total number of parameters. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code. \n\nWhen the model it being fit by parsnip, [_data descriptors_](https://parsnip.tidymodels.org/reference/descriptors.html) are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using `rand_forest()`), the values of the arguments that you give it are _immediately evaluated_ unless you delay them. To delay the evaluation of any argument, you can used `rlang::expr()` to make an expression. \n\nTwo relevant data descriptors for our example model are:\n\n * `.preds()`: the number of predictor _variables_ in the data set that are associated with the predictors **prior to dummy variable creation**.\n * `.cols()`: the number of predictor _columns_ after dummy variables (or other encodings) are created.\n\nSince ranger won't create indicator values, `.preds()` would be appropriate for `mtry` for a bagging model. \n\nFor example, let's use an expression with the `.preds()` descriptor to fit a bagging model: \n\n```{r}\n#| label: \"bagged\"\nrand_forest(mode = \"regression\", mtry = .preds(), trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n```\n\n\n## Regularized regression\n\nA linear model might work for this data set as well. We can use the `linear_reg()` parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let's use the former here. 
The glmnet package only implements a non-formula method, but parsnip will allow either one to be used. \n\nWhen regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won't do that automatically so we will need to do this ourselves. We'll use the [recipes](https://recipes.tidymodels.org/) package for these steps. \n\n```{r}\n#| label: \"glmn-form\"\nnorm_recipe <- \n recipe(\n Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, \n data = ames_train\n ) %>%\n step_other(Neighborhood) %>% \n step_dummy(all_nominal()) %>%\n step_center(all_predictors()) %>%\n step_scale(all_predictors()) %>%\n step_log(Sale_Price, base = 10) %>% \n # estimate the means and standard deviations\n prep(training = ames_train, retain = TRUE)\n\n# Now let's fit the model using the processed version of the data\n\nglmn_fit <- \n linear_reg(penalty = 0.001, mixture = 0.5) %>% \n set_engine(\"glmnet\") %>%\n fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL))\nglmn_fit\n```\n\nIf `penalty` were not specified, all of the `lambda` values would be computed. 
\n\nTo get the predictions for this specific value of `lambda` (aka `penalty`):\n\n```{r}\n#| label: \"glmn-pred\"\n# First, get the processed version of the test set predictors:\ntest_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors())\n\ntest_results <- \n test_results %>%\n rename(`random forest` = .pred) %>%\n bind_cols(\n predict(glmn_fit, new_data = test_normalized) %>%\n rename(glmnet = .pred)\n )\ntest_results\n\ntest_results %>% metrics(truth = Sale_Price, estimate = glmnet) \n\ntest_results %>% \n gather(model, prediction, -Sale_Price) %>% \n ggplot(aes(x = prediction, y = Sale_Price)) + \n geom_abline(col = \"green\", lty = 2) + \n geom_point(alpha = .4) + \n facet_wrap(~model) + \n coord_fixed()\n```\n\nThis final plot compares the performance of the random forest and regularized regression models.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(ranger)\nlibrary(randomForest)\nlibrary(glmnet)\n\npreds <- c(\"Longitude\", \"Latitude\", \"Lot_Area\", \"Neighborhood\", \"Year_Sold\")\npred_names <- paste0(\"`\", preds, \"`\")\n\npkgs <- c(\"tidymodels\", \"ranger\", \"randomForest\", \"glmnet\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nWe can create regression models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict continuous or numeric quantities. Here, let's first fit a random forest model, which does _not_ require all numeric input (see discussion [here](https://bookdown.org/max/FES/categorical-trees.html)) and discuss how to use `fit()` and `fit_xy()`, as well as _data descriptors_. 
\n\nSecond, let's fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip. \n\n## The Ames housing data\n\nWe'll use the Ames housing data set to demonstrate how to create regression models using parsnip. First, set up the data set and create a simple training/test set split:\n\n```{r}\n#| label: \"ames-split\"\nlibrary(tidymodels)\n\ndata(ames)\n\nset.seed(4595)\ndata_split <- initial_split(ames, strata = \"Sale_Price\", prop = 0.75)\n\names_train <- training(data_split)\names_test <- testing(data_split)\n```\n\nThe use of the test set here is _only for illustration_; normally in a data analysis these data would be saved to the very end after many models have been evaluated. \n\n## Random forest\n\nWe'll start by fitting a random forest model to a small set of parameters. Let's create a model with the predictors `r knitr::combine_words(pred_names)`. A simple random forest model can be specified via:\n\n```{r}\n#| label: \"rf-basic\"\nrf_defaults <- rand_forest(mode = \"regression\")\nrf_defaults\n```\n\nThe model will be fit with the ranger package by default. Since we didn't add any extra arguments to `fit`, _many_ of the arguments will be set to their defaults from the function `ranger::ranger()`. The help pages for the model function describe the default parameters and you can also use the `translate()` function to check out such details. 
\n\nThe parsnip package provides two different interfaces to fit a model: \n\n- the formula interface (`fit()`), and\n- the non-formula interface (`fit_xy()`).\n\nLet's start with the non-formula interface:\n\n\n```{r}\n#| label: \"rf-basic-xy\"\npreds <- c(\"Longitude\", \"Latitude\", \"Lot_Area\", \"Neighborhood\", \"Year_Sold\")\n\nrf_xy_fit <- \n rf_defaults %>%\n set_engine(\"ranger\") %>%\n fit_xy(\n x = ames_train[, preds],\n y = log10(ames_train$Sale_Price)\n )\n\nrf_xy_fit\n```\n\nThe non-formula interface doesn't do anything to the predictors before passing them to the underlying model function. This particular model does _not_ require indicator variables (sometimes called \"dummy variables\") to be created prior to fitting the model. Note that the output shows \"Number of independent variables: 5\".\n\nFor regression models, we can use the basic `predict()` method, which returns a tibble with a column named `.pred`:\n\n```{r}\n#| label: \"rf-basic-xy-pred\"\ntest_results <- \n ames_test %>%\n select(Sale_Price) %>%\n mutate(Sale_Price = log10(Sale_Price)) %>%\n bind_cols(\n predict(rf_xy_fit, new_data = ames_test[, preds])\n )\ntest_results %>% slice(1:5)\n\n# summarize performance\ntest_results %>% metrics(truth = Sale_Price, estimate = .pred) \n```\n\nNote that: \n\n * If the model required indicator variables, we would have to create them manually prior to using `fit()` (perhaps using the recipes package).\n * We had to manually log the outcome prior to modeling. \n\nNow, for illustration, let's use the formula method using some new parameter values:\n\n```{r}\n#| label: \"rf-basic-form\"\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n```\n \nSuppose that we would like to use the randomForest package instead of ranger. 
To do so, the only part of the syntax that needs to change is the `set_engine()` argument:\n\n\n```{r}\n#| label: \"rf-rf\"\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"randomForest\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n```\n\nLook at the formula code that was printed out; one function uses the argument name `ntree` and the other uses `num.trees`. The parsnip models don't require you to know the specific names of the main arguments. \n\nNow suppose that we want to modify the value of `mtry` based on the number of predictors in the data. Usually, a good default value is `floor(sqrt(num_predictors))` but a pure bagging model requires an `mtry` value equal to the total number of parameters. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code. \n\nWhen the model it being fit by parsnip, [_data descriptors_](https://parsnip.tidymodels.org/reference/descriptors.html) are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using `rand_forest()`), the values of the arguments that you give it are _immediately evaluated_ unless you delay them. To delay the evaluation of any argument, you can used `rlang::expr()` to make an expression. \n\nTwo relevant data descriptors for our example model are:\n\n * `.preds()`: the number of predictor _variables_ in the data set that are associated with the predictors **prior to dummy variable creation**.\n * `.cols()`: the number of predictor _columns_ after dummy variables (or other encodings) are created.\n\nSince ranger won't create indicator values, `.preds()` would be appropriate for `mtry` for a bagging model. 
\n\nFor example, let's use an expression with the `.preds()` descriptor to fit a bagging model: \n\n```{r}\n#| label: \"bagged\"\nrand_forest(mode = \"regression\", mtry = .preds(), trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n```\n\n\n## Regularized regression\n\nA linear model might work for this data set as well. We can use the `linear_reg()` parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let's use the former here. The glmnet package only implements a non-formula method, but parsnip will allow either one to be used. \n\nWhen regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won't do that automatically so we will need to do this ourselves. We'll use the [recipes](https://recipes.tidymodels.org/) package for these steps. \n\n```{r}\n#| label: \"glmn-form\"\nnorm_recipe <- \n recipe(\n Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, \n data = ames_train\n ) %>%\n step_other(Neighborhood) %>% \n step_dummy(all_nominal()) %>%\n step_center(all_predictors()) %>%\n step_scale(all_predictors()) %>%\n step_log(Sale_Price, base = 10) %>% \n # estimate the means and standard deviations\n prep(training = ames_train, retain = TRUE)\n\n# Now let's fit the model using the processed version of the data\n\nglmn_fit <- \n linear_reg(penalty = 0.001, mixture = 0.5) %>% \n set_engine(\"glmnet\") %>%\n fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL))\nglmn_fit\n```\n\nIf `penalty` were not specified, all of the `lambda` values would be computed. 
\n\nTo get the predictions for this specific value of `lambda` (aka `penalty`):\n\n```{r}\n#| label: \"glmn-pred\"\n# First, get the processed version of the test set predictors:\ntest_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors())\n\ntest_results <- \n test_results %>%\n rename(`random forest` = .pred) %>%\n bind_cols(\n predict(glmn_fit, new_data = test_normalized) %>%\n rename(glmnet = .pred)\n )\ntest_results\n\ntest_results %>% metrics(truth = Sale_Price, estimate = glmnet) \n\ntest_results %>% \n gather(model, prediction, -Sale_Price) %>% \n ggplot(aes(x = prediction, y = Sale_Price)) + \n geom_abline(col = \"green\", lty = 2) + \n geom_point(alpha = .4) + \n facet_wrap(~model) + \n coord_fixed()\n```\n\nThis final plot compares the performance of the random forest and regularized regression models.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"
latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide 
additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download 
Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Regression models two ways","categories":["model fitting","random forests","linear regression"],"type":"learn-subsection","weight":1,"description":"Create and train different kinds of regression models with different computational engines.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/models/pls/index.qmd.json b/.quarto/idx/learn/models/pls/index.qmd.json new file mode 100644 index 00000000..67c176d0 --- /dev/null +++ b/.quarto/idx/learn/models/pls/index.qmd.json @@ -0,0 +1 @@ +{"title":"Multivariate analysis using partial least squares","markdown":{"yaml":{"title":"Multivariate analysis using partial least squares","categories":["pre-processing","multivariate analysis","partial least squares"],"type":"learn-subsection","weight":6,"description":"Build and fit a predictive model with more than one outcome.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(pls)\nlibrary(tidymodels)\nlibrary(sessioninfo)\npkgs <- c(\"modeldata\", \"pls\", \"tidymodels\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n`r article_req_pkgs(pkgs)`\n\n\"Multivariate analysis\" usually refers to multiple _outcomes_ being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. 
For example, suppose there was a data set with columns `y1` and `y2` representing two outcomes to be predicted. The `lm()` function would look something like:\n\n```{r}\n#| label: \"lm\"\n#| eval: false\nlm(cbind(y1, y2) ~ ., data = dat)\n```\n\nThis `cbind()` call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes. \n\nThe data that we'll use has three outcomes. From `?modeldata::meats`:\n\n> \"These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents.\n\n> \"For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is `-log10` of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry.\"\n\nThe goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here. \n\nTo start, let's take the two data matrices (called `endpoints` and `absorp`) and bind them together in a data frame:\n\n```{r}\n#| label: \"data\"\nlibrary(modeldata)\ndata(meats)\n```\n\nThe three _outcomes_ have fairly high correlations also. \n\n## Preprocessing the data\n\nIf the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved _latent_ variables that are derived in a manner similar to principal component analysis (PCA). \n\nPLS, unlike PCA, also incorporates the outcome data when creating the PLS components. 
Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS _chases_ variation of the predictors and outcomes. \n\nSince we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables. \n\nMany base R functions that deal with multivariate outcomes using a formula require the use of `cbind()` on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically \"added\" together on the left-hand side:\n\n```{r}\n#| label: \"recipe\"\nnorm_rec <- \n recipe(water + fat + protein ~ ., data = meats) %>%\n step_normalize(everything()) \n```\n\nBefore we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the _predictors and each of the outcomes_. This allows an informed choice to be made based on the level of evidence that the situation requires. \n\nSince the data set isn't large, let's use resampling to measure these proportions. With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions. 
\n\nThe folds can be created using the [rsample](https://rsample.tidymodels.org/) package and the recipe can be estimated for each resample using the [`prepper()`](https://rsample.tidymodels.org/reference/prepper.html) function: \n\n```{r}\n#| label: \"cv\"\nset.seed(57343)\nfolds <- vfold_cv(meats, repeats = 10)\n\nfolds <- \n folds %>%\n mutate(recipes = map(splits, prepper, recipe = norm_rec))\n```\n\n## Partial least squares\n\nThe complicated parts for moving forward are:\n\n1. Formatting the predictors and outcomes into the format that the pls package requires, and\n2. Estimating the proportions. \n\nFor the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used `retain = TRUE` when prepping the recipes, we can `bake()` with `new_data = NULl` to get the processed data back out. To save the data as a matrix, the option `composition = \"matrix\"` will avoid saving the data as tibbles and use the required format. \n\nThe pls package expects a simple formula to specify the model, but each side of the formula should _represent a matrix_. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to \"protect\" the two matrices using `I()` when adding them to the data frame.\n\nThe calculation for the proportion of variance explained is straightforward for the predictors; the function `pls::explvar()` will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below). \n\nThe function `get_var_explained()` shown here will do all these computations and return a data frame with columns `components`, `source` (for the predictors, water, etc), and the `proportion` of variance that is explained by the components. \n\n\n```{r}\n#| label: \"var-explained\"\nlibrary(pls)\n\nget_var_explained <- function(recipe, ...) 
{\n \n # Extract the predictors and outcomes into their own matrices\n y_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_outcomes())\n x_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_predictors())\n \n # The pls package prefers the data in a data frame where the outcome\n # and predictors are in _matrices_. To make sure this is formatted\n # properly, use the `I()` function to inhibit `data.frame()` from making\n # all the individual columns. `pls_format` should have two columns.\n pls_format <- data.frame(\n endpoints = I(y_mat),\n measurements = I(x_mat)\n )\n # Fit the model\n mod <- plsr(endpoints ~ measurements, data = pls_format)\n \n # Get the proportion of the predictor variance that is explained\n # by the model for different number of components. \n xve <- explvar(mod)/100 \n\n # To do the same for the outcome, it is more complex. This code \n # was extracted from pls:::summary.mvr. \n explained <- \n drop(pls::R2(mod, estimate = \"train\", intercept = FALSE)$val) %>% \n # transpose so that components are in rows\n t() %>% \n as_tibble() %>%\n # Add the predictor proportions\n mutate(predictors = cumsum(xve) %>% as.vector(),\n components = seq_along(xve)) %>%\n # Put into a tidy format that is tall\n pivot_longer(\n cols = c(-components),\n names_to = \"source\",\n values_to = \"proportion\"\n )\n}\n```\n\nWe compute this data frame for each resample and save the results in the different columns. \n\n```{r}\n#| label: \"get-estimates\"\nfolds <- \n folds %>%\n mutate(var = map(recipes, get_var_explained),\n var = unname(var))\n```\n\nTo extract and aggregate these data, simple row binding can be used to stack the data vertically. 
Most of the action happens in the first 15 components so let's filter the data and compute the _average_ proportion.\n\n```{r}\n#| label: \"collapse-and-average\"\nvariance_data <- \n bind_rows(folds[[\"var\"]]) %>%\n filter(components <= 15) %>%\n group_by(components, source) %>%\n summarize(proportion = mean(proportion))\n```\n\nThe plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data. \n\n```{r}\n#| label: \"plot\"\n#| fig-width: 6\n#| fig-height: 4.25\n#| out-width: \"100%\"\nggplot(variance_data, aes(x = components, y = proportion, col = source)) + \n geom_line(alpha = 0.5, size = 1.2) + \n geom_point() \n```\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(pls)\nlibrary(tidymodels)\nlibrary(sessioninfo)\npkgs <- c(\"modeldata\", \"pls\", \"tidymodels\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\n\"Multivariate analysis\" usually refers to multiple _outcomes_ being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns `y1` and `y2` representing two outcomes to be predicted. The `lm()` function would look something like:\n\n```{r}\n#| label: \"lm\"\n#| eval: false\nlm(cbind(y1, y2) ~ ., data = dat)\n```\n\nThis `cbind()` call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! 
This article demonstrates how to model multiple outcomes. \n\nThe data that we'll use has three outcomes. From `?modeldata::meats`:\n\n> \"These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents.\n\n> \"For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is `-log10` of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry.\"\n\nThe goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here. \n\nTo start, let's take the two data matrices (called `endpoints` and `absorp`) and bind them together in a data frame:\n\n```{r}\n#| label: \"data\"\nlibrary(modeldata)\ndata(meats)\n```\n\nThe three _outcomes_ have fairly high correlations also. \n\n## Preprocessing the data\n\nIf the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved _latent_ variables that are derived in a manner similar to principal component analysis (PCA). \n\nPLS, unlike PCA, also incorporates the outcome data when creating the PLS components. Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS _chases_ variation of the predictors and outcomes. \n\nSince we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables. 
\n\nMany base R functions that deal with multivariate outcomes using a formula require the use of `cbind()` on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically \"added\" together on the left-hand side:\n\n```{r}\n#| label: \"recipe\"\nnorm_rec <- \n recipe(water + fat + protein ~ ., data = meats) %>%\n step_normalize(everything()) \n```\n\nBefore we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the _predictors and each of the outcomes_. This allows an informed choice to be made based on the level of evidence that the situation requires. \n\nSince the data set isn't large, let's use resampling to measure these proportions. With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions. \n\nThe folds can be created using the [rsample](https://rsample.tidymodels.org/) package and the recipe can be estimated for each resample using the [`prepper()`](https://rsample.tidymodels.org/reference/prepper.html) function: \n\n```{r}\n#| label: \"cv\"\nset.seed(57343)\nfolds <- vfold_cv(meats, repeats = 10)\n\nfolds <- \n folds %>%\n mutate(recipes = map(splits, prepper, recipe = norm_rec))\n```\n\n## Partial least squares\n\nThe complicated parts for moving forward are:\n\n1. Formatting the predictors and outcomes into the format that the pls package requires, and\n2. Estimating the proportions. \n\nFor the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used `retain = TRUE` when prepping the recipes, we can `bake()` with `new_data = NULl` to get the processed data back out. 
To save the data as a matrix, the option `composition = \"matrix\"` will avoid saving the data as tibbles and use the required format. \n\nThe pls package expects a simple formula to specify the model, but each side of the formula should _represent a matrix_. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to \"protect\" the two matrices using `I()` when adding them to the data frame.\n\nThe calculation for the proportion of variance explained is straightforward for the predictors; the function `pls::explvar()` will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below). \n\nThe function `get_var_explained()` shown here will do all these computations and return a data frame with columns `components`, `source` (for the predictors, water, etc), and the `proportion` of variance that is explained by the components. \n\n\n```{r}\n#| label: \"var-explained\"\nlibrary(pls)\n\nget_var_explained <- function(recipe, ...) {\n \n # Extract the predictors and outcomes into their own matrices\n y_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_outcomes())\n x_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_predictors())\n \n # The pls package prefers the data in a data frame where the outcome\n # and predictors are in _matrices_. To make sure this is formatted\n # properly, use the `I()` function to inhibit `data.frame()` from making\n # all the individual columns. `pls_format` should have two columns.\n pls_format <- data.frame(\n endpoints = I(y_mat),\n measurements = I(x_mat)\n )\n # Fit the model\n mod <- plsr(endpoints ~ measurements, data = pls_format)\n \n # Get the proportion of the predictor variance that is explained\n # by the model for different number of components. 
\n xve <- explvar(mod)/100 \n\n # To do the same for the outcome, it is more complex. This code \n # was extracted from pls:::summary.mvr. \n explained <- \n drop(pls::R2(mod, estimate = \"train\", intercept = FALSE)$val) %>% \n # transpose so that components are in rows\n t() %>% \n as_tibble() %>%\n # Add the predictor proportions\n mutate(predictors = cumsum(xve) %>% as.vector(),\n components = seq_along(xve)) %>%\n # Put into a tidy format that is tall\n pivot_longer(\n cols = c(-components),\n names_to = \"source\",\n values_to = \"proportion\"\n )\n}\n```\n\nWe compute this data frame for each resample and save the results in the different columns. \n\n```{r}\n#| label: \"get-estimates\"\nfolds <- \n folds %>%\n mutate(var = map(recipes, get_var_explained),\n var = unname(var))\n```\n\nTo extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let's filter the data and compute the _average_ proportion.\n\n```{r}\n#| label: \"collapse-and-average\"\nvariance_data <- \n bind_rows(folds[[\"var\"]]) %>%\n filter(components <= 15) %>%\n group_by(components, source) %>%\n summarize(proportion = mean(proportion))\n```\n\nThe plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data. 
\n\n```{r}\n#| label: \"plot\"\n#| fig-width: 6\n#| fig-height: 4.25\n#| out-width: \"100%\"\nggplot(variance_data, aes(x = components, y = proportion, col = source)) + \n geom_line(alpha = 0.5, size = 1.2) + \n geom_point() \n```\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Multivariate analysis using partial least squares","categories":["pre-processing","multivariate analysis","partial least squares"],"type":"learn-subsection","weight":6,"description":"Build and fit a 
predictive model with more than one outcome.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/models/sub-sampling/index.qmd.json b/.quarto/idx/learn/models/sub-sampling/index.qmd.json new file mode 100644 index 00000000..66411c4e --- /dev/null +++ b/.quarto/idx/learn/models/sub-sampling/index.qmd.json @@ -0,0 +1 @@ +{"title":"Subsampling for class imbalances","markdown":{"yaml":{"title":"Subsampling for class imbalances","categories":["model fitting","pre-processing","class imbalances","discriminant analysis"],"type":"learn-subsection","weight":3,"description":"Improve model performance in imbalanced data sets through undersampling or oversampling.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(readr)\nlibrary(klaR)\nlibrary(tidymodels)\nlibrary(discrim)\nlibrary(themis)\nlibrary(ROSE)\n\npkgs <- c(\"tidymodels\", \"klaR\", \"themis\", \"discrim\", \"readr\", \"ROSE\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n`r article_req_pkgs(pkgs)`\n\nSubsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance. \n\nThis article describes subsampling for dealing with class imbalances. 
For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/measuring-performance.html) for more information on these metrics. \n\n## Simulated data\n\nConsider a two-class problem where the first class has a very low rate of occurrence. The data were simulated and can be imported into R using the code below:\n\n```{r}\n#| label: \"load-data\"\n#| message: false\nimbal_data <- \n readr::read_csv(\"https://bit.ly/imbal_data\") %>% \n mutate(Class = factor(Class))\ndim(imbal_data)\ntable(imbal_data$Class)\n```\n\nIf \"Class1\" is the event of interest, it is very likely that a classification model would be able to achieve very good _specificity_ since almost all of the data are of the second class. _Sensitivity_, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class. \n\nOne result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance. \n\n## Subsampling the data\n\nOne way to alleviate this issue is to _subsample_ the data. There are a number of ways to do this but the most simple one is to _sample down_ (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are _better calibrated_, meaning that the distributions of the class probabilities are more well behaved. 
As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise. \n\nLet's explore subsampling using `themis::step_rose()` in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from [Menardi, G. and Torelli, N. (2014)](https://scholar.google.com/scholar?hl=en&q=%22training+and+assessing+classification+rules+with+imbalanced+data%22). This is an example of an oversampling strategy, rather than undersampling.\n\nIn terms of workflow:\n\n * It is extremely important that subsampling occurs _inside of resampling_. Otherwise, the resampling process can produce [poor estimates of model performance](https://topepo.github.io/caret/subsampling-for-class-imbalances.html#resampling). \n * The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen \"in the wild\" and, for this reason, the `skip` argument to `step_downsample()` and other subsampling recipes steps has a default of `TRUE`. \n\nHere is a simple recipe implementing oversampling: \n\n```{r}\n#| label: \"rec\"\nlibrary(tidymodels)\nlibrary(themis)\nimbal_rec <- \n recipe(Class ~ ., data = imbal_data) %>%\n step_rose(Class)\n```\n\nFor a model, let's use a [quadratic discriminant analysis](https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis) (QDA) model. 
From the discrim package, this model can be specified using:\n\n```{r}\n#| label: \"qda\"\nlibrary(discrim)\nqda_mod <- \n discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% \n set_engine(\"klaR\")\n```\n\nTo keep these objects bound together, they can be combined in a [workflow](https://workflows.tidymodels.org/):\n\n```{r}\n#| label: \"wflw\"\nqda_rose_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_recipe(imbal_rec)\nqda_rose_wflw\n```\n\n## Model performance\n\nStratified, repeated 10-fold cross-validation is used to resample the model:\n\n```{r}\n#| label: \"cv\"\nset.seed(5732)\ncv_folds <- vfold_cv(imbal_data, strata = \"Class\", repeats = 5)\n```\n\nTo measure model performance, let's use two metrics:\n\n * The area under the [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) is an overall assessment of performance across _all_ cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor. \n * The _J_ index (a.k.a. [Youden's _J_](https://en.wikipedia.org/wiki/Youden%27s_J_statistic) statistic) is `sensitivity + specificity - 1`. Values near one are once again best. \n\nIf a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the _J_ index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics. \n\n```{r}\n#| label: \"metrics\"\ncls_metrics <- metric_set(roc_auc, j_index)\n```\n\nNow, we train the models and generate the results using `tune::fit_resamples()`:\n\n```{r}\n#| label: \"resample-rose\"\n#| message: false\nset.seed(2180)\nqda_rose_res <- fit_resamples(\n qda_rose_wflw, \n resamples = cv_folds, \n metrics = cls_metrics\n)\n\ncollect_metrics(qda_rose_res)\n```\n\nWhat do the results look like without using ROSE? 
We can create another workflow and fit the QDA model along the same resamples:\n\n```{r}\n#| label: \"qda-only\"\nqda_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_formula(Class ~ .)\n\nset.seed(2180)\nqda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics)\ncollect_metrics(qda_only_res)\n```\n\nIt looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity. \n\nLet's plot the metrics for each resample to see how the individual results changed. \n\n```{r}\n#| label: \"merge-metrics\"\nno_sampling <- \n qda_only_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"no_sampling\")\n\nwith_sampling <- \n qda_rose_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"rose\")\n\nbind_rows(no_sampling, with_sampling) %>% \n mutate(label = paste(id2, id)) %>% \n ggplot(aes(x = sampling, y = .estimate, group = label)) + \n geom_line(alpha = .4) + \n facet_wrap(~ .metric, scales = \"free_y\")\n```\n\nThis visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions. 
\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(readr)\nlibrary(klaR)\nlibrary(tidymodels)\nlibrary(discrim)\nlibrary(themis)\nlibrary(ROSE)\n\npkgs <- c(\"tidymodels\", \"klaR\", \"themis\", \"discrim\", \"readr\", \"ROSE\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nSubsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance. \n\nThis article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/measuring-performance.html) for more information on these metrics. \n\n## Simulated data\n\nConsider a two-class problem where the first class has a very low rate of occurrence. 
The data were simulated and can be imported into R using the code below:\n\n```{r}\n#| label: \"load-data\"\n#| message: false\nimbal_data <- \n readr::read_csv(\"https://bit.ly/imbal_data\") %>% \n mutate(Class = factor(Class))\ndim(imbal_data)\ntable(imbal_data$Class)\n```\n\nIf \"Class1\" is the event of interest, it is very likely that a classification model would be able to achieve very good _specificity_ since almost all of the data are of the second class. _Sensitivity_, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class. \n\nOne result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance. \n\n## Subsampling the data\n\nOne way to alleviate this issue is to _subsample_ the data. There are a number of ways to do this but the most simple one is to _sample down_ (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are _better calibrated_, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise. \n\nLet's explore subsampling using `themis::step_rose()` in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from [Menardi, G. and Torelli, N. 
(2014)](https://scholar.google.com/scholar?hl=en&q=%22training+and+assessing+classification+rules+with+imbalanced+data%22). This is an example of an oversampling strategy, rather than undersampling.\n\nIn terms of workflow:\n\n * It is extremely important that subsampling occurs _inside of resampling_. Otherwise, the resampling process can produce [poor estimates of model performance](https://topepo.github.io/caret/subsampling-for-class-imbalances.html#resampling). \n * The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen \"in the wild\" and, for this reason, the `skip` argument to `step_downsample()` and other subsampling recipes steps has a default of `TRUE`. \n\nHere is a simple recipe implementing oversampling: \n\n```{r}\n#| label: \"rec\"\nlibrary(tidymodels)\nlibrary(themis)\nimbal_rec <- \n recipe(Class ~ ., data = imbal_data) %>%\n step_rose(Class)\n```\n\nFor a model, let's use a [quadratic discriminant analysis](https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis) (QDA) model. 
From the discrim package, this model can be specified using:\n\n```{r}\n#| label: \"qda\"\nlibrary(discrim)\nqda_mod <- \n discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% \n set_engine(\"klaR\")\n```\n\nTo keep these objects bound together, they can be combined in a [workflow](https://workflows.tidymodels.org/):\n\n```{r}\n#| label: \"wflw\"\nqda_rose_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_recipe(imbal_rec)\nqda_rose_wflw\n```\n\n## Model performance\n\nStratified, repeated 10-fold cross-validation is used to resample the model:\n\n```{r}\n#| label: \"cv\"\nset.seed(5732)\ncv_folds <- vfold_cv(imbal_data, strata = \"Class\", repeats = 5)\n```\n\nTo measure model performance, let's use two metrics:\n\n * The area under the [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) is an overall assessment of performance across _all_ cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor. \n * The _J_ index (a.k.a. [Youden's _J_](https://en.wikipedia.org/wiki/Youden%27s_J_statistic) statistic) is `sensitivity + specificity - 1`. Values near one are once again best. \n\nIf a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the _J_ index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics. \n\n```{r}\n#| label: \"metrics\"\ncls_metrics <- metric_set(roc_auc, j_index)\n```\n\nNow, we train the models and generate the results using `tune::fit_resamples()`:\n\n```{r}\n#| label: \"resample-rose\"\n#| message: false\nset.seed(2180)\nqda_rose_res <- fit_resamples(\n qda_rose_wflw, \n resamples = cv_folds, \n metrics = cls_metrics\n)\n\ncollect_metrics(qda_rose_res)\n```\n\nWhat do the results look like without using ROSE? 
We can create another workflow and fit the QDA model along the same resamples:\n\n```{r}\n#| label: \"qda-only\"\nqda_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_formula(Class ~ .)\n\nset.seed(2180)\nqda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics)\ncollect_metrics(qda_only_res)\n```\n\nIt looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity. \n\nLet's plot the metrics for each resample to see how the individual results changed. \n\n```{r}\n#| label: \"merge-metrics\"\nno_sampling <- \n qda_only_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"no_sampling\")\n\nwith_sampling <- \n qda_rose_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"rose\")\n\nbind_rows(no_sampling, with_sampling) %>% \n mutate(label = paste(id2, id)) %>% \n ggplot(aes(x = sampling, y = .estimate, group = label)) + \n geom_line(alpha = .4) + \n facet_wrap(~ .metric, scales = \"free_y\")\n```\n\nThis visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions. 
\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For 
attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Subsampling for class imbalances","categories":["model fitting","pre-processing","class imbalances","discriminant analysis"],"type":"learn-subsection","weight":3,"description":"Improve model performance 
in imbalanced data sets through undersampling or oversampling.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/models/time-series/index.qmd.json b/.quarto/idx/learn/models/time-series/index.qmd.json new file mode 100644 index 00000000..67c80c72 --- /dev/null +++ b/.quarto/idx/learn/models/time-series/index.qmd.json @@ -0,0 +1 @@ +{"title":"Modeling time series with tidy resampling","markdown":{"yaml":{"title":"Modeling time series with tidy resampling","categories":["model fitting","time series"],"type":"learn-subsection","weight":4,"description":"Calculate performance estimates for time series forecasts using resampling.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(timetk)\nlibrary(forecast)\nlibrary(tidymodels)\nlibrary(sweep)\nlibrary(zoo)\npkgs <- c(\"tidymodels\", \"timetk\", \"forecast\", \"sweep\", \"zoo\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n`r article_req_pkgs(pkgs)`\n\n\"[Demo Week: Tidy Forecasting with sweep](https://www.business-science.io/code-tools/2017/10/25/demo_week_sweep.html)\" is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using [rolling forecast origin resampling](https://robjhyndman.com/hyndsight/crossvalidation/). \n\n## Example data\n\nThe data for this article are sales of alcoholic beverages originally from [the Federal Reserve Bank of St. 
Louis website](https://fred.stlouisfed.org/series/S4248SM144NCEN).\n\n```{r}\n#| label: \"read-data\"\nlibrary(tidymodels)\nlibrary(modeldata)\ndata(\"drinks\")\nglimpse(drinks)\n```\n\nEach row represents one month of sales (in millions of US dollars). \n\n## Time series resampling\n\nSuppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. To set up this resampling scheme:\n\n```{r}\n#| label: \"rof\"\nroll_rs <- rolling_origin(\n drinks, \n initial = 12 * 20, \n assess = 12,\n cumulative = FALSE\n )\n\nnrow(roll_rs)\n\nroll_rs\n```\n\nEach `split` element contains the information about that resample:\n\n```{r}\n#| label: \"split\"\nroll_rs$splits[[1]]\n```\n\nFor plotting, let's index each split by the first day of the assessment set:\n\n```{r}\n#| label: \"labels\"\nget_date <- function(x) {\n min(assessment(x)$date)\n}\n\nstart_date <- map(roll_rs$splits, get_date)\nroll_rs$start_date <- do.call(\"c\", start_date)\nhead(roll_rs$start_date)\n```\n\nThis resampling scheme has `r nrow(roll_rs)` splits of the data so that there will be `r nrow(roll_rs)` ARIMA models that are fit. To create the models, we use the `auto.arima()` function from the forecast package. The rsample functions `analysis()` and `assessment()` return a data frame, so another step converts the data to a `ts` object called `mod_dat` using a function in the timetk package.\n\n```{r}\n#| label: \"model-fun\"\nlibrary(forecast) # for `auto.arima`\nlibrary(timetk) # for `tk_ts`\nlibrary(zoo) # for `as.yearmon`\n\nfit_model <- function(x, ...) 
{\n # suggested by Matt Dancho:\n x %>%\n analysis() %>%\n # Since the first day changes over resamples, adjust it\n # based on the first date value in the data frame \n tk_ts(start = .$date[[1]] %>% as.yearmon(), \n frequency = 12, \n silent = TRUE) %>%\n auto.arima(...)\n}\n```\n\nSave each model in a new column:\n\n```{r}\n#| label: \"model-fit\"\n#| warning: false\n#| message: false\nroll_rs$arima <- map(roll_rs$splits, fit_model)\n\n# For example:\nroll_rs$arima[[1]]\n```\n\n(There are some warnings produced by these regarding extra columns in the data that can be ignored.)\n\n## Model performance\n\nUsing the model fits, let's measure performance in two ways:\n\n * _Interpolation_ error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used. \n * _Extrapolation_ or _forecast_ error evaluates the performance of the model on the data from the following year (that were not used in the model fit).\n \nIn each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the `Arima` object. To make things easy, let's use the sweep package's `sw_glance()` function:\n\n```{r}\n#| label: \"interp\"\nlibrary(sweep)\n\nroll_rs$interpolation <- map_dbl(\n roll_rs$arima,\n function(x) \n sw_glance(x)[[\"MAPE\"]]\n )\n\nsummary(roll_rs$interpolation)\n```\n\nFor the extrapolation error, the model and split objects are required. 
Using these:\n\n```{r}\n#| label: \"extrap\"\nget_extrap <- function(split, mod) {\n n <- nrow(assessment(split))\n # Get assessment data\n pred_dat <- assessment(split) %>%\n mutate(\n pred = as.vector(forecast(mod, h = n)$mean),\n pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100\n )\n mean(abs(pred_dat$pct_error))\n}\n\nroll_rs$extrapolation <- \n map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap)\n\nsummary(roll_rs$extrapolation)\n```\n\nWhat do these error estimates look like over time?\n\n```{r}\n#| label: \"plot\"\nroll_rs %>%\n select(interpolation, extrapolation, start_date) %>%\n pivot_longer(cols = matches(\"ation\"), names_to = \"error\", values_to = \"MAPE\") %>%\n ggplot(aes(x = start_date, y = MAPE, col = error)) + \n geom_point() + \n geom_line()\n```\n\nIt is likely that the interpolation error is an underestimate to some degree, as mentioned above. \n\nIt is also worth noting that `rolling_origin()` can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days.\n\nThe example below demonstrates this idea by splitting `drinks` into a nested set of 26 years, and rolling over years rather than months. 
Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month.\n\n```{r}\n#| label: \"rof-annual\"\n# The idea is to nest by the period to roll over,\n# which in this case is the year.\nroll_rs_annual <- drinks %>%\n mutate(year = as.POSIXlt(date)$year + 1900) %>%\n nest(data = c(date, S4248SM144NCEN)) %>%\n rolling_origin(\n initial = 20, \n assess = 1, \n cumulative = FALSE\n )\n\nanalysis(roll_rs_annual$splits[[1]])\n```\n\nThe workflow to access these calendar slices is to use `bind_rows()` to join\neach analysis set together.\n\n```{r}\nmutate(\n roll_rs_annual,\n extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data))\n)\n```\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(timetk)\nlibrary(forecast)\nlibrary(tidymodels)\nlibrary(sweep)\nlibrary(zoo)\npkgs <- c(\"tidymodels\", \"timetk\", \"forecast\", \"sweep\", \"zoo\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\n\"[Demo Week: Tidy Forecasting with sweep](https://www.business-science.io/code-tools/2017/10/25/demo_week_sweep.html)\" is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using [rolling forecast origin resampling](https://robjhyndman.com/hyndsight/crossvalidation/). \n\n## Example data\n\nThe data for this article are sales of alcoholic beverages originally from [the Federal Reserve Bank of St. 
Louis website](https://fred.stlouisfed.org/series/S4248SM144NCEN).\n\n```{r}\n#| label: \"read-data\"\nlibrary(tidymodels)\nlibrary(modeldata)\ndata(\"drinks\")\nglimpse(drinks)\n```\n\nEach row represents one month of sales (in millions of US dollars). \n\n## Time series resampling\n\nSuppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. To set up this resampling scheme:\n\n```{r}\n#| label: \"rof\"\nroll_rs <- rolling_origin(\n drinks, \n initial = 12 * 20, \n assess = 12,\n cumulative = FALSE\n )\n\nnrow(roll_rs)\n\nroll_rs\n```\n\nEach `split` element contains the information about that resample:\n\n```{r}\n#| label: \"split\"\nroll_rs$splits[[1]]\n```\n\nFor plotting, let's index each split by the first day of the assessment set:\n\n```{r}\n#| label: \"labels\"\nget_date <- function(x) {\n min(assessment(x)$date)\n}\n\nstart_date <- map(roll_rs$splits, get_date)\nroll_rs$start_date <- do.call(\"c\", start_date)\nhead(roll_rs$start_date)\n```\n\nThis resampling scheme has `r nrow(roll_rs)` splits of the data so that there will be `r nrow(roll_rs)` ARIMA models that are fit. To create the models, we use the `auto.arima()` function from the forecast package. The rsample functions `analysis()` and `assessment()` return a data frame, so another step converts the data to a `ts` object called `mod_dat` using a function in the timetk package.\n\n```{r}\n#| label: \"model-fun\"\nlibrary(forecast) # for `auto.arima`\nlibrary(timetk) # for `tk_ts`\nlibrary(zoo) # for `as.yearmon`\n\nfit_model <- function(x, ...) 
{\n # suggested by Matt Dancho:\n x %>%\n analysis() %>%\n # Since the first day changes over resamples, adjust it\n # based on the first date value in the data frame \n tk_ts(start = .$date[[1]] %>% as.yearmon(), \n frequency = 12, \n silent = TRUE) %>%\n auto.arima(...)\n}\n```\n\nSave each model in a new column:\n\n```{r}\n#| label: \"model-fit\"\n#| warning: false\n#| message: false\nroll_rs$arima <- map(roll_rs$splits, fit_model)\n\n# For example:\nroll_rs$arima[[1]]\n```\n\n(There are some warnings produced by these regarding extra columns in the data that can be ignored.)\n\n## Model performance\n\nUsing the model fits, let's measure performance in two ways:\n\n * _Interpolation_ error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used. \n * _Extrapolation_ or _forecast_ error evaluates the performance of the model on the data from the following year (that were not used in the model fit).\n \nIn each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the `Arima` object. To make things easy, let's use the sweep package's `sw_glance()` function:\n\n```{r}\n#| label: \"interp\"\nlibrary(sweep)\n\nroll_rs$interpolation <- map_dbl(\n roll_rs$arima,\n function(x) \n sw_glance(x)[[\"MAPE\"]]\n )\n\nsummary(roll_rs$interpolation)\n```\n\nFor the extrapolation error, the model and split objects are required. 
Using these:\n\n```{r}\n#| label: \"extrap\"\nget_extrap <- function(split, mod) {\n n <- nrow(assessment(split))\n # Get assessment data\n pred_dat <- assessment(split) %>%\n mutate(\n pred = as.vector(forecast(mod, h = n)$mean),\n pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100\n )\n mean(abs(pred_dat$pct_error))\n}\n\nroll_rs$extrapolation <- \n map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap)\n\nsummary(roll_rs$extrapolation)\n```\n\nWhat do these error estimates look like over time?\n\n```{r}\n#| label: \"plot\"\nroll_rs %>%\n select(interpolation, extrapolation, start_date) %>%\n pivot_longer(cols = matches(\"ation\"), names_to = \"error\", values_to = \"MAPE\") %>%\n ggplot(aes(x = start_date, y = MAPE, col = error)) + \n geom_point() + \n geom_line()\n```\n\nIt is likely that the interpolation error is an underestimate to some degree, as mentioned above. \n\nIt is also worth noting that `rolling_origin()` can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days.\n\nThe example below demonstrates this idea by splitting `drinks` into a nested set of 26 years, and rolling over years rather than months. 
Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month.\n\n```{r}\n#| label: \"rof-annual\"\n# The idea is to nest by the period to roll over,\n# which in this case is the year.\nroll_rs_annual <- drinks %>%\n mutate(year = as.POSIXlt(date)$year + 1900) %>%\n nest(data = c(date, S4248SM144NCEN)) %>%\n rolling_origin(\n initial = 20, \n assess = 1, \n cumulative = FALSE\n )\n\nanalysis(roll_rs_annual$splits[[1]])\n```\n\nThe workflow to access these calendar slices is to use `bind_rows()` to join\neach analysis set together.\n\n```{r}\nmutate(\n roll_rs_annual,\n extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data))\n)\n```\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-
contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this 
document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to 
Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Modeling time series with tidy resampling","categories":["model fitting","time series"],"type":"learn-subsection","weight":4,"description":"Calculate performance estimates for time series forecasts using resampling.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/statistics/bootstrap/index.qmd.json b/.quarto/idx/learn/statistics/bootstrap/index.qmd.json new file mode 100644 index 00000000..652480b8 --- /dev/null +++ b/.quarto/idx/learn/statistics/bootstrap/index.qmd.json @@ -0,0 +1 @@ +{"title":"Bootstrap resampling and tidy regression models","markdown":{"yaml":{"title":"Bootstrap resampling and tidy regression models","categories":["statistical analysis","bootstraping","tidying results","confidence intervals"],"type":"learn-subsection","weight":3,"description":"Apply bootstrap resampling to estimate uncertainty in model parameters.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\nThis article only requires the tidymodels package.\n\nCombining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. 
These approaches have been explored before, for instance by [Andrew MacDonald here](https://rstudio-pubs-static.s3.amazonaws.com/19698_a4c472606e3c43e4b94720506e49bb7b.html), and [Hadley has explored efficient support for bootstrapping](https://github.com/hadley/dplyr/issues/269) as a potential enhancement to dplyr. The tidymodels package [broom](https://broom.tidyverse.org/) fits naturally with [dplyr](https://dplyr.tidyverse.org/) in performing these analyses.\n\nBootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate.\n\nLet's say we want to fit a nonlinear model to the weight/mileage relationship in the `mtcars` data set.\n\n```{r}\nlibrary(tidymodels)\n\nggplot(mtcars, aes(mpg, wt)) + \n geom_point()\n```\n\nWe might use the method of nonlinear least squares (via the `nls()` function) to fit a model.\n\n```{r}\nnlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0))\nsummary(nlsfit)\n\nggplot(mtcars, aes(wt, mpg)) +\n geom_point() +\n geom_line(aes(y = predict(nlsfit)))\n```\n\nWhile this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data.\n\n## Bootstrapping models\n\nWe can use the `bootstraps()` function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an `rset`, which is a data frame with a column of `rsplit` objects.\n\nAn `rsplit` object has two main components: an analysis data set and an assessment data set, accessible via `analysis(rsplit)` and `assessment(rsplit)` respectively. 
For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples.\n\n```{r}\nset.seed(27)\nboots <- bootstraps(mtcars, times = 2000, apparent = TRUE)\nboots\n```\n\nLet's create a helper function to fit an `nls()` model on each bootstrap sample, and then use `purrr::map()` to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting.\n\n```{r}\nfit_nls_on_bootstrap <- function(split) {\n nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0))\n}\n\nboot_models <-\n boots %>% \n mutate(model = map(splits, fit_nls_on_bootstrap),\n coef_info = map(model, tidy))\n\nboot_coefs <- \n boot_models %>% \n unnest(coef_info)\n```\n\nThe unnested coefficient information contains a summary of each replication combined in a single data frame:\n\n```{r}\nboot_coefs\n```\n\n## Confidence intervals\n\nWe can then calculate confidence intervals (using what is called the [percentile method](https://www.uvm.edu/~dhowell/StatPages/Randomization%20Tests/ResamplingWithR/BootstMeans/bootstrapping_means.html)):\n\n```{r}\n#| label: \"percentiles\"\npercentile_intervals <- int_pctl(boot_models, coef_info)\npercentile_intervals\n```\n\nOr we can use histograms to get a more detailed idea of the uncertainty in each estimate:\n\n```{r}\nggplot(boot_coefs, aes(estimate)) +\n geom_histogram(bins = 30) +\n facet_wrap( ~ term, scales = \"free\") +\n geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = \"blue\") +\n geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = \"blue\")\n```\n\nThe rsample package also has functions for [other types of confidence intervals](https://rsample.tidymodels.org/reference/int_pctl.html). \n\n## Possible model fits\n\nWe can use `augment()` to visualize the uncertainty in the fitted curve. 
Since there are so many bootstrap samples, we'll only show a sample of the model fits in our visualization:\n\n```{r}\nboot_aug <- \n boot_models %>% \n sample_n(200) %>% \n mutate(augmented = map(model, augment)) %>% \n unnest(augmented)\n\nboot_aug\n```\n\n```{r}\nggplot(boot_aug, aes(wt, mpg)) +\n geom_line(aes(y = .fitted, group = id), alpha = .2, col = \"blue\") +\n geom_point()\n```\n\nWith only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the `tidy()` and `augment()` functions works for many statistical outputs. As another example, we could use `smooth.spline()`, which fits a cubic smoothing spline to data:\n\n```{r}\nfit_spline_on_bootstrap <- function(split) {\n data <- analysis(split)\n smooth.spline(data$wt, data$mpg, df = 4)\n}\n\nboot_splines <- \n boots %>% \n sample_n(200) %>% \n mutate(spline = map(splits, fit_spline_on_bootstrap),\n aug_train = map(spline, augment))\n\nsplines_aug <- \n boot_splines %>% \n unnest(aug_train)\n\nggplot(splines_aug, aes(x, y)) +\n geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = \"blue\") +\n geom_point()\n```\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\nThis article only requires the tidymodels package.\n\nCombining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. 
These approaches have been explored before, for instance by [Andrew MacDonald here](https://rstudio-pubs-static.s3.amazonaws.com/19698_a4c472606e3c43e4b94720506e49bb7b.html), and [Hadley has explored efficient support for bootstrapping](https://github.com/hadley/dplyr/issues/269) as a potential enhancement to dplyr. The tidymodels package [broom](https://broom.tidyverse.org/) fits naturally with [dplyr](https://dplyr.tidyverse.org/) in performing these analyses.\n\nBootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate.\n\nLet's say we want to fit a nonlinear model to the weight/mileage relationship in the `mtcars` data set.\n\n```{r}\nlibrary(tidymodels)\n\nggplot(mtcars, aes(mpg, wt)) + \n geom_point()\n```\n\nWe might use the method of nonlinear least squares (via the `nls()` function) to fit a model.\n\n```{r}\nnlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0))\nsummary(nlsfit)\n\nggplot(mtcars, aes(wt, mpg)) +\n geom_point() +\n geom_line(aes(y = predict(nlsfit)))\n```\n\nWhile this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data.\n\n## Bootstrapping models\n\nWe can use the `bootstraps()` function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an `rset`, which is a data frame with a column of `rsplit` objects.\n\nAn `rsplit` object has two main components: an analysis data set and an assessment data set, accessible via `analysis(rsplit)` and `assessment(rsplit)` respectively. 
For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples.\n\n```{r}\nset.seed(27)\nboots <- bootstraps(mtcars, times = 2000, apparent = TRUE)\nboots\n```\n\nLet's create a helper function to fit an `nls()` model on each bootstrap sample, and then use `purrr::map()` to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting.\n\n```{r}\nfit_nls_on_bootstrap <- function(split) {\n nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0))\n}\n\nboot_models <-\n boots %>% \n mutate(model = map(splits, fit_nls_on_bootstrap),\n coef_info = map(model, tidy))\n\nboot_coefs <- \n boot_models %>% \n unnest(coef_info)\n```\n\nThe unnested coefficient information contains a summary of each replication combined in a single data frame:\n\n```{r}\nboot_coefs\n```\n\n## Confidence intervals\n\nWe can then calculate confidence intervals (using what is called the [percentile method](https://www.uvm.edu/~dhowell/StatPages/Randomization%20Tests/ResamplingWithR/BootstMeans/bootstrapping_means.html)):\n\n```{r}\n#| label: \"percentiles\"\npercentile_intervals <- int_pctl(boot_models, coef_info)\npercentile_intervals\n```\n\nOr we can use histograms to get a more detailed idea of the uncertainty in each estimate:\n\n```{r}\nggplot(boot_coefs, aes(estimate)) +\n geom_histogram(bins = 30) +\n facet_wrap( ~ term, scales = \"free\") +\n geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = \"blue\") +\n geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = \"blue\")\n```\n\nThe rsample package also has functions for [other types of confidence intervals](https://rsample.tidymodels.org/reference/int_pctl.html). \n\n## Possible model fits\n\nWe can use `augment()` to visualize the uncertainty in the fitted curve. 
Since there are so many bootstrap samples, we'll only show a sample of the model fits in our visualization:\n\n```{r}\nboot_aug <- \n boot_models %>% \n sample_n(200) %>% \n mutate(augmented = map(model, augment)) %>% \n unnest(augmented)\n\nboot_aug\n```\n\n```{r}\nggplot(boot_aug, aes(wt, mpg)) +\n geom_line(aes(y = .fitted, group = id), alpha = .2, col = \"blue\") +\n geom_point()\n```\n\nWith only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the `tidy()` and `augment()` functions works for many statistical outputs. As another example, we could use `smooth.spline()`, which fits a cubic smoothing spline to data:\n\n```{r}\nfit_spline_on_bootstrap <- function(split) {\n data <- analysis(split)\n smooth.spline(data$wt, data$mpg, df = 4)\n}\n\nboot_splines <- \n boots %>% \n sample_n(200) %>% \n mutate(spline = map(splits, fit_spline_on_bootstrap),\n aug_train = map(spline, augment))\n\nsplines_aug <- \n boot_splines %>% \n unnest(aug_train)\n\nggplot(splines_aug, aes(x, y)) +\n geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = \"blue\") +\n geom_point()\n```\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n 
\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Bootstrap resampling and tidy regression models","categories":["statistical analysis","bootstraping","tidying results","confidence intervals"],"type":"learn-subsection","weight":3,"description":"Apply 
bootstrap resampling to estimate uncertainty in model parameters.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/statistics/infer/index.qmd.json b/.quarto/idx/learn/statistics/infer/index.qmd.json new file mode 100644 index 00000000..485541c6 --- /dev/null +++ b/.quarto/idx/learn/statistics/infer/index.qmd.json @@ -0,0 +1 @@ +{"title":"Hypothesis testing using resampling and tidy data","markdown":{"yaml":{"title":"Hypothesis testing using resampling and tidy data","categories":["statistical analysis","hypothesis testing","bootstraping"],"type":"learn-subsection","weight":4,"description":"Perform common hypothesis tests for statistical inference using flexible functions.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(sessioninfo)\npkgs <- c(\"tidymodels\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\nThis article only requires the tidymodels package. \n\nThe tidymodels package [infer](https://infer.tidymodels.org/) implements an expressive grammar to perform statistical inference that coheres with the `tidyverse` design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs.\n\nRegardless of which hypothesis test we're using, we're still asking the same kind of question: \n\n>Is the effect or difference in our observed data real, or due to chance? 
\n\nTo answer this question, we start by assuming that the observed data came from some world where \"nothing is going on\" (i.e. the observed effect was simply due to random chance), and call this assumption our **null hypothesis**. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the **alternate hypothesis**, which supposes that the effect present in the observed data is actually due to the fact that \"something is going on.\") We then calculate a **test statistic** from our data that describes the observed effect. We can use this test statistic to calculate a **p-value**, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined **significance level** $\\alpha$, then we can reject our null hypothesis.\n\nIf you are new to hypothesis testing, take a look at \n\n* [Section 9.2 of _Statistical Inference via Data Science_](https://moderndive.com/9-hypothesis-testing.html#understanding-ht)\n* The American Statistical Association's recent [statement on p-values](https://doi.org/10.1080/00031305.2016.1154108) \n\nThe workflow of this package is designed around these ideas. Starting from some data set,\n\n+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in,\n+ `hypothesize()` allows you to declare the null hypothesis,\n+ `generate()` allows you to generate data reflecting the null hypothesis, and\n+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution.\n\nThroughout this vignette, we make use of `gss`, a data set available in infer containing a sample of 500 observations of 11 variables from the *General Social Survey*. 
\n\n```{r}\n#| label: \"load-gss\"\n#| warning: false\n#| message: false\nlibrary(tidymodels) # Includes the infer package\n\n# load in the data set\ndata(gss)\n\n# take a look at its structure\ndplyr::glimpse(gss)\n```\n\nEach row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this data set is a representative sample of a population we want to learn about: American adults.\n\n## Specify variables\n\nThe `specify()` function can be used to specify which of the variables in the data set you're interested in. If you're only interested in, say, the `age` of the respondents, you might write:\n\n```{r}\n#| label: \"specify-example\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = age)\n```\n\nOn the front end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. 
What do we see if we check the class of this object, though?\n\n```{r}\n#| label: \"specify-one\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = age) %>%\n class()\n```\n\nWe can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata.\n\nIf you're interested in two variables (`age` and `partyid`, for example) you can `specify()` their relationship in one of two (equivalent) ways:\n\n```{r}\n#| label: \"specify-two\"\n#| warning: false\n#| message: false\n# as a formula\ngss %>%\n specify(age ~ partyid)\n\n# with the named arguments\ngss %>%\n specify(response = age, explanatory = partyid)\n```\n\nIf you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code:\n\n```{r}\n#| label: \"specify-success\"\n#| warning: false\n#| message: false\n# specifying for inference on proportions\ngss %>%\n specify(response = college, success = \"degree\")\n```\n\n## Declare the hypothesis\n\nThe next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of \"independence\" or \"point\" to the `null` argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`:\n\n```{r}\n#| label: \"hypothesize-independence\"\n#| warning: false\n#| message: false\ngss %>%\n specify(college ~ partyid, success = \"degree\") %>%\n hypothesize(null = \"independence\")\n```\n\nIf you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). 
For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write:\n\n```{r}\n#| label: \"hypothesize-40-hr-week\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40)\n```\n\nAgain, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now \"knows\" your null hypothesis.\n\n## Generate the distribution\n\nOnce we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the `type` argument:\n\n* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. \n* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample. \n* `simulate`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. (This option is currently only applicable for testing point estimates.) \n\nContinuing on with our example above, about the average number of hours worked a week, we might write:\n\n```{r}\n#| label: \"generate-point\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\")\n```\n\nIn the above example, we take 5000 bootstrap samples to form our null distribution.\n\nTo generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. 
For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age:\n\n```{r}\n#| label: \"generate-permute\"\n#| warning: false\n#| message: false\ngss %>%\n specify(partyid ~ age) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\")\n```\n\n## Calculate statistics\n\nDepending on whether you're carrying out computation-based inference or theory-based inference, you will either supply `calculate()` with the output of `generate()` or `hypothesize()`, respectively. The function, for one, takes in a `stat` argument, which is currently one of `\"mean\"`, `\"median\"`, `\"sum\"`, `\"sd\"`, `\"prop\"`, `\"count\"`, `\"diff in means\"`, `\"diff in medians\"`, `\"diff in props\"`, `\"Chisq\"`, `\"F\"`, `\"t\"`, `\"z\"`, `\"slope\"`, or `\"correlation\"`. For example, continuing our example above to calculate the null distribution of mean hours worked per week:\n\n```{r}\n#| label: \"calculate-point\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n```\n\nThe output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or $t$ and $z$ statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. 
For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write:\n\n```{r}\n#| label: \"specify-diff-in-means\"\n#| warning: false\n#| message: false\ngss %>%\n specify(age ~ college) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(\"diff in means\", order = c(\"degree\", \"no degree\"))\n```\n\n## Other utilities\n\nThe infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`).\n\nTo illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours.\n\n```{r}\n#| label: \"utilities-examples\"\n# find the point estimate\npoint_estimate <- gss %>%\n specify(response = hours) %>%\n calculate(stat = \"mean\")\n\n# generate a null distribution\nnull_dist <- gss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n```\n\n(Notice the warning: `Removed 1244 rows containing missing values.` This would be worth noting if you were actually carrying out this hypothesis test.)\n\nOur point estimate `r point_estimate` seems *pretty* close to 40, but a little bit different. We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40.\n\nWe could initially just visualize the null distribution.\n\n```{r}\n#| label: \"visualize\"\n#| warning: false\n#| message: false\nnull_dist %>%\n visualize()\n```\n\nWhere does our sample's observed statistic lie on this distribution? 
We can use the `obs_stat` argument to specify this.\n\n```{r}\n#| label: \"visualize2\"\n#| warning: false\n#| message: false\nnull_dist %>%\n visualize() +\n shade_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n```\n\nNotice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize()` outputs a plot object from ggplot2 instead of a dataframe, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of `r point_estimate` hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though?\n\n```{r}\n#| label: \"get_p_value\"\n#| warning: false\n#| message: false\n# get a two-tailed p-value\np_value <- null_dist %>%\n get_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n\np_value\n```\n\nIt looks like the p-value is `r p_value`, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (`r abs(point_estimate-40)` hours) from 40 would be `r p_value`. This may or may not be statistically significantly different, depending on the significance level $\\alpha$ you decided on *before* you ran this analysis. 
If you had set $\\alpha = .05$, then this difference would be statistically significant, but if you had set $\\alpha = .01$, then it would not be.\n\nTo get a confidence interval around our estimate, we can write:\n\n```{r}\n#| label: \"get_conf\"\n#| message: false\n#| warning: false\n# start with the null distribution\nnull_dist %>%\n # calculate the confidence interval around the point estimate\n get_confidence_interval(point_estimate = point_estimate,\n # at the 95% confidence level\n level = .95,\n # using the standard error\n type = \"se\")\n```\n\nAs you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level $\\alpha = .05$.\n\n## Theoretical methods\n\nThe infer package also provides functionality to use theoretical methods for `\"Chisq\"`, `\"F\"` and `\"t\"` test statistics. \n\nGenerally, to find a null distribution using theory-based methods, use the same code that you would use to find the null distribution using randomization-based methods, but skip the `generate()` step. 
For example, if we wanted to find a null distribution for the relationship between age (`age`) and party identification (`partyid`) using randomization, we could write:\n\n```{r}\n#| message: false\n#| warning: false\nnull_f_distn <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"F\")\n```\n\nTo find the null distribution using theory-based methods, instead, skip the `generate()` step entirely:\n\n```{r}\n#| message: false\n#| warning: false\nnull_f_distn_theoretical <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n calculate(stat = \"F\")\n```\n\nWe'll calculate the observed statistic to make use of in the following visualizations; this procedure is the same, regardless of the methods used to find the null distribution.\n\n```{r}\n#| message: false\n#| warning: false\nF_hat <- gss %>% \n specify(age ~ partyid) %>%\n calculate(stat = \"F\")\n```\n\nNow, instead of just piping the null distribution into `visualize()`, as we would do if we wanted to visualize the randomization-based null distribution, we also need to provide `method = \"theoretical\"` to `visualize()`.\n\n```{r}\n#| message: false\n#| warning: false\nvisualize(null_f_distn_theoretical, method = \"theoretical\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n```\n\nTo get a sense of how the theory-based and randomization-based null distributions relate, we can pipe the randomization-based null distribution into `visualize()` and also specify `method = \"both\"`\n\n```{r}\n#| message: false\n#| warning: false\nvisualize(null_f_distn, method = \"both\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n```\n\nThat's it! This vignette covers most all of the key functionality of infer. 
See `help(package = \"infer\")` for a full list of functions and vignettes.\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(sessioninfo)\npkgs <- c(\"tidymodels\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\nThis article only requires the tidymodels package. \n\nThe tidymodels package [infer](https://infer.tidymodels.org/) implements an expressive grammar to perform statistical inference that coheres with the `tidyverse` design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs.\n\nRegardless of which hypothesis test we're using, we're still asking the same kind of question: \n\n>Is the effect or difference in our observed data real, or due to chance? \n\nTo answer this question, we start by assuming that the observed data came from some world where \"nothing is going on\" (i.e. the observed effect was simply due to random chance), and call this assumption our **null hypothesis**. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the **alternate hypothesis**, which supposes that the effect present in the observed data is actually due to the fact that \"something is going on.\") We then calculate a **test statistic** from our data that describes the observed effect. We can use this test statistic to calculate a **p-value**, giving the probability that our observed data could come about if the null hypothesis was true. 
If this probability is below some pre-defined **significance level** $\\alpha$, then we can reject our null hypothesis.\n\nIf you are new to hypothesis testing, take a look at \n\n* [Section 9.2 of _Statistical Inference via Data Science_](https://moderndive.com/9-hypothesis-testing.html#understanding-ht)\n* The American Statistical Association's recent [statement on p-values](https://doi.org/10.1080/00031305.2016.1154108) \n\nThe workflow of this package is designed around these ideas. Starting from some data set,\n\n+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in,\n+ `hypothesize()` allows you to declare the null hypothesis,\n+ `generate()` allows you to generate data reflecting the null hypothesis, and\n+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution.\n\nThroughout this vignette, we make use of `gss`, a data set available in infer containing a sample of 500 observations of 11 variables from the *General Social Survey*. \n\n```{r}\n#| label: \"load-gss\"\n#| warning: false\n#| message: false\nlibrary(tidymodels) # Includes the infer package\n\n# load in the data set\ndata(gss)\n\n# take a look at its structure\ndplyr::glimpse(gss)\n```\n\nEach row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this data set is a representative sample of a population we want to learn about: American adults.\n\n## Specify variables\n\nThe `specify()` function can be used to specify which of the variables in the data set you're interested in. 
If you're only interested in, say, the `age` of the respondents, you might write:\n\n```{r}\n#| label: \"specify-example\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = age)\n```\n\nOn the front end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. What do we see if we check the class of this object, though?\n\n```{r}\n#| label: \"specify-one\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = age) %>%\n class()\n```\n\nWe can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata.\n\nIf you're interested in two variables (`age` and `partyid`, for example) you can `specify()` their relationship in one of two (equivalent) ways:\n\n```{r}\n#| label: \"specify-two\"\n#| warning: false\n#| message: false\n# as a formula\ngss %>%\n specify(age ~ partyid)\n\n# with the named arguments\ngss %>%\n specify(response = age, explanatory = partyid)\n```\n\nIf you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code:\n\n```{r}\n#| label: \"specify-success\"\n#| warning: false\n#| message: false\n# specifying for inference on proportions\ngss %>%\n specify(response = college, success = \"degree\")\n```\n\n## Declare the hypothesis\n\nThe next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of \"independence\" or \"point\" to the `null` argument. 
If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`:\n\n```{r}\n#| label: \"hypothesize-independence\"\n#| warning: false\n#| message: false\ngss %>%\n specify(college ~ partyid, success = \"degree\") %>%\n hypothesize(null = \"independence\")\n```\n\nIf you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write:\n\n```{r}\n#| label: \"hypothesize-40-hr-week\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40)\n```\n\nAgain, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now \"knows\" your null hypothesis.\n\n## Generate the distribution\n\nOnce we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the `type` argument:\n\n* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. \n* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample. \n* `simulate`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. (This option is currently only applicable for testing point estimates.) 
\n\nContinuing on with our example above, about the average number of hours worked a week, we might write:\n\n```{r}\n#| label: \"generate-point\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\")\n```\n\nIn the above example, we take 5000 bootstrap samples to form our null distribution.\n\nTo generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age:\n\n```{r}\n#| label: \"generate-permute\"\n#| warning: false\n#| message: false\ngss %>%\n specify(partyid ~ age) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\")\n```\n\n## Calculate statistics\n\nDepending on whether you're carrying out computation-based inference or theory-based inference, you will either supply `calculate()` with the output of `generate()` or `hypothesize()`, respectively. The function, for one, takes in a `stat` argument, which is currently one of `\"mean\"`, `\"median\"`, `\"sum\"`, `\"sd\"`, `\"prop\"`, `\"count\"`, `\"diff in means\"`, `\"diff in medians\"`, `\"diff in props\"`, `\"Chisq\"`, `\"F\"`, `\"t\"`, `\"z\"`, `\"slope\"`, or `\"correlation\"`. For example, continuing our example above to calculate the null distribution of mean hours worked per week:\n\n```{r}\n#| label: \"calculate-point\"\n#| warning: false\n#| message: false\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n```\n\nThe output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. 
If you're carrying out inference on differences in means, medians, or proportions, or $t$ and $z$ statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write:\n\n```{r}\n#| label: \"specify-diff-in-means\"\n#| warning: false\n#| message: false\ngss %>%\n specify(age ~ college) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(\"diff in means\", order = c(\"degree\", \"no degree\"))\n```\n\n## Other utilities\n\nThe infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`).\n\nTo illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours.\n\n```{r}\n#| label: \"utilities-examples\"\n# find the point estimate\npoint_estimate <- gss %>%\n specify(response = hours) %>%\n calculate(stat = \"mean\")\n\n# generate a null distribution\nnull_dist <- gss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n```\n\n(Notice the warning: `Removed 1244 rows containing missing values.` This would be worth noting if you were actually carrying out this hypothesis test.)\n\nOur point estimate `r point_estimate` seems *pretty* close to 40, but a little bit different. 
We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40.\n\nWe could initially just visualize the null distribution.\n\n```{r}\n#| label: \"visualize\"\n#| warning: false\n#| message: false\nnull_dist %>%\n visualize()\n```\n\nWhere does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this.\n\n```{r}\n#| label: \"visualize2\"\n#| warning: false\n#| message: false\nnull_dist %>%\n visualize() +\n shade_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n```\n\nNotice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize()` outputs a plot object from ggplot2 instead of a dataframe, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of `r point_estimate` hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though?\n\n```{r}\n#| label: \"get_p_value\"\n#| warning: false\n#| message: false\n# get a two-tailed p-value\np_value <- null_dist %>%\n get_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n\np_value\n```\n\nIt looks like the p-value is `r p_value`, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (`r abs(point_estimate-40)` hours) from 40 would be `r p_value`. This may or may not be statistically significantly different, depending on the significance level $\\alpha$ you decided on *before* you ran this analysis. 
If you had set $\\alpha = .05$, then this difference would be statistically significant, but if you had set $\\alpha = .01$, then it would not be.\n\nTo get a confidence interval around our estimate, we can write:\n\n```{r}\n#| label: \"get_conf\"\n#| message: false\n#| warning: false\n# start with the null distribution\nnull_dist %>%\n # calculate the confidence interval around the point estimate\n get_confidence_interval(point_estimate = point_estimate,\n # at the 95% confidence level\n level = .95,\n # using the standard error\n type = \"se\")\n```\n\nAs you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level $\\alpha = .05$.\n\n## Theoretical methods\n\nThe infer package also provides functionality to use theoretical methods for `\"Chisq\"`, `\"F\"` and `\"t\"` test statistics. \n\nGenerally, to find a null distribution using theory-based methods, use the same code that you would use to find the null distribution using randomization-based methods, but skip the `generate()` step. 
For example, if we wanted to find a null distribution for the relationship between age (`age`) and party identification (`partyid`) using randomization, we could write:\n\n```{r}\n#| message: false\n#| warning: false\nnull_f_distn <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"F\")\n```\n\nTo find the null distribution using theory-based methods, instead, skip the `generate()` step entirely:\n\n```{r}\n#| message: false\n#| warning: false\nnull_f_distn_theoretical <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n calculate(stat = \"F\")\n```\n\nWe'll calculate the observed statistic to make use of in the following visualizations; this procedure is the same, regardless of the methods used to find the null distribution.\n\n```{r}\n#| message: false\n#| warning: false\nF_hat <- gss %>% \n specify(age ~ partyid) %>%\n calculate(stat = \"F\")\n```\n\nNow, instead of just piping the null distribution into `visualize()`, as we would do if we wanted to visualize the randomization-based null distribution, we also need to provide `method = \"theoretical\"` to `visualize()`.\n\n```{r}\n#| message: false\n#| warning: false\nvisualize(null_f_distn_theoretical, method = \"theoretical\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n```\n\nTo get a sense of how the theory-based and randomization-based null distributions relate, we can pipe the randomization-based null distribution into `visualize()` and also specify `method = \"both\"`\n\n```{r}\n#| message: false\n#| warning: false\nvisualize(null_f_distn, method = \"both\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n```\n\nThat's it! This vignette covers most all of the key functionality of infer. 
See `help(package = \"infer\")` for a full list of functions and vignettes.\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Hypothesis testing using resampling and tidy data","categories":["statistical analysis","hypothesis testing","bootstraping"],"type":"learn-subsection","weight":4,"description":"Perform common hypothesis 
tests for statistical inference using flexible functions.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/statistics/k-means/index.qmd.json b/.quarto/idx/learn/statistics/k-means/index.qmd.json new file mode 100644 index 00000000..958bb553 --- /dev/null +++ b/.quarto/idx/learn/statistics/k-means/index.qmd.json @@ -0,0 +1 @@ +{"title":"K-means clustering with tidy data principles","markdown":{"yaml":{"title":"K-means clustering with tidy data principles","categories":["statistical analysis","clustering","tidying results"],"type":"learn-subsection","weight":2,"description":"Summarize clustering characteristics and estimate the best number of clusters for a data set.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\nThis article only requires the tidymodels package.\n\nK-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions: \n\n- `tidy()`\n- `augment()` \n- `glance()`\n\nLet's start by generating some random two-dimensional data with three clusters. 
Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster:\n\n```{r}\nlibrary(tidymodels)\n\nset.seed(27)\n\ncenters <- tibble(\n cluster = factor(1:3), \n num_points = c(100, 150, 50), # number points in each cluster\n x1 = c(5, 0, -3), # x1 coordinate of cluster center\n x2 = c(-1, 1, -2) # x2 coordinate of cluster center\n)\n\nlabelled_points <- \n centers %>%\n mutate(\n x1 = map2(num_points, x1, rnorm),\n x2 = map2(num_points, x2, rnorm)\n ) %>% \n select(-num_points) %>% \n unnest(cols = c(x1, x2))\n\nggplot(labelled_points, aes(x1, x2, color = cluster)) +\n geom_point(alpha = 0.3)\n```\n\nThis is an ideal case for k-means clustering. \n\n## How does K-means work?\n\nRather than using equations, this short animation using the [artwork](https://github.com/allisonhorst/stats-illustrations) of Allison Horst explains the clustering process:\n\n```{r}\n#| label: \"illustrations\"\n#| echo: false\n#| results: asis\n#| fig-align: center\nknitr::include_graphics(\"kmeans.gif\")\n```\n\n## Clustering in R\n\nWe'll use the built-in `kmeans()` function, which accepts a data frame with all numeric columns as it's primary argument.\n\n```{r}\npoints <- \n labelled_points %>% \n select(-cluster)\n\nkclust <- kmeans(points, centers = 3)\nkclust\nsummary(kclust)\n```\n\nThe output is a list of vectors, where each component has a different length. There's one of length `r nrow(points)`, the same as our original data set. There are two elements of length 3 (`withinss` and `tot.withinss`) and `centers` is a matrix with 3 rows. And then there are the elements of length 1: `totss`, `tot.withinss`, `betweenss`, and `iter`. 
(The value `ifault` indicates possible algorithm problems.)\n\nThese differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a *different kind* of information.\n\n- `cluster` (`r nrow(points)` values) contains information about each *point*\n- `centers`, `withinss`, and `size` (3 values) contain information about each *cluster*\n- `totss`, `tot.withinss`, `betweenss`, and `iter` (1 value) contain information about the *full clustering*\n\nWhich of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there's no straightforward way to combine them), they are extracted by separate functions. `augment` adds the point classifications to the original data set:\n\n```{r}\naugment(kclust, points)\n```\n\nThe `tidy()` function summarizes on a per-cluster level:\n\n```{r}\ntidy(kclust)\n```\n\nAnd as it always does, the `glance()` function extracts a single-row summary:\n\n```{r}\nglance(kclust)\n```\n\n## Exploratory clustering\n\nWhile these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like [dplyr](https://dplyr.tidyverse.org/).\n\nLet's say we want to explore the effect of different choices of `k`, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of `k`, then create columns containing the tidied, glanced and augmented data:\n\n```{r}\nkclusts <- \n tibble(k = 1:9) %>%\n mutate(\n kclust = map(k, ~kmeans(points, .x)),\n tidied = map(kclust, tidy),\n glanced = map(kclust, glance),\n augmented = map(kclust, augment, points)\n )\n\nkclusts\n```\n\nWe can turn these into three separate data sets each representing a different type of data: using `tidy()`, using `augment()`, and using `glance()`. 
Each of these goes into a separate data set as they represent different types of data.\n\n```{r}\nclusters <- \n kclusts %>%\n unnest(cols = c(tidied))\n\nassignments <- \n kclusts %>% \n unnest(cols = c(augmented))\n\nclusterings <- \n kclusts %>%\n unnest(cols = c(glanced))\n```\n\nNow we can plot the original points using the data from `augment()`, with each point colored according to the predicted cluster.\n\n```{r}\n#| fig-width: 7\n#| fig-height: 7\np1 <- \n ggplot(assignments, aes(x = x1, y = x2)) +\n geom_point(aes(color = .cluster), alpha = 0.8) + \n facet_wrap(~ k)\np1\n```\n\nAlready we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when `k` is too high or too low. We can then add the centers of the cluster using the data from `tidy()`:\n\n```{r}\np2 <- p1 + geom_point(data = clusters, size = 10, shape = \"x\")\np2\n```\n\nThe data from `glance()` fills a different but equally important purpose; it lets us view trends of some summary statistics across values of `k`. Of particular interest is the total within sum of squares, saved in the `tot.withinss` column.\n\n```{r}\nggplot(clusterings, aes(k, tot.withinss)) +\n geom_line() +\n geom_point()\n```\n\nThis represents the variance within the clusters. It decreases as `k` increases, but notice a bend (or \"elbow\") around `k = 3`. This bend indicates that additional clusters beyond the third have little value. (See [here](https://web.stanford.edu/~hastie/Papers/gap.pdf) for a more mathematically rigorous interpretation and implementation of this method). 
Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\nThis article only requires the tidymodels package.\n\nK-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions: \n\n- `tidy()`\n- `augment()` \n- `glance()`\n\nLet's start by generating some random two-dimensional data with three clusters. Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster:\n\n```{r}\nlibrary(tidymodels)\n\nset.seed(27)\n\ncenters <- tibble(\n cluster = factor(1:3), \n num_points = c(100, 150, 50), # number points in each cluster\n x1 = c(5, 0, -3), # x1 coordinate of cluster center\n x2 = c(-1, 1, -2) # x2 coordinate of cluster center\n)\n\nlabelled_points <- \n centers %>%\n mutate(\n x1 = map2(num_points, x1, rnorm),\n x2 = map2(num_points, x2, rnorm)\n ) %>% \n select(-num_points) %>% \n unnest(cols = c(x1, x2))\n\nggplot(labelled_points, aes(x1, x2, color = cluster)) +\n geom_point(alpha = 0.3)\n```\n\nThis is an ideal case for k-means clustering. 
\n\n## How does K-means work?\n\nRather than using equations, this short animation using the [artwork](https://github.com/allisonhorst/stats-illustrations) of Allison Horst explains the clustering process:\n\n```{r}\n#| label: \"illustrations\"\n#| echo: false\n#| results: asis\n#| fig-align: center\nknitr::include_graphics(\"kmeans.gif\")\n```\n\n## Clustering in R\n\nWe'll use the built-in `kmeans()` function, which accepts a data frame with all numeric columns as it's primary argument.\n\n```{r}\npoints <- \n labelled_points %>% \n select(-cluster)\n\nkclust <- kmeans(points, centers = 3)\nkclust\nsummary(kclust)\n```\n\nThe output is a list of vectors, where each component has a different length. There's one of length `r nrow(points)`, the same as our original data set. There are two elements of length 3 (`withinss` and `tot.withinss`) and `centers` is a matrix with 3 rows. And then there are the elements of length 1: `totss`, `tot.withinss`, `betweenss`, and `iter`. (The value `ifault` indicates possible algorithm problems.)\n\nThese differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a *different kind* of information.\n\n- `cluster` (`r nrow(points)` values) contains information about each *point*\n- `centers`, `withinss`, and `size` (3 values) contain information about each *cluster*\n- `totss`, `tot.withinss`, `betweenss`, and `iter` (1 value) contain information about the *full clustering*\n\nWhich of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there's no straightforward way to combine them), they are extracted by separate functions. 
`augment` adds the point classifications to the original data set:\n\n```{r}\naugment(kclust, points)\n```\n\nThe `tidy()` function summarizes on a per-cluster level:\n\n```{r}\ntidy(kclust)\n```\n\nAnd as it always does, the `glance()` function extracts a single-row summary:\n\n```{r}\nglance(kclust)\n```\n\n## Exploratory clustering\n\nWhile these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like [dplyr](https://dplyr.tidyverse.org/).\n\nLet's say we want to explore the effect of different choices of `k`, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of `k`, then create columns containing the tidied, glanced and augmented data:\n\n```{r}\nkclusts <- \n tibble(k = 1:9) %>%\n mutate(\n kclust = map(k, ~kmeans(points, .x)),\n tidied = map(kclust, tidy),\n glanced = map(kclust, glance),\n augmented = map(kclust, augment, points)\n )\n\nkclusts\n```\n\nWe can turn these into three separate data sets each representing a different type of data: using `tidy()`, using `augment()`, and using `glance()`. Each of these goes into a separate data set as they represent different types of data.\n\n```{r}\nclusters <- \n kclusts %>%\n unnest(cols = c(tidied))\n\nassignments <- \n kclusts %>% \n unnest(cols = c(augmented))\n\nclusterings <- \n kclusts %>%\n unnest(cols = c(glanced))\n```\n\nNow we can plot the original points using the data from `augment()`, with each point colored according to the predicted cluster.\n\n```{r}\n#| fig-width: 7\n#| fig-height: 7\np1 <- \n ggplot(assignments, aes(x = x1, y = x2)) +\n geom_point(aes(color = .cluster), alpha = 0.8) + \n facet_wrap(~ k)\np1\n```\n\nAlready we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when `k` is too high or too low. 
We can then add the centers of the cluster using the data from `tidy()`:\n\n```{r}\np2 <- p1 + geom_point(data = clusters, size = 10, shape = \"x\")\np2\n```\n\nThe data from `glance()` fills a different but equally important purpose; it lets us view trends of some summary statistics across values of `k`. Of particular interest is the total within sum of squares, saved in the `tot.withinss` column.\n\n```{r}\nggplot(clusterings, aes(k, tot.withinss)) +\n geom_line() +\n geom_point()\n```\n\nThis represents the variance within the clusters. It decreases as `k` increases, but notice a bend (or \"elbow\") around `k = 3`. This bend indicates that additional clusters beyond the third have little value. (See [here](https://web.stanford.edu/~hastie/Papers/gap.pdf) for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: 
false\nsmall_session(pkgs)\n```\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"K-means clustering with tidy data principles","categories":["statistical analysis","clustering","tidying results"],"type":"learn-subsection","weight":2,"description":"Summarize clustering characteristics 
and estimate the best number of clusters for a data set.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/statistics/tidy-analysis/index.qmd.json b/.quarto/idx/learn/statistics/tidy-analysis/index.qmd.json new file mode 100644 index 00000000..be6efd53 --- /dev/null +++ b/.quarto/idx/learn/statistics/tidy-analysis/index.qmd.json @@ -0,0 +1 @@ +{"title":"Correlation and regression fundamentals with tidy data principles","markdown":{"yaml":{"title":"Correlation and regression fundamentals with tidy data principles","categories":["statistical analysis","correlation","tidying results"],"type":"learn-subsection","weight":1,"description":"Analyze the results of correlation tests and simple regression models for many data sets at once.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\nThis article only requires the tidymodels package.\n\nWhile the tidymodels package [broom](https://broom.tidyverse.org/) is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the `nest()/unnest()` functions from [tidyr](https://tidyr.tidyverse.org/) and the `map()` function in [purrr](https://purrr.tidyverse.org/).\n\n## Correlation analysis\n\nLet's demonstrate this with a simple data set, the built-in `Orange`. 
We start by coercing `Orange` to a `tibble`. This gives a nicer print method that will be especially useful later on when we start working with list-columns.\n\n```{r}\nlibrary(tidymodels)\n\ndata(Orange)\n\nOrange <- as_tibble(Orange)\nOrange\n```\n\nThis contains 35 observations of three variables: `Tree`, `age`, and `circumference`. `Tree` is a factor with five levels describing five trees. As might be expected, age and circumference are correlated:\n\n```{r}\ncor(Orange$age, Orange$circumference)\n\nlibrary(ggplot2)\n\nggplot(Orange, aes(age, circumference, color = Tree)) +\n geom_line()\n```\n\nSuppose you want to test for correlations individually *within* each tree. You can do this with dplyr's `group_by`:\n\n```{r}\nOrange %>% \n group_by(Tree) %>%\n summarize(correlation = cor(age, circumference))\n```\n\n(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees).\n\nSuppose that instead of simply estimating a correlation, we want to perform a hypothesis test with `cor.test()`:\n\n```{r}\nct <- cor.test(Orange$age, Orange$circumference)\nct\n```\n\nThis test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the `tidy()` function:\n\n```{r}\ntidy(ct)\n```\n\nOften, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a `nest-map-unnest` workflow. For example, suppose we want to perform correlation tests for each different tree. 
We start by `nest`ing our data based on the group of interest:\n\n```{r}\nnested <- \n Orange %>% \n nest(data = c(age, circumference))\n```\n\nThen we perform a correlation test for each nested tibble using `purrr::map()`:\n\n```{r}\nnested %>% \n mutate(test = map(data, ~ cor.test(.x$age, .x$circumference)))\n```\n\nThis results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with `map()`.\n\n```{r}\nnested %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) \n```\n\nFinally, we want to unnest the tidied data frames so we can see the results in a flat tibble. All together, this looks like:\n\n```{r}\nOrange %>% \n nest(data = c(age, circumference)) %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) %>% \n unnest(cols = tidied) %>% \n select(-data, -test)\n```\n\n## Regression models\n\nThis type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like:\n\n```{r}\nlm_fit <- lm(age ~ circumference, data = Orange)\nsummary(lm_fit)\n```\n\nWhen we tidy these results, we get multiple rows of output for each model:\n\n```{r}\ntidy(lm_fit)\n```\n\nNow we can handle multiple regressions at once using exactly the same workflow as before:\n\n```{r}\nOrange %>%\n nest(data = c(-Tree)) %>% \n mutate(\n fit = map(data, ~ lm(age ~ circumference, data = .x)),\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n```\n\nYou can just as easily use multiple predictors in the regressions, as shown here on the `mtcars` dataset. We nest the data into automatic vs. 
manual cars (the `am` column), then perform the regression within each nested tibble.\n\n```{r}\ndata(mtcars)\nmtcars <- as_tibble(mtcars) # to play nicely with list-cols\nmtcars\n\nmtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n```\n\nWhat if you want not just the `tidy()` output, but the `augment()` and `glance()` outputs as well, while still performing each regression only once? Since we're using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs.\n\n```{r}\nregressions <- \n mtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),\n tidied = map(fit, tidy),\n glanced = map(fit, glance),\n augmented = map(fit, augment)\n )\n\nregressions %>% \n select(tidied) %>% \n unnest(tidied)\n\nregressions %>% \n select(glanced) %>% \n unnest(glanced)\n\nregressions %>% \n select(augmented) %>% \n unnest(augmented)\n```\n\nBy combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. This includes:\n\n- sorting by p-value or estimate to find the most significant terms across all tests,\n- p-value histograms, and\n- volcano plots comparing p-values to effect size estimates.\n\nIn each of these cases, we can easily filter, facet, or distinguish based on the `term` column. 
In short, this makes the tools of tidy data analysis available for the *results* of data analysis and models, not just the inputs.\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\nThis article only requires the tidymodels package.\n\nWhile the tidymodels package [broom](https://broom.tidyverse.org/) is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the `nest()/unnest()` functions from [tidyr](https://tidyr.tidyverse.org/) and the `map()` function in [purrr](https://purrr.tidyverse.org/).\n\n## Correlation analysis\n\nLet's demonstrate this with a simple data set, the built-in `Orange`. We start by coercing `Orange` to a `tibble`. This gives a nicer print method that will be especially useful later on when we start working with list-columns.\n\n```{r}\nlibrary(tidymodels)\n\ndata(Orange)\n\nOrange <- as_tibble(Orange)\nOrange\n```\n\nThis contains 35 observations of three variables: `Tree`, `age`, and `circumference`. `Tree` is a factor with five levels describing five trees. As might be expected, age and circumference are correlated:\n\n```{r}\ncor(Orange$age, Orange$circumference)\n\nlibrary(ggplot2)\n\nggplot(Orange, aes(age, circumference, color = Tree)) +\n geom_line()\n```\n\nSuppose you want to test for correlations individually *within* each tree. 
You can do this with dplyr's `group_by`:\n\n```{r}\nOrange %>% \n group_by(Tree) %>%\n summarize(correlation = cor(age, circumference))\n```\n\n(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees).\n\nSuppose that instead of simply estimating a correlation, we want to perform a hypothesis test with `cor.test()`:\n\n```{r}\nct <- cor.test(Orange$age, Orange$circumference)\nct\n```\n\nThis test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the `tidy()` function:\n\n```{r}\ntidy(ct)\n```\n\nOften, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a `nest-map-unnest` workflow. For example, suppose we want to perform correlation tests for each different tree. We start by `nest`ing our data based on the group of interest:\n\n```{r}\nnested <- \n Orange %>% \n nest(data = c(age, circumference))\n```\n\nThen we perform a correlation test for each nested tibble using `purrr::map()`:\n\n```{r}\nnested %>% \n mutate(test = map(data, ~ cor.test(.x$age, .x$circumference)))\n```\n\nThis results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with `map()`.\n\n```{r}\nnested %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) \n```\n\nFinally, we want to unnest the tidied data frames so we can see the results in a flat tibble. 
All together, this looks like:\n\n```{r}\nOrange %>% \n nest(data = c(age, circumference)) %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) %>% \n unnest(cols = tidied) %>% \n select(-data, -test)\n```\n\n## Regression models\n\nThis type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like:\n\n```{r}\nlm_fit <- lm(age ~ circumference, data = Orange)\nsummary(lm_fit)\n```\n\nWhen we tidy these results, we get multiple rows of output for each model:\n\n```{r}\ntidy(lm_fit)\n```\n\nNow we can handle multiple regressions at once using exactly the same workflow as before:\n\n```{r}\nOrange %>%\n nest(data = c(-Tree)) %>% \n mutate(\n fit = map(data, ~ lm(age ~ circumference, data = .x)),\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n```\n\nYou can just as easily use multiple predictors in the regressions, as shown here on the `mtcars` dataset. We nest the data into automatic vs. manual cars (the `am` column), then perform the regression within each nested tibble.\n\n```{r}\ndata(mtcars)\nmtcars <- as_tibble(mtcars) # to play nicely with list-cols\nmtcars\n\nmtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n```\n\nWhat if you want not just the `tidy()` output, but the `augment()` and `glance()` outputs as well, while still performing each regression only once? 
Since we're using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs.\n\n```{r}\nregressions <- \n mtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),\n tidied = map(fit, tidy),\n glanced = map(fit, glance),\n augmented = map(fit, augment)\n )\n\nregressions %>% \n select(tidied) %>% \n unnest(tidied)\n\nregressions %>% \n select(glanced) %>% \n unnest(glanced)\n\nregressions %>% \n select(augmented) %>% \n unnest(augmented)\n```\n\nBy combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. This includes:\n\n- sorting by p-value or estimate to find the most significant terms across all tests,\n- p-value histograms, and\n- volcano plots comparing p-values to effect size estimates.\n\nIn each of these cases, we can easily filter, facet, or distinguish based on the `term` column. 
In short, this makes the tools of tidy data analysis available for the *results* of data analysis and models, not just the inputs.\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Correlation and regression fundamentals with tidy data principles","categories":["statistical analysis","correlation","tidying results"],"type":"learn-subsection","weight":1,"description":"Analyze the 
results of correlation tests and simple regression models for many data sets at once.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/statistics/xtabs/index.qmd.json b/.quarto/idx/learn/statistics/xtabs/index.qmd.json new file mode 100644 index 00000000..1faebb21 --- /dev/null +++ b/.quarto/idx/learn/statistics/xtabs/index.qmd.json @@ -0,0 +1 @@ +{"title":"Statistical analysis of contingency tables","markdown":{"yaml":{"title":"Statistical analysis of contingency tables","categories":["statistical analysis","analysis of tables","hypothesis testing"],"type":"learn-subsection","weight":5,"description":"Use tests of independence and goodness of fit to analyze tables of counts.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(sessioninfo)\npkgs <- c(\"tidymodels\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\nThis article only requires that you have the tidymodels package installed.\n\nIn this vignette, we'll walk through conducting a $\\chi^2$ (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.\n\nThroughout this vignette, we'll make use of the `ad_data` data set (available in the modeldata package, which is part of tidymodels). 
This data set is related to cognitive impairment in 333 patients from [Craig-Schapiro _et al_ (2011)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079734/). See `?ad_data` for more information on the variables included and their source. One of the main research questions in these data were how a person's genetics related to the Apolipoprotein E gene affect their cognitive skills. The data shows: \n\n```{r}\n#| label: \"glimpse-ad_data-actual\"\n#| warning: false\n#| message: false\nlibrary(tidymodels) # Includes the infer package\n\ndata(ad_data, package = \"modeldata\")\nad_data %>%\n select(Genotype, Class)\n```\n\nThe three main genetic variants are called E2, E3, and E4. The values in `Genotype` represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of \"E2E4\" means E2 from one parent and E4 from the other). \n\n## Test of independence\n\nTo carry out a chi-squared test of independence, we'll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data:\n\n```{r}\n#| label: \"plot-indep\"\n#| echo: false\nad_data %>%\n ggplot() +\n aes(y = Genotype, fill = Class) +\n geom_bar(position = \"fill\") +\n scale_fill_brewer(type = \"qual\") +\n labs(y = \"Genotype: Apolipoprotein E Genetics\",\n x = \"Proportion\") \n```\n\nIf there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise?\n\nFirst, to calculate the observed statistic, we can use `specify()` and `calculate()`.\n\n```{r}\n#| label: \"calc-obs-stat-indep\"\n#| warning: false\n#| message: false\n# calculate the observed statistic\nobserved_indep_statistic <- ad_data %>%\n specify(Genotype ~ Class) %>%\n calculate(stat = \"Chisq\")\n```\n\nThe observed $\\chi^2$ statistic is `r observed_indep_statistic`. 
Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics.\n\nWe can `generate()` the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person's genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two.\n\n```{r}\n#| label: \"generate-null-indep\"\n#| warning: false\n#| message: false\n# generate the null distribution using randomization\nnull_distribution_simulated <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"Chisq\")\n```\n\nNote that, in the line `specify(Genotype ~ Class)` above, we could use the equivalent syntax `specify(response = Genotype, explanatory = Class)`. 
The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization.\n\n```{r}\n#| label: \"generate-null-indep-t\"\n#| warning: false\n#| message: false\n# generate the null distribution by theoretical approximation\nnull_distribution_theoretical <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n # note that we skip the generation step here!\n calculate(stat = \"Chisq\")\n```\n\nTo get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:\n\n```{r}\n#| label: \"visualize-indep\"\n#| warning: false\n#| message: false\n# visualize the null distribution and test statistic!\nnull_distribution_simulated %>%\n visualize() + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\nWe could also visualize the observed statistic against the theoretical null distribution. Note that we skip the `generate()` and `calculate()` steps when using the theoretical approach, and that we now need to provide `method = \"theoretical\"` to `visualize()`.\n\n```{r}\n#| label: \"visualize-indep-theor\"\n#| warning: false\n#| message: false\n# visualize the theoretical null distribution and test statistic!\nad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n visualize(method = \"theoretical\") + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\nTo visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = \"both\"`.\n\n```{r}\n#| label: \"visualize-indep-both\"\n#| warning: false\n#| message: false\n# visualize both null distributions and the test statistic!\nnull_distribution_simulated %>%\n visualize(method = \"both\") + \n shade_p_value(observed_indep_statistic,\n direction = 
\"greater\")\n```\n\nEither way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value:\n\n```{r}\n#| label: \"p-value-indep\"\n#| warning: false\n#| message: false\n# calculate the p value from the observed statistic and null distribution\np_value_independence <- null_distribution_simulated %>%\n get_p_value(obs_stat = observed_indep_statistic,\n direction = \"greater\")\n\np_value_independence\n```\n\nThus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than `r observed_indep_statistic` is approximately `r p_value_independence`.\n\nNote that, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this:\n\n```{r}\n#| label: \"chisq-indep-wrapper\"\n#| message: false\n#| warning: false\nchisq_test(ad_data, Genotype ~ Class)\n```\n\n\n## Goodness of fit\n\nNow, moving on to a chi-squared goodness of fit test, we'll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. For example, [Song _et al_ (2004)](https://annals.org/aim/article-abstract/717641/meta-analysis-apolipoprotein-e-genotypes-risk-coronary-heart-disease) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known). \n\nThe rates of the meta-analysis and our observed data are: \n \n```{r}\n#| label: \"rates\"\n# Song, Y., Stampfer, M. J., & Liu, S. (2004). 
Meta-Analysis: Apolipoprotein E \n# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, \n# 141(2), 137.\nmeta_rates <- c(\"E2E2\" = 0.71, \"E2E3\" = 11.4, \"E2E4\" = 2.32,\n \"E3E3\" = 61.0, \"E3E4\" = 22.6, \"E4E4\" = 2.22)\nmeta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100%\n\nobs_rates <- table(ad_data$Genotype)/nrow(ad_data)\nround(cbind(obs_rates, meta_rates) * 100, 2)\n```\n\nSuppose our null hypothesis is that `Genotype` follows the same frequency distribution as the meta-analysis. Lets now test whether this difference in distributions is statistically significant.\n\nFirst, to carry out this hypothesis test, we would calculate our observed statistic.\n\n```{r}\n#| label: \"observed-gof-statistic\"\n#| warning: false\n#| message: false\n# calculating the null distribution\nobserved_gof_statistic <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n calculate(stat = \"Chisq\")\n```\n\nThe observed statistic is `r observed_gof_statistic`. Now, generating a null distribution, by just dropping in a call to `generate()`:\n\n\n```{r}\n#| label: \"null-distribution-gof\"\n#| warning: false\n#| message: false\n# generating a null distribution\nnull_distribution_gof <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n generate(reps = 5000, type = \"simulate\") %>%\n calculate(stat = \"Chisq\")\n```\n\nAgain, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:\n\n```{r}\n#| label: \"visualize-indep-gof\"\n#| warning: false\n#| message: false\n# visualize the null distribution and test statistic!\nnull_distribution_gof %>%\n visualize() + \n shade_p_value(observed_gof_statistic,\n direction = \"greater\")\n```\n\nThis statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? 
Calculating the p-value:\n\n```{r}\n#| label: \"get-p-value-gof\"\n#| warning: false\n#| message: false\n# calculate the p-value\np_value_gof <- null_distribution_gof %>%\n get_p_value(observed_gof_statistic,\n direction = \"greater\")\n\np_value_gof\n```\n\nThus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately `r p_value_gof`.\n\nAgain, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out chi-squared goodness of fit tests on tidy data. The syntax goes like this:\n\n```{r}\n#| label: \"chisq-gof-wrapper\"\n#| message: false\n#| warning: false\nchisq_test(ad_data, response = Genotype, p = meta_rates)\n```\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(sessioninfo)\npkgs <- c(\"tidymodels\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n## Introduction\n\nThis article only requires that you have the tidymodels package installed.\n\nIn this vignette, we'll walk through conducting a $\\chi^2$ (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.\n\nThroughout this vignette, we'll make use of the `ad_data` data set (available in the modeldata package, which is part of tidymodels). 
This data set is related to cognitive impairment in 333 patients from [Craig-Schapiro _et al_ (2011)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079734/). See `?ad_data` for more information on the variables included and their source. One of the main research questions in these data were how a person's genetics related to the Apolipoprotein E gene affect their cognitive skills. The data shows: \n\n```{r}\n#| label: \"glimpse-ad_data-actual\"\n#| warning: false\n#| message: false\nlibrary(tidymodels) # Includes the infer package\n\ndata(ad_data, package = \"modeldata\")\nad_data %>%\n select(Genotype, Class)\n```\n\nThe three main genetic variants are called E2, E3, and E4. The values in `Genotype` represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of \"E2E4\" means E2 from one parent and E4 from the other). \n\n## Test of independence\n\nTo carry out a chi-squared test of independence, we'll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data:\n\n```{r}\n#| label: \"plot-indep\"\n#| echo: false\nad_data %>%\n ggplot() +\n aes(y = Genotype, fill = Class) +\n geom_bar(position = \"fill\") +\n scale_fill_brewer(type = \"qual\") +\n labs(y = \"Genotype: Apolipoprotein E Genetics\",\n x = \"Proportion\") \n```\n\nIf there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise?\n\nFirst, to calculate the observed statistic, we can use `specify()` and `calculate()`.\n\n```{r}\n#| label: \"calc-obs-stat-indep\"\n#| warning: false\n#| message: false\n# calculate the observed statistic\nobserved_indep_statistic <- ad_data %>%\n specify(Genotype ~ Class) %>%\n calculate(stat = \"Chisq\")\n```\n\nThe observed $\\chi^2$ statistic is `r observed_indep_statistic`. 
Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics.\n\nWe can `generate()` the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person's genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two.\n\n```{r}\n#| label: \"generate-null-indep\"\n#| warning: false\n#| message: false\n# generate the null distribution using randomization\nnull_distribution_simulated <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"Chisq\")\n```\n\nNote that, in the line `specify(Genotype ~ Class)` above, we could use the equivalent syntax `specify(response = Genotype, explanatory = Class)`. 
The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization.\n\n```{r}\n#| label: \"generate-null-indep-t\"\n#| warning: false\n#| message: false\n# generate the null distribution by theoretical approximation\nnull_distribution_theoretical <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n # note that we skip the generation step here!\n calculate(stat = \"Chisq\")\n```\n\nTo get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:\n\n```{r}\n#| label: \"visualize-indep\"\n#| warning: false\n#| message: false\n# visualize the null distribution and test statistic!\nnull_distribution_simulated %>%\n visualize() + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\nWe could also visualize the observed statistic against the theoretical null distribution. Note that we skip the `generate()` and `calculate()` steps when using the theoretical approach, and that we now need to provide `method = \"theoretical\"` to `visualize()`.\n\n```{r}\n#| label: \"visualize-indep-theor\"\n#| warning: false\n#| message: false\n# visualize the theoretical null distribution and test statistic!\nad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n visualize(method = \"theoretical\") + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\nTo visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = \"both\"`.\n\n```{r}\n#| label: \"visualize-indep-both\"\n#| warning: false\n#| message: false\n# visualize both null distributions and the test statistic!\nnull_distribution_simulated %>%\n visualize(method = \"both\") + \n shade_p_value(observed_indep_statistic,\n direction = 
\"greater\")\n```\n\nEither way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value:\n\n```{r}\n#| label: \"p-value-indep\"\n#| warning: false\n#| message: false\n# calculate the p value from the observed statistic and null distribution\np_value_independence <- null_distribution_simulated %>%\n get_p_value(obs_stat = observed_indep_statistic,\n direction = \"greater\")\n\np_value_independence\n```\n\nThus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than `r observed_indep_statistic` is approximately `r p_value_independence`.\n\nNote that, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this:\n\n```{r}\n#| label: \"chisq-indep-wrapper\"\n#| message: false\n#| warning: false\nchisq_test(ad_data, Genotype ~ Class)\n```\n\n\n## Goodness of fit\n\nNow, moving on to a chi-squared goodness of fit test, we'll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. For example, [Song _et al_ (2004)](https://annals.org/aim/article-abstract/717641/meta-analysis-apolipoprotein-e-genotypes-risk-coronary-heart-disease) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known). \n\nThe rates of the meta-analysis and our observed data are: \n \n```{r}\n#| label: \"rates\"\n# Song, Y., Stampfer, M. J., & Liu, S. (2004). 
Meta-Analysis: Apolipoprotein E \n# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, \n# 141(2), 137.\nmeta_rates <- c(\"E2E2\" = 0.71, \"E2E3\" = 11.4, \"E2E4\" = 2.32,\n \"E3E3\" = 61.0, \"E3E4\" = 22.6, \"E4E4\" = 2.22)\nmeta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100%\n\nobs_rates <- table(ad_data$Genotype)/nrow(ad_data)\nround(cbind(obs_rates, meta_rates) * 100, 2)\n```\n\nSuppose our null hypothesis is that `Genotype` follows the same frequency distribution as the meta-analysis. Lets now test whether this difference in distributions is statistically significant.\n\nFirst, to carry out this hypothesis test, we would calculate our observed statistic.\n\n```{r}\n#| label: \"observed-gof-statistic\"\n#| warning: false\n#| message: false\n# calculating the null distribution\nobserved_gof_statistic <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n calculate(stat = \"Chisq\")\n```\n\nThe observed statistic is `r observed_gof_statistic`. Now, generating a null distribution, by just dropping in a call to `generate()`:\n\n\n```{r}\n#| label: \"null-distribution-gof\"\n#| warning: false\n#| message: false\n# generating a null distribution\nnull_distribution_gof <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n generate(reps = 5000, type = \"simulate\") %>%\n calculate(stat = \"Chisq\")\n```\n\nAgain, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:\n\n```{r}\n#| label: \"visualize-indep-gof\"\n#| warning: false\n#| message: false\n# visualize the null distribution and test statistic!\nnull_distribution_gof %>%\n visualize() + \n shade_p_value(observed_gof_statistic,\n direction = \"greater\")\n```\n\nThis statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? 
Calculating the p-value:\n\n```{r}\n#| label: \"get-p-value-gof\"\n#| warning: false\n#| message: false\n# calculate the p-value\np_value_gof <- null_distribution_gof %>%\n get_p_value(observed_gof_statistic,\n direction = \"greater\")\n\np_value_gof\n```\n\nThus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately `r p_value_gof`.\n\nAgain, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out chi-squared goodness of fit tests on tidy data. The syntax goes like this:\n\n```{r}\n#| label: \"chisq-gof-wrapper\"\n#| message: false\n#| warning: false\nchisq_test(ad_data, response = Genotype, p = meta_rates)\n```\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwind
ow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this 
document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to 
Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Statistical analysis of contingency tables","categories":["statistical analysis","analysis of tables","hypothesis testing"],"type":"learn-subsection","weight":5,"description":"Use tests of independence and goodness of fit to analyze tables of counts.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/work/bayes-opt/index.qmd.json b/.quarto/idx/learn/work/bayes-opt/index.qmd.json new file mode 100644 index 00000000..e97d2a44 --- /dev/null +++ b/.quarto/idx/learn/work/bayes-opt/index.qmd.json @@ -0,0 +1 @@ +{"title":"Iterative Bayesian optimization of a classification model","markdown":{"yaml":{"title":"Iterative Bayesian optimization of a classification model","categories":["model tuning","Bayesian optimization","SVMs"],"type":"learn-subsection","weight":3,"description":"Identify the best hyperparameters for a model using Bayesian optimization of iterative search.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(tune)\nlibrary(kernlab)\nlibrary(rlang)\nlibrary(doMC)\nlibrary(themis)\nregisterDoMC(cores = parallel::detectCores())\n\npkgs <- c(\"modeldata\", \"kernlab\", \"tidymodels\", \"themis\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nMany of the examples for model tuning focus on [grid search](/learn/work/tune-svm/). 
For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, _iterative search_ can be used to analyze the existing tuning parameter results and then _predict_ which tuning parameters to try next. \n\nThere are a variety of methods for iterative search and the focus in this article is on _Bayesian optimization_. For more information on this method, these resources might be helpful:\n\n* [_Practical bayesian optimization of machine learning algorithms_](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=Practical+Bayesian+Optimization+of+Machine+Learning+Algorithms&btnG=) (2012). J Snoek, H Larochelle, and RP Adams. Advances in neural information. \n\n* [_A Tutorial on Bayesian Optimization for Machine Learning_](https://www.cs.toronto.edu/~rgrosse/courses/csc411_f18/tutorials/tut8_adams_slides.pdf) (2018). R Adams.\n\n * [_Gaussian Processes for Machine Learning_](http://www.gaussianprocess.org/gpml/) (2006). C E Rasmussen and C Williams.\n\n* [Other articles!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=\"Bayesian+Optimization\"&btnG=)\n\n\n## Cell segmenting revisited\n\nTo demonstrate this approach to tuning models, let's return to the cell segmentation data from the [Getting Started](/start/resampling/) article on resampling: \n\n```{r}\n#| label: \"import-data\"\nlibrary(tidymodels)\nlibrary(modeldata)\n\n# Load data\ndata(cells)\n\nset.seed(2369)\ntr_te_split <- initial_split(cells %>% select(-case), prop = 3/4)\ncell_train <- training(tr_te_split)\ncell_test <- testing(tr_te_split)\n\nset.seed(1697)\nfolds <- vfold_cv(cell_train, v = 10)\n```\n\n## The tuning scheme\n\nSince the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about `r floor(mean(cells$class == \"PS\") * 100)`% of the data are poorly segmented. 
To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be _tuned_ so that we have enough (but not too many) representations of the data. \n\n```{r}\n#| label: \"recipe\"\nlibrary(themis)\n\ncell_pre_proc <-\n recipe(class ~ ., data = cell_train) %>%\n step_YeoJohnson(all_predictors()) %>%\n step_normalize(all_predictors()) %>%\n step_pca(all_predictors(), num_comp = tune()) %>%\n step_downsample(class)\n```\n\nIn this analysis, we will use a support vector machine to model the data. Let's use a radial basis function (RBF) kernel and tune its main parameter ($\\sigma$). Additionally, the main SVM parameter, the cost value, also needs optimization. \n\n```{r}\n#| label: \"model\"\nsvm_mod <-\n svm_rbf(mode = \"classification\", cost = tune(), rbf_sigma = tune()) %>%\n set_engine(\"kernlab\")\n```\n\nThese two objects (the recipe and model) will be combined into a single object via the `workflow()` function from the [workflows](https://workflows.tidymodels.org/) package; this object will be used in the optimization process. \n\n```{r}\n#| label: \"workflow\"\nsvm_wflow <-\n workflow() %>%\n add_model(svm_mod) %>%\n add_recipe(cell_pre_proc)\n```\n\nFrom this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by: \n\n```{r}\n#| label: \"pset\"\nsvm_set <- extract_parameter_set_dials(svm_wflow)\nsvm_set\n```\n\nThe default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the `update()` function. Let's constrain the search to one to twenty components by updating the `num_comp` parameter. 
Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all): \n\n```{r}\n#| label: \"update\"\nsvm_set <- \n svm_set %>% \n update(num_comp = num_comp(c(0L, 20L)))\n```\n\n## Sequential tuning \n\nBayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an _acquisition function_. \n\nFor example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as $\\kappa$) is a value that can be used to make trade-offs between **exploration** and **exploitation**. \n\n * **Exploration** means that the search will consider candidates in untested space.\n\n * **Exploitation** focuses in areas where the previous best results occurred. \n\nThe variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity. \n\nWe'll use another acquisition function, _expected improvement_, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the [package vignette for acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html). 
\n\n```{r}\n#| label: \"search\"\n#| cache: false\nset.seed(12)\nsearch_res <-\n svm_wflow %>% \n tune_bayes(\n resamples = folds,\n # To use non-default parameter ranges\n param_info = svm_set,\n # Generate five at semi-random to start\n initial = 5,\n iter = 50,\n # How to measure performance?\n metrics = metric_set(roc_auc),\n control = control_bayes(no_improve = 30, verbose = TRUE)\n )\n```\n\nThe resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number:\n\n```{r}\n#| label: \"show-iters\"\nsearch_res\n```\n\nAs with grid search, we can summarize the results over resamples:\n\n```{r}\n#| label: \"summarize-iters\"\nestimates <- \n collect_metrics(search_res) %>% \n arrange(.iter)\n\nestimates\n```\n\n\nThe best performance of the initial set of candidate values was `AUC = `r max(estimates$mean[estimates$.iter == 0])` `. The best results were achieved at iteration `r estimates$.iter[which.max(estimates$mean)]` with a corresponding AUC value of `r max(estimates$mean)`. The five best results are:\n\n```{r}\n#| label: \"best\"\nshow_best(search_res, metric = \"roc_auc\")\n```\n\nA plot of the search iterations can be created via:\n\n```{r}\n#| label: \"bo-plot\"\nautoplot(search_res, type = \"performance\")\n```\n\nThere are many parameter combinations have roughly equivalent results. \n\nHow did the parameters change over iterations? 
\n\n\n```{r}\n#| label: \"bo-param-plot\"\n#| fig-width: 9\nautoplot(search_res, type = \"parameters\") + \n labs(x = \"Iterations\", y = NULL)\n```\n\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(tune)\nlibrary(kernlab)\nlibrary(rlang)\nlibrary(doMC)\nlibrary(themis)\nregisterDoMC(cores = parallel::detectCores())\n\npkgs <- c(\"modeldata\", \"kernlab\", \"tidymodels\", \"themis\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nMany of the examples for model tuning focus on [grid search](/learn/work/tune-svm/). For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, _iterative search_ can be used to analyze the existing tuning parameter results and then _predict_ which tuning parameters to try next. \n\nThere are a variety of methods for iterative search and the focus in this article is on _Bayesian optimization_. For more information on this method, these resources might be helpful:\n\n* [_Practical bayesian optimization of machine learning algorithms_](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=Practical+Bayesian+Optimization+of+Machine+Learning+Algorithms&btnG=) (2012). J Snoek, H Larochelle, and RP Adams. Advances in neural information. \n\n* [_A Tutorial on Bayesian Optimization for Machine Learning_](https://www.cs.toronto.edu/~rgrosse/courses/csc411_f18/tutorials/tut8_adams_slides.pdf) (2018). R Adams.\n\n * [_Gaussian Processes for Machine Learning_](http://www.gaussianprocess.org/gpml/) (2006). 
C E Rasmussen and C Williams.\n\n* [Other articles!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=\"Bayesian+Optimization\"&btnG=)\n\n\n## Cell segmenting revisited\n\nTo demonstrate this approach to tuning models, let's return to the cell segmentation data from the [Getting Started](/start/resampling/) article on resampling: \n\n```{r}\n#| label: \"import-data\"\nlibrary(tidymodels)\nlibrary(modeldata)\n\n# Load data\ndata(cells)\n\nset.seed(2369)\ntr_te_split <- initial_split(cells %>% select(-case), prop = 3/4)\ncell_train <- training(tr_te_split)\ncell_test <- testing(tr_te_split)\n\nset.seed(1697)\nfolds <- vfold_cv(cell_train, v = 10)\n```\n\n## The tuning scheme\n\nSince the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about `r floor(mean(cells$class == \"PS\") * 100)`% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be _tuned_ so that we have enough (but not too many) representations of the data. \n\n```{r}\n#| label: \"recipe\"\nlibrary(themis)\n\ncell_pre_proc <-\n recipe(class ~ ., data = cell_train) %>%\n step_YeoJohnson(all_predictors()) %>%\n step_normalize(all_predictors()) %>%\n step_pca(all_predictors(), num_comp = tune()) %>%\n step_downsample(class)\n```\n\nIn this analysis, we will use a support vector machine to model the data. Let's use a radial basis function (RBF) kernel and tune its main parameter ($\\sigma$). Additionally, the main SVM parameter, the cost value, also needs optimization. 
\n\n```{r}\n#| label: \"model\"\nsvm_mod <-\n svm_rbf(mode = \"classification\", cost = tune(), rbf_sigma = tune()) %>%\n set_engine(\"kernlab\")\n```\n\nThese two objects (the recipe and model) will be combined into a single object via the `workflow()` function from the [workflows](https://workflows.tidymodels.org/) package; this object will be used in the optimization process. \n\n```{r}\n#| label: \"workflow\"\nsvm_wflow <-\n workflow() %>%\n add_model(svm_mod) %>%\n add_recipe(cell_pre_proc)\n```\n\nFrom this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by: \n\n```{r}\n#| label: \"pset\"\nsvm_set <- extract_parameter_set_dials(svm_wflow)\nsvm_set\n```\n\nThe default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the `update()` function. Let's constrain the search to one to twenty components by updating the `num_comp` parameter. Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all): \n\n```{r}\n#| label: \"update\"\nsvm_set <- \n svm_set %>% \n update(num_comp = num_comp(c(0L, 20L)))\n```\n\n## Sequential tuning \n\nBayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an _acquisition function_. \n\nFor example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as $\\kappa$) is a value that can be used to make trade-offs between **exploration** and **exploitation**. 
\n\n * **Exploration** means that the search will consider candidates in untested space.\n\n * **Exploitation** focuses in areas where the previous best results occurred. \n\nThe variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity. \n\nWe'll use another acquisition function, _expected improvement_, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the [package vignette for acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html). \n\n```{r}\n#| label: \"search\"\n#| cache: false\nset.seed(12)\nsearch_res <-\n svm_wflow %>% \n tune_bayes(\n resamples = folds,\n # To use non-default parameter ranges\n param_info = svm_set,\n # Generate five at semi-random to start\n initial = 5,\n iter = 50,\n # How to measure performance?\n metrics = metric_set(roc_auc),\n control = control_bayes(no_improve = 30, verbose = TRUE)\n )\n```\n\nThe resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number:\n\n```{r}\n#| label: \"show-iters\"\nsearch_res\n```\n\nAs with grid search, we can summarize the results over resamples:\n\n```{r}\n#| label: \"summarize-iters\"\nestimates <- \n collect_metrics(search_res) %>% \n arrange(.iter)\n\nestimates\n```\n\n\nThe best performance of the initial set of candidate values was `AUC = `r max(estimates$mean[estimates$.iter == 0])` `. The best results were achieved at iteration `r estimates$.iter[which.max(estimates$mean)]` with a corresponding AUC value of `r max(estimates$mean)`. 
The five best results are:\n\n```{r}\n#| label: \"best\"\nshow_best(search_res, metric = \"roc_auc\")\n```\n\nA plot of the search iterations can be created via:\n\n```{r}\n#| label: \"bo-plot\"\nautoplot(search_res, type = \"performance\")\n```\n\nThere are many parameter combinations have roughly equivalent results. \n\nHow did the parameters change over iterations? \n\n\n```{r}\n#| label: \"bo-param-plot\"\n#| fig-width: 9\nautoplot(search_res, type = \"parameters\") + \n labs(x = \"Iterations\", y = NULL)\n```\n\n\n\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n \n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"
output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle 
section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Iterative Bayesian optimization of a 
classification model","categories":["model tuning","Bayesian optimization","SVMs"],"type":"learn-subsection","weight":3,"description":"Identify the best hyperparameters for a model using Bayesian optimization of iterative search.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/work/case-weights/index.qmd.json b/.quarto/idx/learn/work/case-weights/index.qmd.json new file mode 100644 index 00000000..cfc6a505 --- /dev/null +++ b/.quarto/idx/learn/work/case-weights/index.qmd.json @@ -0,0 +1 @@ +{"title":"Creating case weights based on time","markdown":{"yaml":{"title":"Creating case weights based on time","categories":["model fitting","case weights","time series"],"type":"learn-subsection","weight":5,"description":"Create models that use coefficients, extract them from fitted models, and visualize them.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\npkgs <- c(\"tidymodels\")\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThis article demonstrates how to create and use importance weights in a predictive model. 
Using importance weights is a way to have our model care more about some observations than others.\n\n## Example Data\n\nTo demonstrate we will use the Chicago data from the modeldata package.\n\n```{r}\nlibrary(tidymodels)\ndata(Chicago)\n\nChicago <- Chicago %>%\n select(ridership, date, one_of(stations))\n```\n\nFrom `?Chicago`\n\n> These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station.\n\n> The date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules.\n\nFor simplicity, we have limited our view to the date and station variables.\n\n## Creating weights\n\nThis data set contains daily information from `r min(Chicago$date)` to `r max(Chicago$date)`. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations appear a long time ago and not be as representative of the future as more recent observations. \n\nWe could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past. \n\nWe need to decide on a way to calculate the case weights. 
The main thing constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when `x = 0` and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function\n\n$$ weight = base ^ x $$\n\nwhere `base` is some constant and `x` is the number of days. To make sure that we select a reasonable `base`, we need to do some manual testing, starting with looking at how old the oldest observation is.\n\n```{r}\ndifftime(\"2016-01-01\", min(Chicago$date))\n```\n\nUsing this information we can visualize the weight curve, to see if we like the value of `base`.\n\n```{r}\ntibble_days <- tibble(days = 0:5457)\n\ntibble_days %>%\n ggplot(aes(days)) +\n geom_function(fun = ~ 0.99 ^ .x)\n```\n\nsetting `base` to 0.99 appears to be down weighted too much. Any observation more than a year old would have no influence.\n\nLet us try a few more values to find \n\n```{r}\nmap_dfr(\n c(0.99, 0.999, 0.9999),\n ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days)\n) %>%\n ggplot(aes(days, value, group = base, color = base)) +\n geom_line()\n```\n\nFrom this, we could pick something around 0.999 since it gives a better balance. Let's create a small function to help us encode this weight. \n\n```{r}\nweights_from_dates <- function(x, ref) {\n if_else(\n condition = x >= ref,\n true = 1, # <- Notice that I'm setting any future weight to 1.\n false = 0.999 ^ as.numeric(difftime(ref, x, units = \"days\"))\n )\n}\n```\n\nWe then modify `Chicago` to add a weight column, explicitly making it an importance weight with `importance_weight()`.\n\n```{r}\nChicago <- Chicago %>%\n mutate(weight = weights_from_dates(date, \"2016-01-01\"),\n weight = importance_weights(weight))\n```\n\nThis approach to creating importance weights from dates is not limited to cases where we have daily observations. 
You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don't need to use days as the unit. Seconds, weeks, or years could be used as well.\n\n## Modeling\n\nWe start by splitting up our data into a training and testing set based on the day `\"2016-01-01\"`. We added weights to the data set before splitting it so each set has weights.\n\n```{r}\nChicago_train <- Chicago %>% filter(date < \"2016-01-01\")\nChicago_test <- Chicago %>% filter(date >= \"2016-01-01\")\n```\n\nNext, we are going to create a recipe. The weights won't have any influence on the preprocessing since none of these operations are supervised and we are using importance weights.\n\n```{r}\nbase_recipe <-\n recipe(ridership ~ ., data = Chicago_train) %>%\n # Create date features\n step_date(date) %>%\n step_holiday(date, keep_original_cols = FALSE) %>%\n # Remove any columns with a single unique value\n step_zv(all_predictors()) %>%\n # Normalize all the numerical features\n step_normalize(all_numeric_predictors()) %>%\n # Perform PCA to reduce the correlation bet the stations\n step_pca(all_numeric_predictors(), threshold = 0.95)\n```\n\nNext we need to build the rest of the workflow. We use a linear regression specification\n\n```{r}\nlm_spec <-\n linear_reg() %>%\n set_engine(\"lm\")\n```\n\nand we add these together in the workflow. 
To activate the case weights, we use the `add_case_weights()` function to specify the name of the case weights being used.\n\n```{r}\nlm_wflow <-\n workflow() %>% \n add_case_weights(weight) %>%\n add_recipe(base_recipe) %>%\n add_model(lm_spec)\n\nlm_wflow\n```\n\nWith all that done we can fit the workflow with the usual syntax: \n\n```{r}\nlm_fit <- fit(lm_wflow, data = Chicago_train)\nlm_fit\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\npkgs <- c(\"tidymodels\")\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThis article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others.\n\n## Example Data\n\nTo demonstrate we will use the Chicago data from the modeldata package.\n\n```{r}\nlibrary(tidymodels)\ndata(Chicago)\n\nChicago <- Chicago %>%\n select(ridership, date, one_of(stations))\n```\n\nFrom `?Chicago`\n\n> These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station.\n\n> The date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules.\n\nFor simplicity, we have limited our view to the date and station variables.\n\n## Creating weights\n\nThis data set contains daily information from `r min(Chicago$date)` to `r max(Chicago$date)`. 
We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations appear a long time ago and not be as representative of the future as more recent observations. \n\nWe could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past. \n\nWe need to decide on a way to calculate the case weights. The main thing constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when `x = 0` and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function\n\n$$ weight = base ^ x $$\n\nwhere `base` is some constant and `x` is the number of days. To make sure that we select a reasonable `base`, we need to do some manual testing, starting with looking at how old the oldest observation is.\n\n```{r}\ndifftime(\"2016-01-01\", min(Chicago$date))\n```\n\nUsing this information we can visualize the weight curve, to see if we like the value of `base`.\n\n```{r}\ntibble_days <- tibble(days = 0:5457)\n\ntibble_days %>%\n ggplot(aes(days)) +\n geom_function(fun = ~ 0.99 ^ .x)\n```\n\nsetting `base` to 0.99 appears to be down weighted too much. 
Any observation more than a year old would have no influence.\n\nLet us try a few more values to find \n\n```{r}\nmap_dfr(\n c(0.99, 0.999, 0.9999),\n ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days)\n) %>%\n ggplot(aes(days, value, group = base, color = base)) +\n geom_line()\n```\n\nFrom this, we could pick something around 0.999 since it gives a better balance. Let's create a small function to help us encode this weight. \n\n```{r}\nweights_from_dates <- function(x, ref) {\n if_else(\n condition = x >= ref,\n true = 1, # <- Notice that I'm setting any future weight to 1.\n false = 0.999 ^ as.numeric(difftime(ref, x, units = \"days\"))\n )\n}\n```\n\nWe then modify `Chicago` to add a weight column, explicitly making it an importance weight with `importance_weight()`.\n\n```{r}\nChicago <- Chicago %>%\n mutate(weight = weights_from_dates(date, \"2016-01-01\"),\n weight = importance_weights(weight))\n```\n\nThis approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don't need to use days as the unit. Seconds, weeks, or years could be used as well.\n\n## Modeling\n\nWe start by splitting up our data into a training and testing set based on the day `\"2016-01-01\"`. We added weights to the data set before splitting it so each set has weights.\n\n```{r}\nChicago_train <- Chicago %>% filter(date < \"2016-01-01\")\nChicago_test <- Chicago %>% filter(date >= \"2016-01-01\")\n```\n\nNext, we are going to create a recipe. 
The weights won't have any influence on the preprocessing since none of these operations are supervised and we are using importance weights.\n\n```{r}\nbase_recipe <-\n recipe(ridership ~ ., data = Chicago_train) %>%\n # Create date features\n step_date(date) %>%\n step_holiday(date, keep_original_cols = FALSE) %>%\n # Remove any columns with a single unique value\n step_zv(all_predictors()) %>%\n # Normalize all the numerical features\n step_normalize(all_numeric_predictors()) %>%\n # Perform PCA to reduce the correlation bet the stations\n step_pca(all_numeric_predictors(), threshold = 0.95)\n```\n\nNext we need to build the rest of the workflow. We use a linear regression specification\n\n```{r}\nlm_spec <-\n linear_reg() %>%\n set_engine(\"lm\")\n```\n\nand we add these together in the workflow. To activate the case weights, we use the `add_case_weights()` function to specify the name of the case weights being used.\n\n```{r}\nlm_wflow <-\n workflow() %>% \n add_case_weights(weight) %>%\n add_recipe(base_recipe) %>%\n add_model(lm_spec)\n\nlm_wflow\n```\n\nWith all that done we can fit the workflow with the usual syntax: \n\n```{r}\nlm_fit <- fit(lm_wflow, data = Chicago_train)\nlm_fit\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: 
false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Creating case weights based on time","categories":["model fitting","case weights","time series"],"type":"learn-subsection","weight":5,"description":"Create models that use coefficients, extract them from 
fitted models, and visualize them.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/work/nested-resampling/index.qmd.json b/.quarto/idx/learn/work/nested-resampling/index.qmd.json new file mode 100644 index 00000000..12479da6 --- /dev/null +++ b/.quarto/idx/learn/work/nested-resampling/index.qmd.json @@ -0,0 +1 @@ +{"title":"Nested resampling","markdown":{"yaml":{"title":"Nested resampling","categories":["nested resampling","SVMs"],"type":"learn-subsection","weight":2,"description":"Estimate the best hyperparameters for a model using nested resampling.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels) \nlibrary(scales)\nlibrary(mlbench)\nlibrary(kernlab)\nlibrary(furrr)\n\npkgs <- c(\"tidymodels\", \"scales\", \"mlbench\", \"kernlab\", \"furrr\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nIn this article, we discuss an alternative method for evaluating and tuning models, called [nested resampling](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22nested+resampling%22+inner+outer&btnG=). While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance.\n\n## Resampling models\n\nA typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. 
In rsample, we use the term *analysis set* for the data that are used to fit the model and the term *assessment set* for the set used to compute performance:\n\n```{r}\n#| label: \"resampling-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"70%\"\nknitr::include_graphics(\"img/resampling.svg\")\n```\n\nA common method for tuning models is [grid search](/learn/work/tune-svm/) where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter.\n\nThe potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for *optimization bias* since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance.\n\nNested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An *outer* resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times.\n\nOnce the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method's assessment sets are a unbiased estimate of the model.\n\nWe will simulate some regression data to illustrate the methods. 
The mlbench package has a function `mlbench::mlbench.friedman1()` that can simulate a complex regression data structure from the [original MARS publication](https://scholar.google.com/scholar?hl=en&q=%22Multivariate+adaptive+regression+splines%22&btnG=&as_sdt=1%2C7&as_sdtp=). A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed.\n\n```{r}\n#| label: \"sim-data\"\nlibrary(mlbench)\nsim_data <- function(n) {\n tmp <- mlbench.friedman1(n, sd = 1)\n tmp <- cbind(tmp$x, tmp$y)\n tmp <- as.data.frame(tmp)\n names(tmp)[ncol(tmp)] <- \"y\"\n tmp\n}\n\nset.seed(9815)\ntrain_dat <- sim_data(100)\nlarge_dat <- sim_data(10^5)\n```\n\n## Nested resampling\n\nTo get started, the types of resampling methods need to be specified. This isn't a large data set, so 5 repeats of 10-fold cross validation will be used as the *outer* resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let's use 25 iterations of the bootstrap. This means that there will eventually be `5 * 10 * 25 = 1250` models that are fit to the data *per tuning parameter*. These models will be discarded once the performance of the model has been quantified.\n\nTo create the tibble with the resampling specifications:\n\n```{r}\n#| label: \"tibble-gen\"\nlibrary(tidymodels)\nresults <- nested_cv(train_dat, \n outside = vfold_cv(repeats = 5), \n inside = bootstraps(times = 25))\nresults\n```\n\nThe splitting information for each resample is contained in the `split` objects. 
Focusing on the second fold of the first repeat:\n\n```{r}\n#| label: \"split-example\"\nresults$splits[[2]]\n```\n\n`<90/10/100>` indicates the number of observations in the analysis set, assessment set, and the original data.\n\nEach element of `inner_resamples` has its own tibble with the bootstrapping splits.\n\n```{r}\n#| label: \"inner-splits\"\nresults$inner_resamples[[5]]\n```\n\nThese are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data:\n\n```{r}\n#| label: \"inner-boot-split\"\nresults$inner_resamples[[5]]$splits[[1]]\n```\n\nTo start, we need to define how the model will be created and measured. Let's use a radial basis support vector machine model via the function `kernlab::ksvm`. This model is generally considered to have *two* tuning parameters: the SVM cost value and the kernel parameter `sigma`. For illustration purposes here, only the cost value will be tuned and the function `kernlab::sigest` will be used to estimate `sigma` during each model fit. This is automatically done by `ksvm`.\n\nAfter the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. **One important note:** for this model, it is critical to center and scale the predictors before computing dot products. 
We don't do this operation here because `mlbench.friedman1` simulates all of the predictors to be standardized uniform random variables.\n\nOur function to fit the model and compute the RMSE is:\n\n```{r}\n#| label: \"rmse-func\"\nlibrary(kernlab)\n\n# `object` will be an `rsplit` object from our `results` tibble\n# `cost` is the tuning parameter\nsvm_rmse <- function(object, cost = 1) {\n y_col <- ncol(object$data)\n mod <- \n svm_rbf(mode = \"regression\", cost = cost) %>% \n set_engine(\"kernlab\") %>% \n fit(y ~ ., data = analysis(object))\n \n holdout_pred <- \n predict(mod, assessment(object) %>% dplyr::select(-y)) %>% \n bind_cols(assessment(object) %>% dplyr::select(y))\n rmse(holdout_pred, truth = y, estimate = .pred)$.estimate\n}\n\n# In some case, we want to parameterize the function over the tuning parameter:\nrmse_wrapper <- function(cost, object) svm_rmse(object, cost)\n```\n\nFor the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. To do this, create a wrapper:\n\n```{r}\n#| label: \"inner-tune-func\"\n# `object` will be an `rsplit` object for the bootstrap samples\ntune_over_cost <- function(object) {\n tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% \n mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object))\n}\n```\n\nSince this will be called across the set of outer cross-validation splits, another wrapper is required:\n\n```{r}\n#| label: \"inner-func\"\n# `object` is an `rsplit` object in `results$inner_resamples` \nsummarize_tune_results <- function(object) {\n # Return row-bound tibble that has the 25 bootstrap results\n map_df(object$splits, tune_over_cost) %>%\n # For each value of the tuning parameter, compute the \n # average RMSE which is the inner bootstrap estimate. 
\n group_by(cost) %>%\n summarize(mean_RMSE = mean(RMSE, na.rm = TRUE),\n n = length(RMSE),\n .groups = \"drop\")\n}\n```\n\nNow that those functions are defined, we can execute all the inner resampling loops:\n\n```{r}\n#| label: \"inner-runs\"\n#| eval: false\ntuning_results <- map(results$inner_resamples, summarize_tune_results) \n```\n\nAlternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using `map()`, the function `future_map()` parallelizes the iterations using the [future package](https://cran.r-project.org/web/packages/future/vignettes/future-1-overview.html). The `multisession` plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations.\n\n```{r}\n#| label: \"inner-runs-parallel\"\n#| warning: false\nlibrary(furrr)\nplan(multisession)\n\ntuning_results <- future_map(results$inner_resamples, summarize_tune_results) \n```\n\nThe object `tuning_results` is a list of data frames for each of the 50 outer resamples.\n\nLet's make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations:\n\n```{r}\n#| label: \"rmse-plot\"\n#| fig-height: 4\n#| message: false\nlibrary(scales)\n\npooled_inner <- tuning_results %>% bind_rows\n\nbest_cost <- function(dat) dat[which.min(dat$mean_RMSE),]\n\np <- \n ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"Inner RMSE\")\n\nfor (i in 1:length(tuning_results))\n p <- p +\n geom_line(data = tuning_results[[i]], alpha = .2) +\n geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4)\n\np <- p + geom_smooth(data = pooled_inner, se = FALSE)\np\n```\n\nEach gray line is a separate bootstrap resampling curve created from a different 90% of the data. 
The blue line is a LOESS smooth of all the results pooled together.\n\nTo determine the best parameter estimate for each of the outer resampling iterations:\n\n```{r}\n#| label: \"choose\"\n#| fig-height: 4\ncost_vals <- \n tuning_results %>% \n map_df(best_cost) %>% \n select(cost)\n\nresults <- \n bind_cols(results, cost_vals) %>% \n mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1))))\n\nggplot(results, aes(x = cost)) + \n geom_bar() + \n xlab(\"SVM Cost\") + \n scale_x_discrete(drop = FALSE)\n```\n\nMost of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger.\n\nNow that we have these estimates, we can compute the outer resampling results for each of the `r nrow(results)` splits using the corresponding tuning parameter value:\n\n```{r}\n#| label: \"run-out-r\"\nresults <- \n results %>% \n mutate(RMSE = map2_dbl(splits, cost, svm_rmse))\n\nsummary(results$RMSE)\n```\n\nThe estimated RMSE for the model tuning process is `r round(mean(results$RMSE), 2)`.\n\nWhat is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, `r nrow(results)` SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. The associated RMSE is the biased estimate.\n\n```{r}\n#| label: \"not-nested\"\n#| fig-height: 4\nnot_nested <- \n map(results$splits, tune_over_cost) %>%\n bind_rows\n\nouter_summary <- not_nested %>% \n group_by(cost) %>% \n summarize(outer_RMSE = mean(RMSE), n = length(RMSE))\n\nouter_summary\n\nggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + \n geom_point() + \n geom_line() + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"RMSE\")\n```\n\nThe non-nested procedure estimates the RMSE to be `r round(min(outer_summary$outer_RMSE), 2)`. 
Both estimates are fairly close.\n\nThe approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning.\n\n```{r}\n#| label: \"large-sample-estimate\"\nfinalModel <- ksvm(y ~ ., data = train_dat, C = 2)\nlarge_pred <- predict(finalModel, large_dat[, -ncol(large_dat)])\nsqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE))\n```\n\nThe nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels) \nlibrary(scales)\nlibrary(mlbench)\nlibrary(kernlab)\nlibrary(furrr)\n\npkgs <- c(\"tidymodels\", \"scales\", \"mlbench\", \"kernlab\", \"furrr\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nIn this article, we discuss an alternative method for evaluating and tuning models, called [nested resampling](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22nested+resampling%22+inner+outer&btnG=). While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance.\n\n## Resampling models\n\nA typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. 
In rsample, we use the term *analysis set* for the data that are used to fit the model and the term *assessment set* for the set used to compute performance:\n\n```{r}\n#| label: \"resampling-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"70%\"\nknitr::include_graphics(\"img/resampling.svg\")\n```\n\nA common method for tuning models is [grid search](/learn/work/tune-svm/) where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter.\n\nThe potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for *optimization bias* since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance.\n\nNested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An *outer* resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times.\n\nOnce the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method's assessment sets are a unbiased estimate of the model.\n\nWe will simulate some regression data to illustrate the methods. 
The mlbench package has a function `mlbench::mlbench.friedman1()` that can simulate a complex regression data structure from the [original MARS publication](https://scholar.google.com/scholar?hl=en&q=%22Multivariate+adaptive+regression+splines%22&btnG=&as_sdt=1%2C7&as_sdtp=). A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed.\n\n```{r}\n#| label: \"sim-data\"\nlibrary(mlbench)\nsim_data <- function(n) {\n tmp <- mlbench.friedman1(n, sd = 1)\n tmp <- cbind(tmp$x, tmp$y)\n tmp <- as.data.frame(tmp)\n names(tmp)[ncol(tmp)] <- \"y\"\n tmp\n}\n\nset.seed(9815)\ntrain_dat <- sim_data(100)\nlarge_dat <- sim_data(10^5)\n```\n\n## Nested resampling\n\nTo get started, the types of resampling methods need to be specified. This isn't a large data set, so 5 repeats of 10-fold cross validation will be used as the *outer* resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let's use 25 iterations of the bootstrap. This means that there will eventually be `5 * 10 * 25 = 1250` models that are fit to the data *per tuning parameter*. These models will be discarded once the performance of the model has been quantified.\n\nTo create the tibble with the resampling specifications:\n\n```{r}\n#| label: \"tibble-gen\"\nlibrary(tidymodels)\nresults <- nested_cv(train_dat, \n outside = vfold_cv(repeats = 5), \n inside = bootstraps(times = 25))\nresults\n```\n\nThe splitting information for each resample is contained in the `split` objects. 
Focusing on the second fold of the first repeat:\n\n```{r}\n#| label: \"split-example\"\nresults$splits[[2]]\n```\n\n`<90/10/100>` indicates the number of observations in the analysis set, assessment set, and the original data.\n\nEach element of `inner_resamples` has its own tibble with the bootstrapping splits.\n\n```{r}\n#| label: \"inner-splits\"\nresults$inner_resamples[[5]]\n```\n\nThese are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data:\n\n```{r}\n#| label: \"inner-boot-split\"\nresults$inner_resamples[[5]]$splits[[1]]\n```\n\nTo start, we need to define how the model will be created and measured. Let's use a radial basis support vector machine model via the function `kernlab::ksvm`. This model is generally considered to have *two* tuning parameters: the SVM cost value and the kernel parameter `sigma`. For illustration purposes here, only the cost value will be tuned and the function `kernlab::sigest` will be used to estimate `sigma` during each model fit. This is automatically done by `ksvm`.\n\nAfter the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. **One important note:** for this model, it is critical to center and scale the predictors before computing dot products. 
We don't do this operation here because `mlbench.friedman1` simulates all of the predictors to be standardized uniform random variables.\n\nOur function to fit the model and compute the RMSE is:\n\n```{r}\n#| label: \"rmse-func\"\nlibrary(kernlab)\n\n# `object` will be an `rsplit` object from our `results` tibble\n# `cost` is the tuning parameter\nsvm_rmse <- function(object, cost = 1) {\n y_col <- ncol(object$data)\n mod <- \n svm_rbf(mode = \"regression\", cost = cost) %>% \n set_engine(\"kernlab\") %>% \n fit(y ~ ., data = analysis(object))\n \n holdout_pred <- \n predict(mod, assessment(object) %>% dplyr::select(-y)) %>% \n bind_cols(assessment(object) %>% dplyr::select(y))\n rmse(holdout_pred, truth = y, estimate = .pred)$.estimate\n}\n\n# In some case, we want to parameterize the function over the tuning parameter:\nrmse_wrapper <- function(cost, object) svm_rmse(object, cost)\n```\n\nFor the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. To do this, create a wrapper:\n\n```{r}\n#| label: \"inner-tune-func\"\n# `object` will be an `rsplit` object for the bootstrap samples\ntune_over_cost <- function(object) {\n tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% \n mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object))\n}\n```\n\nSince this will be called across the set of outer cross-validation splits, another wrapper is required:\n\n```{r}\n#| label: \"inner-func\"\n# `object` is an `rsplit` object in `results$inner_resamples` \nsummarize_tune_results <- function(object) {\n # Return row-bound tibble that has the 25 bootstrap results\n map_df(object$splits, tune_over_cost) %>%\n # For each value of the tuning parameter, compute the \n # average RMSE which is the inner bootstrap estimate. 
\n group_by(cost) %>%\n summarize(mean_RMSE = mean(RMSE, na.rm = TRUE),\n n = length(RMSE),\n .groups = \"drop\")\n}\n```\n\nNow that those functions are defined, we can execute all the inner resampling loops:\n\n```{r}\n#| label: \"inner-runs\"\n#| eval: false\ntuning_results <- map(results$inner_resamples, summarize_tune_results) \n```\n\nAlternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using `map()`, the function `future_map()` parallelizes the iterations using the [future package](https://cran.r-project.org/web/packages/future/vignettes/future-1-overview.html). The `multisession` plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations.\n\n```{r}\n#| label: \"inner-runs-parallel\"\n#| warning: false\nlibrary(furrr)\nplan(multisession)\n\ntuning_results <- future_map(results$inner_resamples, summarize_tune_results) \n```\n\nThe object `tuning_results` is a list of data frames for each of the 50 outer resamples.\n\nLet's make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations:\n\n```{r}\n#| label: \"rmse-plot\"\n#| fig-height: 4\n#| message: false\nlibrary(scales)\n\npooled_inner <- tuning_results %>% bind_rows\n\nbest_cost <- function(dat) dat[which.min(dat$mean_RMSE),]\n\np <- \n ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"Inner RMSE\")\n\nfor (i in 1:length(tuning_results))\n p <- p +\n geom_line(data = tuning_results[[i]], alpha = .2) +\n geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4)\n\np <- p + geom_smooth(data = pooled_inner, se = FALSE)\np\n```\n\nEach gray line is a separate bootstrap resampling curve created from a different 90% of the data. 
The blue line is a LOESS smooth of all the results pooled together.\n\nTo determine the best parameter estimate for each of the outer resampling iterations:\n\n```{r}\n#| label: \"choose\"\n#| fig-height: 4\ncost_vals <- \n tuning_results %>% \n map_df(best_cost) %>% \n select(cost)\n\nresults <- \n bind_cols(results, cost_vals) %>% \n mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1))))\n\nggplot(results, aes(x = cost)) + \n geom_bar() + \n xlab(\"SVM Cost\") + \n scale_x_discrete(drop = FALSE)\n```\n\nMost of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger.\n\nNow that we have these estimates, we can compute the outer resampling results for each of the `r nrow(results)` splits using the corresponding tuning parameter value:\n\n```{r}\n#| label: \"run-out-r\"\nresults <- \n results %>% \n mutate(RMSE = map2_dbl(splits, cost, svm_rmse))\n\nsummary(results$RMSE)\n```\n\nThe estimated RMSE for the model tuning process is `r round(mean(results$RMSE), 2)`.\n\nWhat is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, `r nrow(results)` SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. The associated RMSE is the biased estimate.\n\n```{r}\n#| label: \"not-nested\"\n#| fig-height: 4\nnot_nested <- \n map(results$splits, tune_over_cost) %>%\n bind_rows\n\nouter_summary <- not_nested %>% \n group_by(cost) %>% \n summarize(outer_RMSE = mean(RMSE), n = length(RMSE))\n\nouter_summary\n\nggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + \n geom_point() + \n geom_line() + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"RMSE\")\n```\n\nThe non-nested procedure estimates the RMSE to be `r round(min(outer_summary$outer_RMSE), 2)`. 
Both estimates are fairly close.\n\nThe approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning.\n\n```{r}\n#| label: \"large-sample-estimate\"\nfinalModel <- ksvm(y ~ ., data = train_dat, C = 2)\nlarge_pred <- predict(finalModel, large_dat[, -ncol(large_dat)])\nsqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE))\n```\n\nThe nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.htm
l"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle 
section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Nested resampling","categories":["nested 
resampling","SVMs"],"type":"learn-subsection","weight":2,"description":"Estimate the best hyperparameters for a model using nested resampling.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/work/tune-svm/index.qmd.json b/.quarto/idx/learn/work/tune-svm/index.qmd.json new file mode 100644 index 00000000..90ef7dce --- /dev/null +++ b/.quarto/idx/learn/work/tune-svm/index.qmd.json @@ -0,0 +1 @@ +{"title":"Model tuning via grid search","markdown":{"yaml":{"title":"Model tuning via grid search","categories":["model tuning","SVMs"],"type":"learn-subsection","weight":1,"description":"Choose hyperparameters for a model by training on a grid of many possible parameter values.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(mlbench)\nlibrary(kernlab)\nlibrary(doMC)\nregisterDoMC(cores = parallel::detectCores())\n\npkgs <- c(\"tidymodels\", \"mlbench\", \"kernlab\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThis article demonstrates how to tune a model using grid search. Many models have **hyperparameters** that can't be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best. \n\n## Example data\n\nTo demonstrate model tuning, we'll use the Ionosphere data in the mlbench package:\n\n```{r}\n#| label: \"load-data\"\nlibrary(tidymodels)\nlibrary(mlbench)\ndata(Ionosphere)\n```\n\nFrom `?Ionosphere`:\n\n> This radar data was collected by a system in Goose Bay, Labrador. 
This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. \"good\" radar returns are those showing evidence of some type of structure in the ionosphere. \"bad\" returns are those that do not; their signals pass through the ionosphere.\n\n> Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this databse are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details.\n\nThere are 43 predictors and a factor outcome. Two of the predictors are factors (`V1` and `V2`) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions:\n\n```{r}\n#| label: \"factor-pred\"\ntable(Ionosphere$V1)\ntable(Ionosphere$V2)\n```\n\nThere's no point of putting `V2` into any model since is is a zero-variance predictor. `V1` is not but it _could_ be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value:\n\n```{r}\n#| label: \"glm-fail\"\n#| error: false\n#| eval: false\nglm(Class ~ ., data = Ionosphere, family = binomial)\n\n# Surprisingly, this doesn't help: \n\nglm(Class ~ . 
- V2, data = Ionosphere, family = binomial)\n```\n\nLet's remove these two problematic variables:\n\n```{r}\n#| label: \"ion-rm\"\nIonosphere <- Ionosphere %>% select(-V1, -V2)\n```\n\n## Inputs for the search\n\nTo demonstrate, we'll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the $\\sigma$ parameter in the kernel function:\n\n```{r}\n#| label: \"svm-mod\"\nsvm_mod <-\n svm_rbf(cost = tune(), rbf_sigma = tune()) %>%\n set_mode(\"classification\") %>%\n set_engine(\"kernlab\")\n```\n\nIn this article, tuning will be demonstrated in two ways, using:\n\n- a standard R formula, and \n- a recipe.\n\nLet's create a simple recipe here:\n\n```{r}\n#| label: \"rec\"\niono_rec <-\n recipe(Class ~ ., data = Ionosphere) %>%\n # remove any zero variance predictors\n step_zv(all_predictors()) %>% \n # remove any linear combinations\n step_lincomb(all_numeric())\n```\n\nThe only other required item for tuning is a resampling strategy as defined by an rsample object. Let's demonstrate using basic bootstrapping:\n\n```{r}\n#| label: \"rs\"\nset.seed(4943)\niono_rs <- bootstraps(Ionosphere, times = 30)\n```\n\n## Optional inputs\n\nAn _optional_ step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. To do this, a yardstick package function can be used to create a metric set:\n\n```{r}\n#| label: \"roc\"\nroc_vals <- metric_set(roc_auc)\n```\n\nIf no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used.\n\nAlso, a control object can be passed that specifies different aspects of the search. 
Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on. \n\n```{r}\n#| label: \"ctrl\"\nctrl <- control_grid(verbose = FALSE, save_pred = TRUE)\n```\n\n## Executing with a formula\n\nFirst, we can use the formula interface:\n\n```{r}\n#| label: \"grid\"\n#| message: false\nset.seed(35)\nformula_res <-\n svm_mod %>% \n tune_grid(\n Class ~ .,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nformula_res\n```\n\nThe `.metrics` column contains tibbles of the performance metrics for each tuning parameter combination:\n\n```{r}\n#| label: \"raw-metrics\"\nformula_res %>% \n select(.metrics) %>% \n slice(1) %>% \n pull(1)\n```\n\nTo get the final resampling estimates, the `collect_metrics()` function can be used on the grid object:\n\n```{r}\n#| label: \"metric-estimates\"\nestimates <- collect_metrics(formula_res)\nestimates\n```\n\nThe top combinations are:\n\n```{r}\n#| label: \"sorted-metrics\"\nshow_best(formula_res, metric = \"roc_auc\")\n```\n\n## Executing with a recipe\n\nNext, we can use the same syntax but pass a *recipe* in as the pre-processor argument:\n\n```{r}\n#| label: \"recipe\"\nset.seed(325)\nrecipe_res <-\n svm_mod %>% \n tune_grid(\n iono_rec,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nrecipe_res\n```\n\nThe best setting here is:\n\n```{r}\n#| label: \"best-rec\"\nshow_best(recipe_res, metric = \"roc_auc\")\n```\n\n## Out-of-sample predictions\n\nIf we used `save_pred = TRUE` to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using `collect_predictions()`:\n\n```{r}\n#| label: \"rec-preds\"\ncollect_predictions(recipe_res)\n```\n\nWe can obtain the hold-out sets for all the resamples augmented with the predictions using `augment()`, which provides opportunities for flexible visualization of model results:\n\n```{r}\n#| label: 
\"augment-preds\"\naugment(recipe_res) %>%\n ggplot(aes(V3, .pred_good, color = Class)) +\n geom_point(show.legend = FALSE) +\n facet_wrap(~Class)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n \n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(mlbench)\nlibrary(kernlab)\nlibrary(doMC)\nregisterDoMC(cores = parallel::detectCores())\n\npkgs <- c(\"tidymodels\", \"mlbench\", \"kernlab\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThis article demonstrates how to tune a model using grid search. Many models have **hyperparameters** that can't be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best. \n\n## Example data\n\nTo demonstrate model tuning, we'll use the Ionosphere data in the mlbench package:\n\n```{r}\n#| label: \"load-data\"\nlibrary(tidymodels)\nlibrary(mlbench)\ndata(Ionosphere)\n```\n\nFrom `?Ionosphere`:\n\n> This radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. \"good\" radar returns are those showing evidence of some type of structure in the ionosphere. \"bad\" returns are those that do not; their signals pass through the ionosphere.\n\n> Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. 
Instances in this databse are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details.\n\nThere are 43 predictors and a factor outcome. Two of the predictors are factors (`V1` and `V2`) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions:\n\n```{r}\n#| label: \"factor-pred\"\ntable(Ionosphere$V1)\ntable(Ionosphere$V2)\n```\n\nThere's no point of putting `V2` into any model since is is a zero-variance predictor. `V1` is not but it _could_ be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value:\n\n```{r}\n#| label: \"glm-fail\"\n#| error: false\n#| eval: false\nglm(Class ~ ., data = Ionosphere, family = binomial)\n\n# Surprisingly, this doesn't help: \n\nglm(Class ~ . 
- V2, data = Ionosphere, family = binomial)\n```\n\nLet's remove these two problematic variables:\n\n```{r}\n#| label: \"ion-rm\"\nIonosphere <- Ionosphere %>% select(-V1, -V2)\n```\n\n## Inputs for the search\n\nTo demonstrate, we'll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the $\\sigma$ parameter in the kernel function:\n\n```{r}\n#| label: \"svm-mod\"\nsvm_mod <-\n svm_rbf(cost = tune(), rbf_sigma = tune()) %>%\n set_mode(\"classification\") %>%\n set_engine(\"kernlab\")\n```\n\nIn this article, tuning will be demonstrated in two ways, using:\n\n- a standard R formula, and \n- a recipe.\n\nLet's create a simple recipe here:\n\n```{r}\n#| label: \"rec\"\niono_rec <-\n recipe(Class ~ ., data = Ionosphere) %>%\n # remove any zero variance predictors\n step_zv(all_predictors()) %>% \n # remove any linear combinations\n step_lincomb(all_numeric())\n```\n\nThe only other required item for tuning is a resampling strategy as defined by an rsample object. Let's demonstrate using basic bootstrapping:\n\n```{r}\n#| label: \"rs\"\nset.seed(4943)\niono_rs <- bootstraps(Ionosphere, times = 30)\n```\n\n## Optional inputs\n\nAn _optional_ step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. To do this, a yardstick package function can be used to create a metric set:\n\n```{r}\n#| label: \"roc\"\nroc_vals <- metric_set(roc_auc)\n```\n\nIf no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used.\n\nAlso, a control object can be passed that specifies different aspects of the search. 
Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on. \n\n```{r}\n#| label: \"ctrl\"\nctrl <- control_grid(verbose = FALSE, save_pred = TRUE)\n```\n\n## Executing with a formula\n\nFirst, we can use the formula interface:\n\n```{r}\n#| label: \"grid\"\n#| message: false\nset.seed(35)\nformula_res <-\n svm_mod %>% \n tune_grid(\n Class ~ .,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nformula_res\n```\n\nThe `.metrics` column contains tibbles of the performance metrics for each tuning parameter combination:\n\n```{r}\n#| label: \"raw-metrics\"\nformula_res %>% \n select(.metrics) %>% \n slice(1) %>% \n pull(1)\n```\n\nTo get the final resampling estimates, the `collect_metrics()` function can be used on the grid object:\n\n```{r}\n#| label: \"metric-estimates\"\nestimates <- collect_metrics(formula_res)\nestimates\n```\n\nThe top combinations are:\n\n```{r}\n#| label: \"sorted-metrics\"\nshow_best(formula_res, metric = \"roc_auc\")\n```\n\n## Executing with a recipe\n\nNext, we can use the same syntax but pass a *recipe* in as the pre-processor argument:\n\n```{r}\n#| label: \"recipe\"\nset.seed(325)\nrecipe_res <-\n svm_mod %>% \n tune_grid(\n iono_rec,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nrecipe_res\n```\n\nThe best setting here is:\n\n```{r}\n#| label: \"best-rec\"\nshow_best(recipe_res, metric = \"roc_auc\")\n```\n\n## Out-of-sample predictions\n\nIf we used `save_pred = TRUE` to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using `collect_predictions()`:\n\n```{r}\n#| label: \"rec-preds\"\ncollect_predictions(recipe_res)\n```\n\nWe can obtain the hold-out sets for all the resamples augmented with the predictions using `augment()`, which provides opportunities for flexible visualization of model results:\n\n```{r}\n#| label: 
\"augment-preds\"\naugment(recipe_res) %>%\n ggplot(aes(V3, .pred_good, color = Class)) +\n geom_point(show.legend = FALSE) +\n facet_wrap(~Class)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Model tuning via grid search","categories":["model tuning","SVMs"],"type":"learn-subsection","weight":1,"description":"Choose hyperparameters for a model by training on a grid of many possible parameter 
values.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/learn/work/tune-text/index.qmd.json b/.quarto/idx/learn/work/tune-text/index.qmd.json new file mode 100644 index 00000000..5e22b76c --- /dev/null +++ b/.quarto/idx/learn/work/tune-text/index.qmd.json @@ -0,0 +1 @@ +{"title":"Tuning text models","markdown":{"yaml":{"title":"Tuning text models","categories":["model tuning","text analysis","logistic regression","Bayesian optimization","extracting results"],"type":"learn-subsection","weight":4,"description":"Prepare text data for predictive modeling and tune with both grid and iterative search.\n","toc":true,"toc-depth":2,"include-after-body":"../../../resources.html"},"headingText":"Introduction","containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(stopwords)\nlibrary(doMC)\nregisterDoMC(cores = parallel::detectCores())\n\npkgs <- c(\"tidymodels\", \"textrecipes\", \"textfeatures\", \"stopwords\")\n\ntheme_set(theme_bw())\n```\n\n\n`r article_req_pkgs(pkgs)`\n\nThis article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on.\n\n\n## Text as data\n\nThe text data we'll use in this article are from Amazon: \n\n> This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. 
Reviews include product and user information, ratings, and a plaintext review.\n\nThis article uses a small subset of the total reviews [available at the original source](https://snap.stanford.edu/data/web-FineFoods.html). We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set. \n\nThere is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not.\n\n```{r}\n#| label: \"data\"\nlibrary(tidymodels)\n\ndata(\"small_fine_foods\")\ntraining_data\n```\n\nOur modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not.\n\n## Inputs for the search\n\nText, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. There are multiple ways to process and prepare text for modeling; let's add several steps together to create different kinds of features:\n\n* Create an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the [textfeatures](https://github.com/mkearney/textfeatures) package for this.\n\n* [Tokenize](https://smltar.com/tokenization.html) the text (i.e. 
break the text into smaller components such as words).\n\n* Remove stop words such as \"the\", \"an\", \"of\", etc.\n\n* [Stem](https://smltar.com/stemming.html) tokens to a common root where possible.\n\n* Convert tokens to dummy variables via a [signed, binary hash function](https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html).\n\n* Optionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a [Yeo-Johnson transformation](https://bookdown.org/max/FES/numeric-one-to-one.html).\n\n* Remove predictors with a single distinct value.\n\n* Center and scale all predictors. \n\n\n::: {.callout-note}\n We will end up with two kinds of features:\n\n- dummy/indicator variables for the count-based features like number of digits or punctuation characters \n- hash features for the tokens like \"salsa\" or \"delicious\". \n:::\n\nSome of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. In this preprocessing approach, the main tuning parameter is the number of hashing features to use. \n\nBefore we start building our preprocessing recipe, we need some helper objects. For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features: \n\n```{r}\n#| label: \"feat-list\"\nlibrary(textfeatures)\n\nbasics <- names(textfeatures:::count_functions)\nhead(basics)\n```\n\nAlso, the implementation of feature hashes does not produce the binary values we need. 
This small function will help convert the scores to values of -1, 0, or 1:\n\n```{r}\n#| label: \"hash-func\"\nbinary_hash <- function(x) {\n x <- ifelse(x < 0, -1, x)\n x <- ifelse(x > 0, 1, x)\n x\n}\n```\n\nNow, let's put this all together in one recipe:\n\n```{r}\n#| label: \"text-rec\"\nlibrary(textrecipes)\n\npre_proc <-\n recipe(score ~ product + review, data = training_data) %>%\n # Do not use the product ID as a predictor\n update_role(product, new_role = \"id\") %>%\n # Make a copy of the raw text\n step_mutate(review_raw = review) %>%\n # Compute the initial features. This removes the `review_raw` column\n step_textfeature(review_raw) %>%\n # Make the feature names shorter\n step_rename_at(\n starts_with(\"textfeature_\"),\n fn = ~ gsub(\"textfeature_review_raw_\", \"\", .)\n ) %>%\n step_tokenize(review) %>%\n step_stopwords(review) %>%\n step_stem(review) %>%\n # Here is where the tuning parameter is declared\n step_texthash(review, signed = TRUE, num_terms = tune()) %>%\n # Simplify these names\n step_rename_at(starts_with(\"review_hash\"), fn = ~ gsub(\"review_\", \"\", .)) %>%\n # Convert the features from counts to values of -1, 0, or 1\n step_mutate_at(starts_with(\"hash\"), fn = binary_hash) %>%\n # Transform the initial feature set\n step_YeoJohnson(one_of(!!basics)) %>%\n step_zv(all_predictors()) %>%\n step_normalize(all_predictors())\n```\n\n::: {.callout-warning}\n Note that, when objects from the global environment are used, they are injected into the step objects via `!!`. For some parallel processing technologies, these objects may not be found by the worker processes. \n:::\n\nThe preprocessing recipe is long and complex (often typical for working with text data) but the model we'll use is more straightforward. 
Let's stick with a regularized logistic regression model: \n\n```{r}\n#| label: \"lr\"\nlr_mod <-\n logistic_reg(penalty = tune(), mixture = tune()) %>%\n set_engine(\"glmnet\")\n```\n\nThere are three tuning parameters for this data analysis:\n\n- `num_terms`, the number of feature hash variables to create\n- `penalty`, the amount of regularization for the model\n- `mixture`, the proportion of L1 regularization\n\n## Resampling\n\nThere are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models. \n\n```{r}\n#| label: \"folds\"\nset.seed(8935)\nfolds <- vfold_cv(training_data)\nfolds\n```\n\n## Grid search\n\nLet's begin our tuning with [grid search](https://www.tidymodels.org/learn/work/tune-svm/) and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the [\"submodel-trick\"](https://tune.tidymodels.org/articles/extras/optimizations.html#sub-model-speed-ups-1). The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features. \n\n```{r}\n#| label: \"grid\"\nfive_star_grid <- \n crossing(\n penalty = 10^seq(-3, 0, length = 20),\n mixture = c(0.01, 0.25, 0.50, 0.75, 1),\n num_terms = 2^c(8, 10, 12)\n )\nfive_star_grid\n```\n\nNote that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work. \n\nLet's save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. 
Use an extraction function to do this:\n\n```{r}\n#| label: \"extract\"\nglmnet_vars <- function(x) {\n # `x` will be a workflow object\n mod <- extract_model(x)\n # `df` is the number of model terms for each penalty value\n tibble(penalty = mod$lambda, num_vars = mod$df)\n}\n\nctrl <- control_grid(extract = glmnet_vars, verbose = TRUE)\n```\n\nFinally, let's run the grid search:\n\n```{r}\n#| label: \"grid-search\"\n#| message: false\nroc_scores <- metric_set(roc_auc)\n\nset.seed(1559)\nfive_star_glmnet <- \n tune_grid(\n lr_mod, \n pre_proc, \n resamples = folds, \n grid = five_star_grid, \n metrics = roc_scores, \n control = ctrl\n )\n\nfive_star_glmnet\n```\n\nThis took a while to complete! What do the results look like? Let's get the resampling estimates of the area under the ROC curve for each tuning parameter:\n\n```{r}\n#| label: \"grid-roc\"\ngrid_roc <- \n collect_metrics(five_star_glmnet) %>% \n arrange(desc(mean))\ngrid_roc\n```\n\nThe best results have a fairly high penalty value and focus on the ridge penalty (i.e. no feature selection via the lasso's L1 penalty). The best solutions also use the largest number of hashing features. \n\nWhat is the relationship between performance and the tuning parameters? \n\n```{r}\n#| label: \"grid-plot\"\n#| fig-width: 10\nautoplot(five_star_glmnet, metric = \"roc_auc\")\n```\n\n- We can definitely see that performance improves with the number of features included. In this article, we've used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal. \n\n- The profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. What's that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model. 
\n- The panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn't much difference between the best performance for the different mixture values. A case could be made that we should choose a _larger_ mixture value and a _smaller_ penalty to select a simpler model that contains fewer predictors. \n\n- If more experimentation were conducted, a larger set of features (more than 4096) should also be considered. \n\nWe'll come back to the extracted glmnet components at the end of this article. \n\n## Directed search\n\nWhat if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently? \n\nLet's pretend that we haven't seen the grid search results. We'll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design. \n\nIt might be good to use a custom `dials` object for the number of hash terms. The default object, `num_terms()`, uses a linear range and tries to set the upper bound of the parameter using the data. Instead, let's create a parameter set, change the scale to be `log2`, and define the same range as was used in grid search. \n\n```{r}\n#| label: \"hash-range\"\nhash_range <- num_terms(c(8, 12), trans = log2_trans())\nhash_range\n```\n\nTo use this, we have to merge the recipe and `parsnip` model object into a workflow:\n\n```{r}\n#| label: \"wflow\"\nfive_star_wflow <-\n workflow() %>%\n add_recipe(pre_proc) %>%\n add_model(lr_mod)\n```\n\nThen we can extract and manipulate the corresponding parameter set:\n\n```{r}\n#| label: \"search-set\"\nfive_star_set <-\n five_star_wflow %>%\n parameters() %>%\n update(\n num_terms = hash_range, \n penalty = penalty(c(-3, 0)),\n mixture = mixture(c(0.05, 1.00))\n )\n```\n\nThis is passed to the search function via the `param_info` argument. 
\n\nThe initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on [acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html) for more details). The tune package has a built-in function called `expo_decay()` that can help accomplish this:\n\n```{r}\n#| label: \"decay\"\ntrade_off_decay <- function(iter) {\n expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4)\n}\n```\n\nUsing these values, let's run the search:\n\n```{r}\n#| label: \"search\"\nset.seed(12)\nfive_star_search <-\n tune_bayes(\n five_star_wflow, \n resamples = folds,\n param_info = five_star_set,\n initial = 5,\n iter = 30,\n metrics = roc_scores,\n objective = exp_improve(trade_off_decay),\n control = control_bayes(verbose_iter = TRUE)\n )\n\nfive_star_search\n```\n\nThese results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. \nLet's look at a plot of model performance versus the search iterations:\n\n```{r}\n#| label: \"iter-plot\"\nautoplot(five_star_search, type = \"performance\")\n```\n\n::: {.callout-note}\nWhat would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). 
We might also restrict the penalty and mixture parameters to have a lower upper bound. \n:::\n\n## Extracted results\n\nLet's return to the grid search results and examine the results of our `extract` function. For each _fitted model_, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let's look at these results for the best model:\n\n```{r}\n#| label: \"best-res\"\nparams <- select_best(five_star_glmnet, metric = \"roc_auc\")\nparams\n```\n\nRecall that we saved the glmnet results in a tibble. The column `five_star_glmnet$.extracts` is a list of tibbles. As an example, the first element of the list is:\n\n```{r}\n#| label: \"first-elem\"\nfive_star_glmnet$.extracts[[1]]\n```\n\nMore nested tibbles! Let's `unnest()` the `five_star_glmnet$.extracts` column:\n\n```{r}\n#| label: \"unnest\"\nlibrary(tidyr)\nextracted <- \n five_star_glmnet %>% \n dplyr::select(id, .extracts) %>% \n unnest(cols = .extracts)\nextracted\n```\n\nOne thing to realize here is that `tune_grid()` [may not fit all of the models](https://tune.tidymodels.org/articles/extras/optimizations.html) that are evaluated. In this case, for each value of `mixture` and `num_terms`, the model is fit over _all_ penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the `penalty` column in `extracted`:\n\n\n```{r}\n#| label: \"select-best\"\nextracted <- \n extracted %>% \n dplyr::select(-penalty) %>% \n inner_join(params, by = c(\"num_terms\", \"mixture\")) %>% \n # Now remove it from the final results\n dplyr::select(-penalty)\nextracted\n```\n\nNow we can get at the results that we want using another `unnest()`:\n\n```{r}\n#| label: \"final-unnest\"\nextracted <- \n extracted %>% \n unnest(col = .extracts) # <- these contain a `penalty` column\nextracted\n```\n\nLet's look at a plot of these results (per resample):\n\n```{r}\n#| label: \"var-plot\"\nggplot(extracted, aes(x = penalty, y = num_vars)) + \n geom_line(aes(group = id, col = id), alpha = .5) + \n ylab(\"Number of retained predictors\") + \n scale_x_log10() + \n ggtitle(paste(\"mixture = \", params$mixture, \"and\", params$num_terms, \"features\")) + \n theme(legend.position = \"none\")\n```\n\nThese results might help guide the choice of the `penalty` range if more optimization was conducted. \n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\nlibrary(tidymodels)\nlibrary(stopwords)\nlibrary(doMC)\nregisterDoMC(cores = parallel::detectCores())\n\npkgs <- c(\"tidymodels\", \"textrecipes\", \"textfeatures\", \"stopwords\")\n\ntheme_set(theme_bw())\n```\n\n## Introduction\n\n`r article_req_pkgs(pkgs)`\n\nThis article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. 
This article also shows how to extract information from each model fit during tuning to use later on.\n\n\n## Text as data\n\nThe text data we'll use in this article are from Amazon: \n\n> This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review.\n\nThis article uses a small subset of the total reviews [available at the original source](https://snap.stanford.edu/data/web-FineFoods.html). We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set. \n\nThere is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not.\n\n```{r}\n#| label: \"data\"\nlibrary(tidymodels)\n\ndata(\"small_fine_foods\")\ntraining_data\n```\n\nOur modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not.\n\n## Inputs for the search\n\nText, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. There are multiple ways to process and prepare text for modeling; let's add several steps together to create different kinds of features:\n\n* Create an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the [textfeatures](https://github.com/mkearney/textfeatures) package for this.\n\n* [Tokenize](https://smltar.com/tokenization.html) the text (i.e. 
break the text into smaller components such as words).\n\n* Remove stop words such as \"the\", \"an\", \"of\", etc.\n\n* [Stem](https://smltar.com/stemming.html) tokens to a common root where possible.\n\n* Convert tokens to dummy variables via a [signed, binary hash function](https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html).\n\n* Optionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a [Yeo-Johnson transformation](https://bookdown.org/max/FES/numeric-one-to-one.html).\n\n* Remove predictors with a single distinct value.\n\n* Center and scale all predictors. \n\n\n::: {.callout-note}\n We will end up with two kinds of features:\n\n- dummy/indicator variables for the count-based features like number of digits or punctuation characters \n- hash features for the tokens like \"salsa\" or \"delicious\". \n:::\n\nSome of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. In this preprocessing approach, the main tuning parameter is the number of hashing features to use. \n\nBefore we start building our preprocessing recipe, we need some helper objects. For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features: \n\n```{r}\n#| label: \"feat-list\"\nlibrary(textfeatures)\n\nbasics <- names(textfeatures:::count_functions)\nhead(basics)\n```\n\nAlso, the implementation of feature hashes does not produce the binary values we need. 
This small function will help convert the scores to values of -1, 0, or 1:\n\n```{r}\n#| label: \"hash-func\"\nbinary_hash <- function(x) {\n x <- ifelse(x < 0, -1, x)\n x <- ifelse(x > 0, 1, x)\n x\n}\n```\n\nNow, let's put this all together in one recipe:\n\n```{r}\n#| label: \"text-rec\"\nlibrary(textrecipes)\n\npre_proc <-\n recipe(score ~ product + review, data = training_data) %>%\n # Do not use the product ID as a predictor\n update_role(product, new_role = \"id\") %>%\n # Make a copy of the raw text\n step_mutate(review_raw = review) %>%\n # Compute the initial features. This removes the `review_raw` column\n step_textfeature(review_raw) %>%\n # Make the feature names shorter\n step_rename_at(\n starts_with(\"textfeature_\"),\n fn = ~ gsub(\"textfeature_review_raw_\", \"\", .)\n ) %>%\n step_tokenize(review) %>%\n step_stopwords(review) %>%\n step_stem(review) %>%\n # Here is where the tuning parameter is declared\n step_texthash(review, signed = TRUE, num_terms = tune()) %>%\n # Simplify these names\n step_rename_at(starts_with(\"review_hash\"), fn = ~ gsub(\"review_\", \"\", .)) %>%\n # Convert the features from counts to values of -1, 0, or 1\n step_mutate_at(starts_with(\"hash\"), fn = binary_hash) %>%\n # Transform the initial feature set\n step_YeoJohnson(one_of(!!basics)) %>%\n step_zv(all_predictors()) %>%\n step_normalize(all_predictors())\n```\n\n::: {.callout-warning}\n Note that, when objects from the global environment are used, they are injected into the step objects via `!!`. For some parallel processing technologies, these objects may not be found by the worker processes. \n:::\n\nThe preprocessing recipe is long and complex (often typical for working with text data) but the model we'll use is more straightforward. 
Let's stick with a regularized logistic regression model: \n\n```{r}\n#| label: \"lr\"\nlr_mod <-\n logistic_reg(penalty = tune(), mixture = tune()) %>%\n set_engine(\"glmnet\")\n```\n\nThere are three tuning parameters for this data analysis:\n\n- `num_terms`, the number of feature hash variables to create\n- `penalty`, the amount of regularization for the model\n- `mixture`, the proportion of L1 regularization\n\n## Resampling\n\nThere are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models. \n\n```{r}\n#| label: \"folds\"\nset.seed(8935)\nfolds <- vfold_cv(training_data)\nfolds\n```\n\n## Grid search\n\nLet's begin our tuning with [grid search](https://www.tidymodels.org/learn/work/tune-svm/) and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the [\"submodel-trick\"](https://tune.tidymodels.org/articles/extras/optimizations.html#sub-model-speed-ups-1). The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features. \n\n```{r}\n#| label: \"grid\"\nfive_star_grid <- \n crossing(\n penalty = 10^seq(-3, 0, length = 20),\n mixture = c(0.01, 0.25, 0.50, 0.75, 1),\n num_terms = 2^c(8, 10, 12)\n )\nfive_star_grid\n```\n\nNote that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work. \n\nLet's save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. 
Use an extraction function to do this:\n\n```{r}\n#| label: \"extract\"\nglmnet_vars <- function(x) {\n # `x` will be a workflow object\n mod <- extract_model(x)\n # `df` is the number of model terms for each penalty value\n tibble(penalty = mod$lambda, num_vars = mod$df)\n}\n\nctrl <- control_grid(extract = glmnet_vars, verbose = TRUE)\n```\n\nFinally, let's run the grid search:\n\n```{r}\n#| label: \"grid-search\"\n#| message: false\nroc_scores <- metric_set(roc_auc)\n\nset.seed(1559)\nfive_star_glmnet <- \n tune_grid(\n lr_mod, \n pre_proc, \n resamples = folds, \n grid = five_star_grid, \n metrics = roc_scores, \n control = ctrl\n )\n\nfive_star_glmnet\n```\n\nThis took a while to complete! What do the results look like? Let's get the resampling estimates of the area under the ROC curve for each tuning parameter:\n\n```{r}\n#| label: \"grid-roc\"\ngrid_roc <- \n collect_metrics(five_star_glmnet) %>% \n arrange(desc(mean))\ngrid_roc\n```\n\nThe best results have a fairly high penalty value and focus on the ridge penalty (i.e. no feature selection via the lasso's L1 penalty). The best solutions also use the largest number of hashing features. \n\nWhat is the relationship between performance and the tuning parameters? \n\n```{r}\n#| label: \"grid-plot\"\n#| fig-width: 10\nautoplot(five_star_glmnet, metric = \"roc_auc\")\n```\n\n- We can definitely see that performance improves with the number of features included. In this article, we've used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal. \n\n- The profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. What's that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model. 
\n- The panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn't much difference between the best performance for the different mixture values. A case could be made that we should choose a _larger_ mixture value and a _smaller_ penalty to select a simpler model that contains fewer predictors. \n\n- If more experimentation were conducted, a larger set of features (more than 4096) should also be considered. \n\nWe'll come back to the extracted glmnet components at the end of this article. \n\n## Directed search\n\nWhat if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently? \n\nLet's pretend that we haven't seen the grid search results. We'll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design. \n\nIt might be good to use a custom `dials` object for the number of hash terms. The default object, `num_terms()`, uses a linear range and tries to set the upper bound of the parameter using the data. Instead, let's create a parameter set, change the scale to be `log2`, and define the same range as was used in grid search. \n\n```{r}\n#| label: \"hash-range\"\nhash_range <- num_terms(c(8, 12), trans = log2_trans())\nhash_range\n```\n\nTo use this, we have to merge the recipe and `parsnip` model object into a workflow:\n\n```{r}\n#| label: \"wflow\"\nfive_star_wflow <-\n workflow() %>%\n add_recipe(pre_proc) %>%\n add_model(lr_mod)\n```\n\nThen we can extract and manipulate the corresponding parameter set:\n\n```{r}\n#| label: \"search-set\"\nfive_star_set <-\n five_star_wflow %>%\n parameters() %>%\n update(\n num_terms = hash_range, \n penalty = penalty(c(-3, 0)),\n mixture = mixture(c(0.05, 1.00))\n )\n```\n\nThis is passed to the search function via the `param_info` argument. 
\n\nThe initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on [acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html) for more details). The tune package has a built-in function called `expo_decay()` that can help accomplish this:\n\n```{r}\n#| label: \"decay\"\ntrade_off_decay <- function(iter) {\n expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4)\n}\n```\n\nUsing these values, let's run the search:\n\n```{r}\n#| label: \"search\"\nset.seed(12)\nfive_star_search <-\n tune_bayes(\n five_star_wflow, \n resamples = folds,\n param_info = five_star_set,\n initial = 5,\n iter = 30,\n metrics = roc_scores,\n objective = exp_improve(trade_off_decay),\n control = control_bayes(verbose_iter = TRUE)\n )\n\nfive_star_search\n```\n\nThese results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. \nLet's look at a plot of model performance versus the search iterations:\n\n```{r}\n#| label: \"iter-plot\"\nautoplot(five_star_search, type = \"performance\")\n```\n\n::: {.callout-note}\nWhat would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). 
We might also restrict the penalty and mixture parameters to have a lower upper bound. \n:::\n\n## Extracted results\n\nLet's return to the grid search results and examine the results of our `extract` function. For each _fitted model_, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let's look at these results for the best model:\n\n```{r}\n#| label: \"best-res\"\nparams <- select_best(five_star_glmnet, metric = \"roc_auc\")\nparams\n```\n\nRecall that we saved the glmnet results in a tibble. The column `five_star_glmnet$.extracts` is a list of tibbles. As an example, the first element of the list is:\n\n```{r}\n#| label: \"first-elem\"\nfive_star_glmnet$.extracts[[1]]\n```\n\nMore nested tibbles! Let's `unnest()` the `five_star_glmnet$.extracts` column:\n\n```{r}\n#| label: \"unnest\"\nlibrary(tidyr)\nextracted <- \n five_star_glmnet %>% \n dplyr::select(id, .extracts) %>% \n unnest(cols = .extracts)\nextracted\n```\n\nOne thing to realize here is that `tune_grid()` [may not fit all of the models](https://tune.tidymodels.org/articles/extras/optimizations.html) that are evaluated. In this case, for each value of `mixture` and `num_terms`, the model is fit over _all_ penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the `penalty` column in `extracted`:\n\n\n```{r}\n#| label: \"select-best\"\nextracted <- \n extracted %>% \n dplyr::select(-penalty) %>% \n inner_join(params, by = c(\"num_terms\", \"mixture\")) %>% \n # Now remove it from the final results\n dplyr::select(-penalty)\nextracted\n```\n\nNow we can get at the results that we want using another `unnest()`:\n\n```{r}\n#| label: \"final-unnest\"\nextracted <- \n extracted %>% \n unnest(col = .extracts) # <- these contain a `penalty` column\nextracted\n```\n\nLet's look at a plot of these results (per resample):\n\n```{r}\n#| label: \"var-plot\"\nggplot(extracted, aes(x = penalty, y = num_vars)) + \n geom_line(aes(group = id, col = id), alpha = .5) + \n ylab(\"Number of retained predictors\") + \n scale_x_log10() + \n ggtitle(paste(\"mixture = \", params$mixture, \"and\", params$num_terms, \"features\")) + \n theme(legend.position = \"none\")\n```\n\nThese results might help guide the choice of the `penalty` range if more optimization was conducted. 
\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"include-after-body":["../../../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For 
attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../../styles.scss","../../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Tuning text models","categories":["model tuning","text analysis","logistic regression","Bayesian optimization","extracting results"],"type":"learn-subsection","weight":4,"description":"Prepare text data 
for predictive modeling and tune with both grid and iterative search.\n"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/packages/index.qmd.json b/.quarto/idx/packages/index.qmd.json new file mode 100644 index 00000000..f090f5f8 --- /dev/null +++ b/.quarto/idx/packages/index.qmd.json @@ -0,0 +1 @@ +{"title":"Tidymodels packages","markdown":{"yaml":{"title":"Tidymodels packages","toc":true,"toc-depth":1,"include-after-body":"../resources.html"},"headingText":"Installation and use","containsRefs":false,"markdown":"\n\n\n* Install many of the packages in the tidymodels ecosystem by running `install.packages(\"tidymodels\")`.\n\n* Run `library(tidymodels)` to load the core packages and make them available in your current R session.\n\n
\n\n\n
\n
\n\"\"\n
\n

tidymodels

\n

tidymodels is a meta-package that installs and load the core packages listed below that you need for modeling and machine learning.\nGo to package ...

\n
\n
\n
\n\"\"\n
\n

rsample

\n

rsample provides infrastructure for efficient data splitting and resampling.Go to package ...

\n
\n
\n
\n\"\"\n
\n

parsnip

\n

parsnip is a tidy, unified interface to models that can be used to try a range of models without getting bogged down in the syntactical minutiae of the underlying packages.Go to package ...

\n
\n
\n
\n\"\"\n
\n

recipes

\n

recipes is a tidy interface to data pre-processing tools for feature engineering.Go to package ...

\n
\n
\n
\n\"\"\n
\n

workflows

\n

workflows bundle your pre-processing, modeling, and post-processing together.Go to package ...

\n
\n
\n
\n\"\"\n
\n

tune

\n

tune helps you optimize the hyperparameters of your model and pre-processing steps.Go to package ...

\n
\n
\n
\n\"\"\n
\n

yardstick

\n

yardstick measures the effectiveness of models using performance metrics.Go to package ...

\n
\n
\n
\n\"\"\n
\n

broom

\n

broom converts the information in common statistical R objects into user-friendly, predictable formats. \nGo to package ...

\n
\n
\n
\n\"\"\n
\n

dials

\n

dials creates and manages tuning parameters and parameter grids. \nGo to package ...

\n
\n
\n
\n
\n\nLearn more about the tidymodels metapackage itself at .\n\n## Specialized packages\n\nThe tidymodels framework also includes many other packages designed for specialized data analysis and modeling tasks. They are not loaded automatically with `library(tidymodels)`, so you'll need to load each one with its own call to `library()`. These packages include: \n\n### [Perform statistical analysis](/learn/statistics/)\n\n* [infer](https://infer.tidymodels.org/) is a high-level API for tidyverse-friendly statistical inference.\n\n* The [corrr](https://corrr.tidymodels.org/) package has tidy interfaces for working with correlation matrices.\n\n### [Create robust models](/learn/models/)\n\n* The [spatialsample](http://spatialsample.tidymodels.org/) package provides resampling functions and classes like rsample, but specialized for spatial data.\n\n* parsnip also has additional packages that contain more model definitions. [discrim](https://discrim.tidymodels.org/) contains definitions for discriminant analysis models, [poissonreg](https://poissonreg.tidymodels.org/) provides definitions for Poisson regression models, [plsmod](https://plsmod.tidymodels.org/) enables linear projection models, and [rules](https://rules.tidymodels.org/) does the same for rule-based classification and regression models. [baguette](https://baguette.tidymodels.org/) creates ensemble models via bagging, and [multilevelmod](https://multilevelmod.tidymodels.org/) provides support for multilevel models (otherwise known as mixed models or hierarchical models). \n\n* There are several add-on packages for creating recipes. [embed](https://embed.tidymodels.org/) contains steps to create embeddings or projections of predictors. [textrecipes](https://textrecipes.tidymodels.org/) has extra steps for text processing, and [themis](https://themis.tidymodels.org/) can help alleviate class imbalance using sampling methods. 
\n\n* [tidypredict](https://tidypredict.tidymodels.org/) and [modeldb](https://modeldb.tidymodels.org/) can convert prediction equations to different languages (e.g. SQL) and fit some models in-database. \n\n### [Tune, compare, and work with your models](/learn/work/)\n\n* To try out multiple different workflows (i.e. bundles of pre-processor and model) at once, [workflowsets](https://workflowsets.tidymodels.org/) lets you create sets of workflow objects for tuning and resampling.\n\n* To integrate predictions from many models, the [stacks](https://stacks.tidymodels.org/) package provides tools for stacked ensemble modeling.\n\n* The [finetune](https://finetune.tidymodels.org/) package extends the tune package with more approaches such as racing and simulated annealing.\n\n* The [usemodels](https://usemodels.tidymodels.org/) package creates templates and automatically generates code to fit and tune models.\n\n* [probably](https://probably.tidymodels.org/) has tools for post-processing class probability estimates.\n\n* The [tidyposterior](https://tidyposterior.tidymodels.org/) package enables users to make formal statistical comparisons between models using resampling and Bayesian methods. \n\n* Some R objects become inconveniently large when saved to disk. The [butcher](https://butcher.tidymodels.org/) package can reduce the size of those objects by removing the sub-components. \n\n* To know whether the data that you are predicting are _extrapolations_ from the training set, [applicable](https://applicable.tidymodels.org/) can produce metrics that measure extrapolation. \n\n* [shinymodels](https://shinymodels.tidymodels.org/) lets you explore tuning or resampling results via a Shiny app.\n\n### [Develop custom modeling tools](/learn/develop/)\n\n* [hardhat](https://hardhat.tidymodels.org/) is a _developer-focused_ package that helps beginners create high-quality R packages for modeling. 
\n","srcMarkdownNoYaml":"\n\n## Installation and use\n\n* Install many of the packages in the tidymodels ecosystem by running `install.packages(\"tidymodels\")`.\n\n* Run `library(tidymodels)` to load the core packages and make them available in your current R session.\n\n
\n\n\n
\n
\n\"\"\n
\n

tidymodels

\n

tidymodels is a meta-package that installs and load the core packages listed below that you need for modeling and machine learning.\nGo to package ...

\n
\n
\n
\n\"\"\n
\n

rsample

\n

rsample provides infrastructure for efficient data splitting and resampling.Go to package ...

\n
\n
\n
\n\"\"\n
\n

parsnip

\n

parsnip is a tidy, unified interface to models that can be used to try a range of models without getting bogged down in the syntactical minutiae of the underlying packages.Go to package ...

\n
\n
\n
\n\"\"\n
\n

recipes

\n

recipes is a tidy interface to data pre-processing tools for feature engineering.Go to package ...

\n
\n
\n
\n\"\"\n
\n

workflows

\n

workflows bundle your pre-processing, modeling, and post-processing together.Go to package ...

\n
\n
\n
\n\"\"\n
\n

tune

\n

tune helps you optimize the hyperparameters of your model and pre-processing steps.Go to package ...

\n
\n
\n
\n\"\"\n
\n

yardstick

\n

yardstick measures the effectiveness of models using performance metrics.Go to package ...

\n
\n
\n
\n\"\"\n
\n

broom

\n

broom converts the information in common statistical R objects into user-friendly, predictable formats. \nGo to package ...

\n
\n
\n
\n\"\"\n
\n

dials

\n

dials creates and manages tuning parameters and parameter grids. \nGo to package ...

\n
\n
\n
\n
\n\nLearn more about the tidymodels metapackage itself at .\n\n## Specialized packages\n\nThe tidymodels framework also includes many other packages designed for specialized data analysis and modeling tasks. They are not loaded automatically with `library(tidymodels)`, so you'll need to load each one with its own call to `library()`. These packages include: \n\n### [Perform statistical analysis](/learn/statistics/)\n\n* [infer](https://infer.tidymodels.org/) is a high-level API for tidyverse-friendly statistical inference.\n\n* The [corrr](https://corrr.tidymodels.org/) package has tidy interfaces for working with correlation matrices.\n\n### [Create robust models](/learn/models/)\n\n* The [spatialsample](http://spatialsample.tidymodels.org/) package provides resampling functions and classes like rsample, but specialized for spatial data.\n\n* parsnip also has additional packages that contain more model definitions. [discrim](https://discrim.tidymodels.org/) contains definitions for discriminant analysis models, [poissonreg](https://poissonreg.tidymodels.org/) provides definitions for Poisson regression models, [plsmod](https://plsmod.tidymodels.org/) enables linear projection models, and [rules](https://rules.tidymodels.org/) does the same for rule-based classification and regression models. [baguette](https://baguette.tidymodels.org/) creates ensemble models via bagging, and [multilevelmod](https://multilevelmod.tidymodels.org/) provides support for multilevel models (otherwise known as mixed models or hierarchical models). \n\n* There are several add-on packages for creating recipes. [embed](https://embed.tidymodels.org/) contains steps to create embeddings or projections of predictors. [textrecipes](https://textrecipes.tidymodels.org/) has extra steps for text processing, and [themis](https://themis.tidymodels.org/) can help alleviate class imbalance using sampling methods. 
\n\n* [tidypredict](https://tidypredict.tidymodels.org/) and [modeldb](https://modeldb.tidymodels.org/) can convert prediction equations to different languages (e.g. SQL) and fit some models in-database. \n\n### [Tune, compare, and work with your models](/learn/work/)\n\n* To try out multiple different workflows (i.e. bundles of pre-processor and model) at once, [workflowsets](https://workflowsets.tidymodels.org/) lets you create sets of workflow objects for tuning and resampling.\n\n* To integrate predictions from many models, the [stacks](https://stacks.tidymodels.org/) package provides tools for stacked ensemble modeling.\n\n* The [finetune](https://finetune.tidymodels.org/) package extends the tune package with more approaches such as racing and simulated annealing.\n\n* The [usemodels](https://usemodels.tidymodels.org/) package creates templates and automatically generates code to fit and tune models.\n\n* [probably](https://probably.tidymodels.org/) has tools for post-processing class probability estimates.\n\n* The [tidyposterior](https://tidyposterior.tidymodels.org/) package enables users to make formal statistical comparisons between models using resampling and Bayesian methods. \n\n* Some R objects become inconveniently large when saved to disk. The [butcher](https://butcher.tidymodels.org/) package can reduce the size of those objects by removing the sub-components. \n\n* To know whether the data that you are predicting are _extrapolations_ from the training set, [applicable](https://applicable.tidymodels.org/) can produce metrics that measure extrapolation. \n\n* [shinymodels](https://shinymodels.tidymodels.org/) lets you explore tuning or resampling results via a Shiny app.\n\n### [Develop custom modeling tools](/learn/develop/)\n\n* [hardhat](https://hardhat.tidymodels.org/) is a _developer-focused_ package that helps beginners create high-quality R packages for modeling. 
\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":1,"include-after-body":["../resources.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX 
citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Tidymodels packages"},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/start/case-study/index.qmd.json 
b/.quarto/idx/start/case-study/index.qmd.json new file mode 100644 index 00000000..1baf84f5 --- /dev/null +++ b/.quarto/idx/start/case-study/index.qmd.json @@ -0,0 +1 @@ +{"title":"A predictive modeling case study","markdown":{"yaml":{"title":"A predictive modeling case study","weight":5,"categories":["model fitting","tuning","parsnip","recipes","rsample","workflows","tune"],"description":"Develop, from beginning to end, a predictive model using best practices.\n","toc-location":"body","toc-depth":2,"toc-title":"","css":"../styles.css","include-after-body":"../repo-actions-delete.html"},"headingText":"Introduction","headingAttr":{"id":"intro","classes":[],"keyvalue":[]},"containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(readr)\nlibrary(vip)\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\", \"readr\", \"glmnet\", \"ranger\", \"vip\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\nEach of the four previous [*Get Started*](/start/) articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you'll need to start working with models. 
In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays.\n\n```{r}\n#| echo: false\n#| out-width: \"90%\"\nknitr::include_graphics(\"img/hotel.jpg\")\n```\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) \n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(vip) # for variable importance plots\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The Hotel Bookings Data {#data}\n\nLet's use hotel bookings data from [Antonio, Almeida, and Nunes (2019)](https://doi.org/10.1016/j.dib.2018.11.126) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a [`#TidyTuesday`](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11) dataset with a [data dictionary](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11#data-dictionary) you may want to look over to learn more about the variables. 
We'll use a slightly [edited version of the dataset](https://gist.github.com/topepo/05a74916c343e57a71c51d6bc32a21ce) for this case study.\n\nTo start, let's read our hotel data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (\"\"):\n\n```{r}\n#| label: \"hotel-import\"\n#| message: false\nlibrary(tidymodels)\nlibrary(readr)\n\nhotels <- \n read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>%\n mutate(across(where(is.character), as.factor))\n\ndim(hotels)\n```\n\nIn the original paper, the [authors](https://doi.org/10.1016/j.dib.2018.11.126) caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we'll be analyzing *hotel stays* only.\n\n```{r}\n#| label: \"glimpse-hotels\"\nglimpse(hotels)\n```\n\nWe will build a model to predict which actual hotel stays included children and/or babies, and which did not. Our outcome variable `children` is a factor variable with two levels:\n\n```{r}\n#| label: \"count-children\"\nhotels %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n```\n\nWe can see that children were only in `r round(mean(hotels$children == \"children\") * 100, 1)`% of the reservations. 
This type of class imbalance can often wreak havoc on an analysis. While there are several methods for combating this issue using [recipes](/find/recipes/) (search for steps to `upsample` or `downsample`) or other more specialized packages like [themis](https://themis.tidymodels.org/), the analyses shown below analyze the data as-is.\n\n## Data Splitting & Resampling {#data-split}\n\nFor a data splitting strategy, let's reserve 25% of the stays to the test set. As in our [*Evaluate your model with resampling*](/start/resampling/#data-split) article, we know our outcome variable `children` is pretty imbalanced so we'll use a stratified random sample:\n\n```{r}\n#| label: \"tr-te-split\"\nset.seed(123)\nsplits <- initial_split(hotels, strata = children)\n\nhotel_other <- training(splits)\nhotel_test <- testing(splits)\n\n# training set proportions by children\nhotel_other %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n\n# test set proportions by children\nhotel_test %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n```\n\nIn our articles so far, we've relied on 10-fold cross-validation as the primary resampling method using [`rsample::vfold_cv()`](https://rsample.tidymodels.org/reference/vfold_cv.html). This has created 10 different resamples of the training set (which we further split into *analysis* and *assessment* sets), producing 10 different performance metrics that we then aggregated.\n\nFor this case study, rather than using multiple iterations of resampling, let's create a single resample called a *validation set*. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the `r format(nrow(hotel_other), big.mark = \",\")` stays that were not used for testing, which we called `hotel_other`. 
This split creates two new datasets:\n\n- the set held out for the purpose of measuring performance, called the *validation set*, and\n\n- the remaining data used to fit the model, called the *training set*.\n\n```{r}\n#| label: \"validation-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"50%\"\nknitr::include_graphics(\"img/validation-split.svg\")\n```\n\nWe'll use the `validation_split()` function to allocate 20% of the `hotel_other` stays to the *validation set* and `r format(nrow(hotel_other) * .8, big.mark = \",\")` stays to the *training set*. This means that our model performance metrics will be computed on a single set of `r format(nrow(hotel_other) * .2, big.mark = \",\")` hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling.\n\n```{r}\n#| label: \"validation-set\"\nset.seed(234)\nval_set <- validation_split(hotel_other, \n strata = children, \n prop = 0.80)\nval_set\n```\n\nThis function, like `initial_split()`, has the same `strata` argument, which uses stratified sampling to create the resample. This means that we'll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original `hotel_other` proportions.\n\n## A first model: penalized logistic regression {#first-model}\n\nSince our outcome variable `children` is categorical, logistic regression would be a good first model to start. Let's use a model that can perform feature selection during training. The [glmnet](https://cran.r-project.org/web/packages/glmnet/index.html) R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a *penalty* on the process so that less relevant predictors are driven towards a value of zero. 
One of the glmnet penalization methods, called the [lasso method](https://en.wikipedia.org/wiki/Lasso_(statistics)), can actually set the predictor slopes to zero if a large enough penalty is used.\n\n### Build the model\n\nTo specify a penalized logistic regression model that uses a feature selection penalty, let's use the parsnip package with the [glmnet engine](/find/parsnip/):\n\n```{r}\n#| label: \"logistic-model\"\nlr_mod <- \n logistic_reg(penalty = tune(), mixture = 1) %>% \n set_engine(\"glmnet\")\n```\n\nWe'll set the `penalty` argument to `tune()` as a placeholder for now. This is a model hyperparameter that we will [tune](/start/tuning/) to find the best value for making predictions with our data. Setting `mixture` to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model.\n\n### Create the recipe\n\nLet's create a [recipe](/start/recipes/) to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a [number of useful recipe steps](/start/recipes/#features) for creating features from dates:\n\n- `step_date()` creates predictors for the year, month, and day of the week.\n\n- `step_holiday()` generates a set of indicator variables for specific holidays. Although we don't know where these two hotels are located, we do know that the countries for origin for most stays are based in Europe.\n\n- `step_rm()` removes variables; here we'll use it to remove the original date variable since we no longer want it in the model.\n\nAdditionally, all categorical predictors (e.g., `distribution_channel`, `hotel`, ...) 
should be converted to dummy variables, and all numeric predictors need to be centered and scaled.\n\n- `step_dummy()` converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data.\n\n- `step_zv()` removes indicator variables that only contain a single unique value (e.g. all zeros). This is important because, for penalized models, the predictors should be centered and scaled.\n\n- `step_normalize()` centers and scales numeric variables.\n\nPutting all these steps together into a recipe for a penalized logistic regression model, we have:\n\n```{r}\n#| label: \"logistic-features\"\nholidays <- c(\"AllSouls\", \"AshWednesday\", \"ChristmasEve\", \"Easter\", \n \"ChristmasDay\", \"GoodFriday\", \"NewYearsDay\", \"PalmSunday\")\n\nlr_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date, holidays = holidays) %>% \n step_rm(arrival_date) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors()) %>% \n step_normalize(all_predictors())\n```\n\n### Create the workflow\n\nAs we introduced in [*Preprocess your data with recipes*](/start/recipes/#fit-workflow), let's bundle the model and recipe into a single `workflow()` object to make management of the R objects easier:\n\n```{r}\n#| label: \"logistic-workflow\"\nlr_workflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(lr_recipe)\n```\n\n### Create the grid for tuning\n\nBefore we fit this model, we need to set up a grid of `penalty` values to tune. In our [*Tune model parameters*](/start/tuning/) article, we used [`dials::grid_regular()`](start/tuning/#tune-grid) to create an expanded grid based on a combination of two hyperparameters. 
Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values:\n\n```{r}\n#| label: \"logistic-grid\"\nlr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))\n\nlr_reg_grid %>% top_n(-5) # lowest penalty values\nlr_reg_grid %>% top_n(5) # highest penalty values\n```\n\n### Train and tune the model\n\nLet's use `tune::tune_grid()` to train these 30 penalized logistic regression models. We'll also save the validation set predictions (via the call to `control_grid()`) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate---the proportion of stays including children--- is very low for these data).\n\n```{r}\n#| label: \"logistic-fit\"\n#| cache: false\n#| message: false\nlr_res <- \n lr_workflow %>% \n tune_grid(val_set,\n grid = lr_reg_grid,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n```\n\nIt might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values:\n\n```{r}\n#| label: \"logistic-results\"\n#| fig-height: 4.25\n#| fig-width: 6\nlr_plot <- \n lr_res %>% \n collect_metrics() %>% \n ggplot(aes(x = penalty, y = mean)) + \n geom_point() + \n geom_line() + \n ylab(\"Area under the ROC Curve\") +\n scale_x_log10(labels = scales::label_number())\n\nlr_plot \n```\n\nThis plots shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. 
This happens because a large enough penalty will remove *all* predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class).\n\nOur model performance seems to plateau at the smaller penalty values, so going by the `roc_auc` metric alone could lead us to multiple options for the \"best\" value for this hyperparameter:\n\n```{r}\ntop_models <-\n lr_res %>% \n show_best(\"roc_auc\", n = 15) %>% \n arrange(penalty) \ntop_models\n```\n\n```{r}\n#| label: \"top-cand-mods\"\n#| echo: false\n# Adding a candidate model ID variable\ntop_models <- \n top_models %>% \n mutate(cand_model = row_number())\n\ntop_pen <- \n lr_res %>% \n select_best(\"roc_auc\") %>% \n pull(penalty)\n\ntop_cand <- \n top_models %>% \n filter(penalty == top_pen) %>% \n pull(cand_model)\n\nselect_cand <- 12\n\nselect_pen <-\n top_models %>% \n filter(cand_model == select_cand) %>% \n pull(penalty)\n\nselect_roc <- \n top_models %>% \n filter(cand_model == select_cand) %>% \n pull(mean)\n```\n\nEvery candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used `select_best()`, it would return candidate model `r top_cand` with a penalty value of `r format(top_pen, digits = 3)`, shown with the dotted line below.\n\n```{r}\n#| label: \"lr-plot-lines\"\n#| echo: false\n#| fig-height: 4.25\n#| fig-width: 6\nlr_plot +\n geom_vline(color = \"#CA225E\", lty = 3, xintercept = top_pen) +\n geom_vline(color = \"#CA225E\", xintercept = select_pen) \n```\n\nHowever, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. 
For example, candidate model `r select_cand` with a penalty value of `r format(select_pen, digits = 3)` has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we'd prefer to choose a higher penalty value.\n\nLet's select this value and visualize the validation set ROC curve:\n\n```{r}\n#| label: \"logistic-best\"\nlr_best <- \n lr_res %>% \n collect_metrics() %>% \n arrange(penalty) %>% \n slice(12)\nlr_best\n```\n\n```{r}\n#| label: \"logistic-roc-curve\"\nlr_auc <- \n lr_res %>% \n collect_predictions(parameters = lr_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Logistic Regression\")\n\nautoplot(lr_auc)\n```\n\nThe level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method.\n\n## A second model: tree-based ensemble {#second-model}\n\nAn effective and low-maintenance modeling technique is a *random forest*. This model was also used in our [*Evaluate your model with resampling*](/start/resampling/) article. Compared to logistic regression, a random forest model is more flexible. A random forest is an *ensemble model* typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. 
Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.).\n\n### Build the model and improve training time\n\nAlthough the default hyperparameters for random forests tend to give reasonable results, we'll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do [parallel processing](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) for you, and allows users to use multiple cores or separate machines to fit models.\n\nBut, here we are using a single validation set, so parallelization isn't an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do:\n\n```{r}\n#| label: \"num-cores\"\ncores <- parallel::detectCores()\ncores\n```\n\nWe have `r cores` cores to work with. We can pass this information to the ranger engine when we set up our parsnip `rand_forest()` model. 
To enable parallel processing, we can pass engine-specific arguments like `num.threads` to ranger when we set the engine:\n\n```{r}\n#| label: \"rf-model\"\nrf_mod <- \n rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores) %>% \n set_mode(\"classification\")\n```\n\nThis works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you --- we typically do not recommend relying on the modeling engine (like we did here) to do this.\n\nIn this model, we used `tune()` as a placeholder for the `mtry` and `min_n` argument values, because these are our two hyperparameters that we will [tune](/start/tuning/).\n\n### Create the recipe and workflow\n\nUnlike penalized logistic regression models, random forest models do not require [dummy](https://bookdown.org/max/FES/categorical-trees.html) or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our `arrival_date` variable. 
As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data.\n\n```{r}\n#| label: \"rf-features\"\nrf_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date) %>% \n step_rm(arrival_date) \n```\n\nAdding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest:\n\n```{r}\n#| label: \"rf-workflow\"\nrf_workflow <- \n workflow() %>% \n add_model(rf_mod) %>% \n add_recipe(rf_recipe)\n```\n\n### Train and tune the model\n\nWhen we set up our parsnip model, we chose two hyperparameters for tuning:\n\n```{r}\n#| message: false\nrf_mod\n\n# show what will be tuned\nextract_parameter_set_dials(rf_mod)\n```\n\nThe `mtry` hyperparameter sets the number of predictor variables that each node in the decision tree \"sees\" and can learn about, so it can range from 1 to the total number of features present; when `mtry` = all possible features, the model is the same as bagging decision trees. The `min_n` hyperparameter sets the minimum `n` to split at any node.\n\nWe will use a space-filling design to tune, with 25 candidate models:\n\n```{r}\n#| label: \"rf-fit\"\n#| cache: false\nset.seed(345)\nrf_res <- \n rf_workflow %>% \n tune_grid(val_set,\n grid = 25,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n```\n\nThe message printed above *\"Creating pre-processing data to finalize unknown parameter: mtry\"* is related to the size of the data set. 
Since `mtry` depends on the number of predictors in the data set, `tune_grid()` determines the upper bound for `mtry` once it receives the data.\n\nHere are our top 5 random forest models, out of the 25 candidates:\n\n```{r}\nrf_res %>% \n show_best(metric = \"roc_auc\")\n```\n\nRight away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of `r format(select_roc, digits = 3)`.\n\nPlotting the results of the tuning process highlights that both `mtry` (number of predictors at each node) and `min_n` (minimum number of data points required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values --- all but one of the ROC AUC values are greater than 0.90.\n\n```{r}\n#| label: \"rf-results\"\n#| fig-height: 4\nautoplot(rf_res)\n```\n\nLet's select the best model according to the ROC AUC metric. Our final tuning parameter values are:\n\n```{r}\n#| label: \"rf-best\"\nrf_best <- \n rf_res %>% \n select_best(metric = \"roc_auc\")\nrf_best\n```\n\nTo calculate the data needed to plot the ROC curve, we use `collect_predictions()`. This is only possible after tuning with `control_grid(save_pred = TRUE)`. 
In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children.\n\n```{r}\nrf_res %>% \n collect_predictions()\n```\n\nTo filter the predictions for only our best random forest model, we can use the `parameters` argument and pass it our tibble with the best hyperparameter values from tuning, which we called `rf_best`:\n\n```{r}\n#| label: \"rf-roc\"\nrf_auc <- \n rf_res %>% \n collect_predictions(parameters = rf_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Random Forest\")\n```\n\nNow, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model:\n\n```{r}\n#| label: \"rf-lr-roc-curve\"\nbind_rows(rf_auc, lr_auc) %>% \n ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + \n geom_path(lwd = 1.5, alpha = 0.8) +\n geom_abline(lty = 3) + \n coord_equal() + \n scale_color_viridis_d(option = \"plasma\", end = .6)\n```\n\nThe random forest is uniformly better across event probability thresholds.\n\n## The last fit {#last-fit}\n\nOur goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set.\n\nWe'll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: `importance = \"impurity\"`. 
This will provide *variable importance* scores for this last model, which gives some insight into which predictors drive model performance.\n\n```{r}\n#| label: \"last-rf\"\n#| cache: false\n# the last model\nlast_rf_mod <- \n rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores, importance = \"impurity\") %>% \n set_mode(\"classification\")\n\n# the last workflow\nlast_rf_workflow <- \n rf_workflow %>% \n update_model(last_rf_mod)\n\n# the last fit\nset.seed(345)\nlast_rf_fit <- \n last_rf_workflow %>% \n last_fit(splits)\n\nlast_rf_fit\n```\n\nThis fitted workflow contains *everything*, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance?\n\n```{r}\nlast_rf_fit %>% \n collect_metrics()\n```\n\nThis ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data.\n\nWe can access those variable importance scores via the `.workflow` column. We can [extract out the fit](https://tune.tidymodels.org/reference/extract-tune.html) from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features:\n\n```{r}\n#| label: \"rf-importance\"\nlast_rf_fit %>% \n extract_fit_parsnip() %>% \n vip(num_features = 20)\n```\n\nThe most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned.\n\nLet's generate our last ROC curve to visualize. 
Since the event we are predicting is the first level in the `children` factor (\"children\"), we provide `roc_curve()` with the [relevant class probability](https://yardstick.tidymodels.org/reference/roc_curve.html#relevant-level) `.pred_children`:\n\n```{r}\n#| label: \"test-set-roc-curve\"\nlast_rf_fit %>% \n collect_predictions() %>% \n roc_curve(children, .pred_children) %>% \n autoplot()\n```\n\nBased on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data.\n\n## Where to next? {#next}\n\nIf you've made it to the end of this series of [*Get Started*](/start/) articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can [learn how to go farther](/learn/) with tidymodels in your modeling and machine learning projects.\n\nHere are some more ideas for where to go next:\n\n- Study up on statistics and modeling with our comprehensive [books](/books/).\n\n- Dig deeper into the [package documentation sites](/packages/) to find functions that meet your modeling needs. Use the [searchable tables](/find/) to explore what is possible.\n\n- Keep up with the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n\n- Find ways to ask for [help](/help/) and [contribute to tidymodels](/contribute) to help others.\n\n\\###\n\n
Happy modeling!
\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(readr)\nlibrary(vip)\nlibrary(tidymodels)\npkgs <- c(\"tidymodels\", \"readr\", \"glmnet\", \"ranger\", \"vip\")\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction {#intro}\n\nEach of the four previous [*Get Started*](/start/) articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you'll need to start working with models. In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays.\n\n```{r}\n#| echo: false\n#| out-width: \"90%\"\nknitr::include_graphics(\"img/hotel.jpg\")\n```\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) \n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(vip) # for variable importance plots\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The Hotel Bookings Data {#data}\n\nLet's use hotel bookings data from [Antonio, Almeida, and Nunes (2019)](https://doi.org/10.1016/j.dib.2018.11.126) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a [`#TidyTuesday`](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11) dataset with a [data dictionary](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11#data-dictionary) you may want to look over to learn more about the variables. 
We'll use a slightly [edited version of the dataset](https://gist.github.com/topepo/05a74916c343e57a71c51d6bc32a21ce) for this case study.\n\nTo start, let's read our hotel data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (\"\"):\n\n```{r}\n#| label: \"hotel-import\"\n#| message: false\nlibrary(tidymodels)\nlibrary(readr)\n\nhotels <- \n read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>%\n mutate(across(where(is.character), as.factor))\n\ndim(hotels)\n```\n\nIn the original paper, the [authors](https://doi.org/10.1016/j.dib.2018.11.126) caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we'll be analyzing *hotel stays* only.\n\n```{r}\n#| label: \"glimpse-hotels\"\nglimpse(hotels)\n```\n\nWe will build a model to predict which actual hotel stays included children and/or babies, and which did not. Our outcome variable `children` is a factor variable with two levels:\n\n```{r}\n#| label: \"count-children\"\nhotels %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n```\n\nWe can see that children were only in `r round(mean(hotels$children == \"children\") * 100, 1)`% of the reservations. 
This type of class imbalance can often wreak havoc on an analysis. While there are several methods for combating this issue using [recipes](/find/recipes/) (search for steps to `upsample` or `downsample`) or other more specialized packages like [themis](https://themis.tidymodels.org/), the analyses shown below analyze the data as-is.\n\n## Data Splitting & Resampling {#data-split}\n\nFor a data splitting strategy, let's reserve 25% of the stays to the test set. As in our [*Evaluate your model with resampling*](/start/resampling/#data-split) article, we know our outcome variable `children` is pretty imbalanced so we'll use a stratified random sample:\n\n```{r}\n#| label: \"tr-te-split\"\nset.seed(123)\nsplits <- initial_split(hotels, strata = children)\n\nhotel_other <- training(splits)\nhotel_test <- testing(splits)\n\n# training set proportions by children\nhotel_other %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n\n# test set proportions by children\nhotel_test %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n```\n\nIn our articles so far, we've relied on 10-fold cross-validation as the primary resampling method using [`rsample::vfold_cv()`](https://rsample.tidymodels.org/reference/vfold_cv.html). This has created 10 different resamples of the training set (which we further split into *analysis* and *assessment* sets), producing 10 different performance metrics that we then aggregated.\n\nFor this case study, rather than using multiple iterations of resampling, let's create a single resample called a *validation set*. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the `r format(nrow(hotel_other), big.mark = \",\")` stays that were not used for testing, which we called `hotel_other`. 
This split creates two new datasets:\n\n- the set held out for the purpose of measuring performance, called the *validation set*, and\n\n- the remaining data used to fit the model, called the *training set*.\n\n```{r}\n#| label: \"validation-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"50%\"\nknitr::include_graphics(\"img/validation-split.svg\")\n```\n\nWe'll use the `validation_split()` function to allocate 20% of the `hotel_other` stays to the *validation set* and `r format(nrow(hotel_other) * .8, big.mark = \",\")` stays to the *training set*. This means that our model performance metrics will be computed on a single set of `r format(nrow(hotel_other) * .2, big.mark = \",\")` hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling.\n\n```{r}\n#| label: \"validation-set\"\nset.seed(234)\nval_set <- validation_split(hotel_other, \n strata = children, \n prop = 0.80)\nval_set\n```\n\nThis function, like `initial_split()`, has the same `strata` argument, which uses stratified sampling to create the resample. This means that we'll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original `hotel_other` proportions.\n\n## A first model: penalized logistic regression {#first-model}\n\nSince our outcome variable `children` is categorical, logistic regression would be a good first model to start. Let's use a model that can perform feature selection during training. The [glmnet](https://cran.r-project.org/web/packages/glmnet/index.html) R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a *penalty* on the process so that less relevant predictors are driven towards a value of zero. 
One of the glmnet penalization methods, called the [lasso method](https://en.wikipedia.org/wiki/Lasso_(statistics)), can actually set the predictor slopes to zero if a large enough penalty is used.\n\n### Build the model\n\nTo specify a penalized logistic regression model that uses a feature selection penalty, let's use the parsnip package with the [glmnet engine](/find/parsnip/):\n\n```{r}\n#| label: \"logistic-model\"\nlr_mod <- \n logistic_reg(penalty = tune(), mixture = 1) %>% \n set_engine(\"glmnet\")\n```\n\nWe'll set the `penalty` argument to `tune()` as a placeholder for now. This is a model hyperparameter that we will [tune](/start/tuning/) to find the best value for making predictions with our data. Setting `mixture` to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model.\n\n### Create the recipe\n\nLet's create a [recipe](/start/recipes/) to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a [number of useful recipe steps](/start/recipes/#features) for creating features from dates:\n\n- `step_date()` creates predictors for the year, month, and day of the week.\n\n- `step_holiday()` generates a set of indicator variables for specific holidays. Although we don't know where these two hotels are located, we do know that the countries for origin for most stays are based in Europe.\n\n- `step_rm()` removes variables; here we'll use it to remove the original date variable since we no longer want it in the model.\n\nAdditionally, all categorical predictors (e.g., `distribution_channel`, `hotel`, ...) 
should be converted to dummy variables, and all numeric predictors need to be centered and scaled.\n\n- `step_dummy()` converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data.\n\n- `step_zv()` removes indicator variables that only contain a single unique value (e.g. all zeros). This is important because, for penalized models, the predictors should be centered and scaled.\n\n- `step_normalize()` centers and scales numeric variables.\n\nPutting all these steps together into a recipe for a penalized logistic regression model, we have:\n\n```{r}\n#| label: \"logistic-features\"\nholidays <- c(\"AllSouls\", \"AshWednesday\", \"ChristmasEve\", \"Easter\", \n \"ChristmasDay\", \"GoodFriday\", \"NewYearsDay\", \"PalmSunday\")\n\nlr_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date, holidays = holidays) %>% \n step_rm(arrival_date) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors()) %>% \n step_normalize(all_predictors())\n```\n\n### Create the workflow\n\nAs we introduced in [*Preprocess your data with recipes*](/start/recipes/#fit-workflow), let's bundle the model and recipe into a single `workflow()` object to make management of the R objects easier:\n\n```{r}\n#| label: \"logistic-workflow\"\nlr_workflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(lr_recipe)\n```\n\n### Create the grid for tuning\n\nBefore we fit this model, we need to set up a grid of `penalty` values to tune. In our [*Tune model parameters*](/start/tuning/) article, we used [`dials::grid_regular()`](start/tuning/#tune-grid) to create an expanded grid based on a combination of two hyperparameters. 
Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values:\n\n```{r}\n#| label: \"logistic-grid\"\nlr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))\n\nlr_reg_grid %>% top_n(-5) # lowest penalty values\nlr_reg_grid %>% top_n(5) # highest penalty values\n```\n\n### Train and tune the model\n\nLet's use `tune::tune_grid()` to train these 30 penalized logistic regression models. We'll also save the validation set predictions (via the call to `control_grid()`) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate---the proportion of stays including children--- is very low for these data).\n\n```{r}\n#| label: \"logistic-fit\"\n#| cache: false\n#| message: false\nlr_res <- \n lr_workflow %>% \n tune_grid(val_set,\n grid = lr_reg_grid,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n```\n\nIt might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values:\n\n```{r}\n#| label: \"logistic-results\"\n#| fig-height: 4.25\n#| fig-width: 6\nlr_plot <- \n lr_res %>% \n collect_metrics() %>% \n ggplot(aes(x = penalty, y = mean)) + \n geom_point() + \n geom_line() + \n ylab(\"Area under the ROC Curve\") +\n scale_x_log10(labels = scales::label_number())\n\nlr_plot \n```\n\nThis plots shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. 
This happens because a large enough penalty will remove *all* predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class).\n\nOur model performance seems to plateau at the smaller penalty values, so going by the `roc_auc` metric alone could lead us to multiple options for the \"best\" value for this hyperparameter:\n\n```{r}\ntop_models <-\n lr_res %>% \n show_best(\"roc_auc\", n = 15) %>% \n arrange(penalty) \ntop_models\n```\n\n```{r}\n#| label: \"top-cand-mods\"\n#| echo: false\n# Adding a candidate model ID variable\ntop_models <- \n top_models %>% \n mutate(cand_model = row_number())\n\ntop_pen <- \n lr_res %>% \n select_best(\"roc_auc\") %>% \n pull(penalty)\n\ntop_cand <- \n top_models %>% \n filter(penalty == top_pen) %>% \n pull(cand_model)\n\nselect_cand <- 12\n\nselect_pen <-\n top_models %>% \n filter(cand_model == select_cand) %>% \n pull(penalty)\n\nselect_roc <- \n top_models %>% \n filter(cand_model == select_cand) %>% \n pull(mean)\n```\n\nEvery candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used `select_best()`, it would return candidate model `r top_cand` with a penalty value of `r format(top_pen, digits = 3)`, shown with the dotted line below.\n\n```{r}\n#| label: \"lr-plot-lines\"\n#| echo: false\n#| fig-height: 4.25\n#| fig-width: 6\nlr_plot +\n geom_vline(color = \"#CA225E\", lty = 3, xintercept = top_pen) +\n geom_vline(color = \"#CA225E\", xintercept = select_pen) \n```\n\nHowever, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. 
For example, candidate model `r select_cand` with a penalty value of `r format(select_pen, digits = 3)` has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we'd prefer to choose a higher penalty value.\n\nLet's select this value and visualize the validation set ROC curve:\n\n```{r}\n#| label: \"logistic-best\"\nlr_best <- \n lr_res %>% \n collect_metrics() %>% \n arrange(penalty) %>% \n slice(12)\nlr_best\n```\n\n```{r}\n#| label: \"logistic-roc-curve\"\nlr_auc <- \n lr_res %>% \n collect_predictions(parameters = lr_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Logistic Regression\")\n\nautoplot(lr_auc)\n```\n\nThe level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method.\n\n## A second model: tree-based ensemble {#second-model}\n\nAn effective and low-maintenance modeling technique is a *random forest*. This model was also used in our [*Evaluate your model with resampling*](/start/resampling/) article. Compared to logistic regression, a random forest model is more flexible. A random forest is an *ensemble model* typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. 
Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.).\n\n### Build the model and improve training time\n\nAlthough the default hyperparameters for random forests tend to give reasonable results, we'll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do [parallel processing](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) for you, and allows users to use multiple cores or separate machines to fit models.\n\nBut, here we are using a single validation set, so parallelization isn't an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do:\n\n```{r}\n#| label: \"num-cores\"\ncores <- parallel::detectCores()\ncores\n```\n\nWe have `r cores` cores to work with. We can pass this information to the ranger engine when we set up our parsnip `rand_forest()` model. 
To enable parallel processing, we can pass engine-specific arguments like `num.threads` to ranger when we set the engine:\n\n```{r}\n#| label: \"rf-model\"\nrf_mod <- \n rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores) %>% \n set_mode(\"classification\")\n```\n\nThis works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you --- we typically do not recommend relying on the modeling engine (like we did here) to do this.\n\nIn this model, we used `tune()` as a placeholder for the `mtry` and `min_n` argument values, because these are our two hyperparameters that we will [tune](/start/tuning/).\n\n### Create the recipe and workflow\n\nUnlike penalized logistic regression models, random forest models do not require [dummy](https://bookdown.org/max/FES/categorical-trees.html) or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our `arrival_date` variable. 
As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data.\n\n```{r}\n#| label: \"rf-features\"\nrf_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date) %>% \n step_rm(arrival_date) \n```\n\nAdding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest:\n\n```{r}\n#| label: \"rf-workflow\"\nrf_workflow <- \n workflow() %>% \n add_model(rf_mod) %>% \n add_recipe(rf_recipe)\n```\n\n### Train and tune the model\n\nWhen we set up our parsnip model, we chose two hyperparameters for tuning:\n\n```{r}\n#| message: false\nrf_mod\n\n# show what will be tuned\nextract_parameter_set_dials(rf_mod)\n```\n\nThe `mtry` hyperparameter sets the number of predictor variables that each node in the decision tree \"sees\" and can learn about, so it can range from 1 to the total number of features present; when `mtry` = all possible features, the model is the same as bagging decision trees. The `min_n` hyperparameter sets the minimum `n` to split at any node.\n\nWe will use a space-filling design to tune, with 25 candidate models:\n\n```{r}\n#| label: \"rf-fit\"\n#| cache: false\nset.seed(345)\nrf_res <- \n rf_workflow %>% \n tune_grid(val_set,\n grid = 25,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n```\n\nThe message printed above *\"Creating pre-processing data to finalize unknown parameter: mtry\"* is related to the size of the data set. 
Since `mtry` depends on the number of predictors in the data set, `tune_grid()` determines the upper bound for `mtry` once it receives the data.\n\nHere are our top 5 random forest models, out of the 25 candidates:\n\n```{r}\nrf_res %>% \n show_best(metric = \"roc_auc\")\n```\n\nRight away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of `r format(select_roc, digits = 3)`.\n\nPlotting the results of the tuning process highlights that both `mtry` (number of predictors at each node) and `min_n` (minimum number of data points required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values --- all but one of the ROC AUC values are greater than 0.90.\n\n```{r}\n#| label: \"rf-results\"\n#| fig-height: 4\nautoplot(rf_res)\n```\n\nLet's select the best model according to the ROC AUC metric. Our final tuning parameter values are:\n\n```{r}\n#| label: \"rf-best\"\nrf_best <- \n rf_res %>% \n select_best(metric = \"roc_auc\")\nrf_best\n```\n\nTo calculate the data needed to plot the ROC curve, we use `collect_predictions()`. This is only possible after tuning with `control_grid(save_pred = TRUE)`. 
In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children.\n\n```{r}\nrf_res %>% \n collect_predictions()\n```\n\nTo filter the predictions for only our best random forest model, we can use the `parameters` argument and pass it our tibble with the best hyperparameter values from tuning, which we called `rf_best`:\n\n```{r}\n#| label: \"rf-roc\"\nrf_auc <- \n rf_res %>% \n collect_predictions(parameters = rf_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Random Forest\")\n```\n\nNow, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model:\n\n```{r}\n#| label: \"rf-lr-roc-curve\"\nbind_rows(rf_auc, lr_auc) %>% \n ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + \n geom_path(lwd = 1.5, alpha = 0.8) +\n geom_abline(lty = 3) + \n coord_equal() + \n scale_color_viridis_d(option = \"plasma\", end = .6)\n```\n\nThe random forest is uniformly better across event probability thresholds.\n\n## The last fit {#last-fit}\n\nOur goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set.\n\nWe'll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: `importance = \"impurity\"`. 
This will provide *variable importance* scores for this last model, which gives some insight into which predictors drive model performance.\n\n```{r}\n#| label: \"last-rf\"\n#| cache: false\n# the last model\nlast_rf_mod <- \n rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores, importance = \"impurity\") %>% \n set_mode(\"classification\")\n\n# the last workflow\nlast_rf_workflow <- \n rf_workflow %>% \n update_model(last_rf_mod)\n\n# the last fit\nset.seed(345)\nlast_rf_fit <- \n last_rf_workflow %>% \n last_fit(splits)\n\nlast_rf_fit\n```\n\nThis fitted workflow contains *everything*, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance?\n\n```{r}\nlast_rf_fit %>% \n collect_metrics()\n```\n\nThis ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data.\n\nWe can access those variable importance scores via the `.workflow` column. We can [extract out the fit](https://tune.tidymodels.org/reference/extract-tune.html) from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features:\n\n```{r}\n#| label: \"rf-importance\"\nlast_rf_fit %>% \n extract_fit_parsnip() %>% \n vip(num_features = 20)\n```\n\nThe most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned.\n\nLet's generate our last ROC curve to visualize. 
Since the event we are predicting is the first level in the `children` factor (\"children\"), we provide `roc_curve()` with the [relevant class probability](https://yardstick.tidymodels.org/reference/roc_curve.html#relevant-level) `.pred_children`:\n\n```{r}\n#| label: \"test-set-roc-curve\"\nlast_rf_fit %>% \n collect_predictions() %>% \n roc_curve(children, .pred_children) %>% \n autoplot()\n```\n\nBased on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data.\n\n## Where to next? {#next}\n\nIf you've made it to the end of this series of [*Get Started*](/start/) articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can [learn how to go farther](/learn/) with tidymodels in your modeling and machine learning projects.\n\nHere are some more ideas for where to go next:\n\n- Study up on statistics and modeling with our comprehensive [books](/books/).\n\n- Dig deeper into the [package documentation sites](/packages/) to find functions that meet your modeling needs. Use the [searchable tables](/find/) to explore what is possible.\n\n- Keep up with the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n\n- Find ways to ask for [help](/help/) and [contribute to tidymodels](/contribute) to help others.\n\n\\###\n\n
Happy modeling!
\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"css":["../styles.css"],"include-after-body":["../repo-actions-delete.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"A predictive modeling case study","weight":5,"categories":["model fitting","tuning","parsnip","recipes","rsample","workflows","tune"],"description":"Develop, from beginning to end, a predictive model using 
best practices.\n","toc-location":"body","toc-title":""},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/start/index.qmd.json b/.quarto/idx/start/index.qmd.json new file mode 100644 index 00000000..e8e59d94 --- /dev/null +++ b/.quarto/idx/start/index.qmd.json @@ -0,0 +1 @@ +{"title":"Welcome!","markdown":{"yaml":{"title":"Welcome!","description":"What do you need to know to start using tidymodels? Learn what you need in 5 articles.","toc":false,"css":"styles.css"},"headingText":"If you are new to R or the tidyverse","containsRefs":false,"markdown":"\n\nHere, learn what you need to get started with tidymodels in five articles, starting with how to create a model and ending with a beginning-to-end modeling case study. After you are comfortable with these basics, you can [learn how to go farther with tidymodels](/learn/).\n\n\nTo get the most out of tidymodels, we recommend that you start by learning some basics about R and the [tidyverse](https://www.tidyverse.org/) first, then return here when you feel ready. Here are some resources to start learning:\n\n- [Finding Your Way To R](https://education.rstudio.com/learn/), from the RStudio Education team.\n\n- [Learn the tidyverse](https://www.tidyverse.org/learn/), from the tidyverse team.\n\n- [Statistical Inference via Data Science: A ModernDive into R and the Tidyverse](/books/moderndive/).\n","srcMarkdownNoYaml":"\n\nHere, learn what you need to get started with tidymodels in five articles, starting with how to create a model and ending with a beginning-to-end modeling case study. After you are comfortable with these basics, you can [learn how to go farther with tidymodels](/learn/).\n\n## If you are new to R or the tidyverse\n\nTo get the most out of tidymodels, we recommend that you start by learning some basics about R and the [tidyverse](https://www.tidyverse.org/) first, then return here when you feel ready. 
Here are some resources to start learning:\n\n- [Finding Your Way To R](https://education.rstudio.com/learn/), from the RStudio Education team.\n\n- [Learn the tidyverse](https://www.tidyverse.org/learn/), from the tidyverse team.\n\n- [Statistical Inference via Data Science: A ModernDive into R and the Tidyverse](/books/moderndive/).\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":false,"css":["styles.css"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../styles.scss","../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Welcome!","description":"What do you need to know to start using tidymodels? 
Learn what you need in 5 articles."},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/start/models/index.qmd.json b/.quarto/idx/start/models/index.qmd.json new file mode 100644 index 00000000..51a2dd30 --- /dev/null +++ b/.quarto/idx/start/models/index.qmd.json @@ -0,0 +1 @@ +{"title":"Build a model","markdown":{"yaml":{"title":"Build a model","weight":1,"categories":["model fitting","parsnip","broom"],"description":"Get started by learning how to specify and train a model using tidymodels.\n","toc-location":"body","toc-depth":2,"toc-title":"","css":"../styles.css","include-after-body":"../repo-actions-delete.html"},"headingText":"Introduction","headingAttr":{"id":"intro","classes":[],"keyvalue":[]},"containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(readr)\nlibrary(rstanarm)\nlibrary(tidymodels)\nlibrary(broom.mixed)\nlibrary(dotwhisker)\n\npkgs <- c(\"tidymodels\", \"readr\", \"rstanarm\", \"broom.mixed\", \"dotwhisker\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n\nHow do you create a statistical model using tidymodels? In this article, we will walk you through the steps. 
We start with data for modeling, learn how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/), and understand why these functions are designed this way.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the parsnip package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(broom.mixed) # for converting bayesian models to tidy tibbles\nlibrary(dotwhisker) # for visualizing regression results\n```\n\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n## The Sea Urchins Data {#data}\n\nLet's use the data from [Constable (1993)](https://link.springer.com/article/10.1007/BF00349318) to explore how three different feeding regimes affect the size of sea urchins over time. The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed. \n\nTo start, let's read our urchins data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (\"\"):\n\n```{r}\n#| label: \"data\"\nurchins <-\n # Data were assembled for a tutorial \n # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html\n read_csv(\"https://tidymodels.org/start/models/urchins.csv\") %>% \n # Change the names to be a little more verbose\n setNames(c(\"food_regime\", \"initial_volume\", \"width\")) %>% \n # Factors are very helpful for modeling, so we convert one column\n mutate(food_regime = factor(food_regime, levels = c(\"Initial\", \"Low\", \"High\")))\n```\n\nLet's take a quick look at the data:\n\n```{r}\nurchins\n```\n\nThe urchins data is a [tibble](https://tibble.tidyverse.org/index.html). If you are new to tibbles, the best place to start is the [tibbles chapter](https://r4ds.had.co.nz/tibbles.html) in *R for Data Science*. 
For each of the `r nrow(urchins)` urchins, we know their:\n\n+ experimental feeding regime group (`food_regime`: either `Initial`, `Low`, or `High`),\n+ size in milliliters at the start of the experiment (`initial_volume`), and\n+ suture width at the end of the experiment (`width`).\n\nAs a first step in modeling, it's always a good idea to plot the data: \n\n```{r}\n#| label: \"urchin-plot\"\nggplot(urchins,\n aes(x = initial_volume, \n y = width, \n group = food_regime, \n col = food_regime)) + \n geom_point() + \n geom_smooth(method = lm, se = FALSE) +\n scale_color_viridis_d(option = \"plasma\", end = .7)\n```\n\nWe can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition.\n\n## Build and fit a model {#build-model}\n\nA standard two-way analysis of variance ([ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm)) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let's build a model that allows for two-way interactions. Specifying an R formula with our variables in this way: \n\n```{r}\n#| label: \"two-way-int\"\n#| eval: false\nwidth ~ initial_volume * food_regime\n```\n\nallows our regression model depending on initial volume to have separate slopes and intercepts for each food regime. \n\nFor this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the _functional form_ of the model that we want using the [parsnip package](https://parsnip.tidymodels.org/). Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is [\"linear regression\"](https://parsnip.tidymodels.org/reference/linear_reg.html). 
We can declare this with: \n\n\n```{r}\n#| label: \"lm-tm\"\nlinear_reg()\n```\n\nThat is pretty underwhelming since, on its own, it doesn't really do much. However, now that the type of model has been specified, we can think about a method for _fitting_ or training the model, the model **engine**. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for `linear_reg()` is `\"lm\"` for ordinary least squares, as you can see above. We could set a non-default option instead:\n\n```{r}\n#| label: \"lm-spec\"\nlinear_reg() %>% \n set_engine(\"keras\")\n```\n\nThe [documentation page for `linear_reg()`](https://parsnip.tidymodels.org/reference/linear_reg.html) lists all the possible engines. We'll save our model object using the default engine as `lm_mod`.\n\n```{r}\nlm_mod <- linear_reg()\n```\n\nFrom here, the model can be estimated or trained using the [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) function:\n\n```{r}\n#| label: \"lm-fit\"\nlm_fit <- \n lm_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\nlm_fit\n```\n\nPerhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the `summary()` function for `lm` objects can provide that, it gives the results back in an unwieldy format. Many models have a `tidy()` method that provides the summary results in a more predictable and useful format (e.g. 
a data frame with standard column names): \n\n```{r}\n#| label: \"lm-table\"\ntidy(lm_fit)\n```\n\nThis kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package:\n\n```{r}\n#| label: \"dwplot\"\ntidy(lm_fit) %>% \n dwplot(dot_args = list(size = 2, color = \"black\"),\n whisker_args = list(color = \"black\"),\n vline = geom_vline(xintercept = 0, colour = \"grey50\", linetype = 2))\n```\n\n\n## Use a model to predict {#predict-model}\n\nThis fitted object `lm_fit` has the `lm` model output built-in, which you can access with `lm_fit$fit`, but there are some benefits to using the fitted parsnip model object when it comes to predicting.\n\nSuppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph:\n\n```{r}\n#| label: \"new-points\"\nnew_points <- expand.grid(initial_volume = 20, \n food_regime = c(\"Initial\", \"Low\", \"High\"))\nnew_points\n```\n\nTo get our predicted results, we can use the `predict()` function to find the mean values at 20ml. \n\nIt is also important to communicate the variability, so we also need to find the predicted confidence intervals. If we had used `lm()` to fit the model directly, a few minutes of reading the [documentation page](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/predict.lm.html) for `predict.lm()` would explain how to do this. However, if we decide to use a different model to estimate urchin size (_spoiler:_ we will!), it is likely that a completely different syntax would be required. \n\nInstead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values. 
\n\nFirst, let's generate the mean body width values: \n\n```{r}\n#| label: \"lm-pred-mean\"\nmean_pred <- predict(lm_fit, new_data = new_points)\nmean_pred\n```\n\nWhen making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. This makes it easy to combine the original data and the predictions in a usable format: \n\n```{r}\n#| label: \"lm-all-pred\"\nconf_int_pred <- predict(lm_fit, \n new_data = new_points, \n type = \"conf_int\")\nconf_int_pred\n\n# Now combine: \nplot_data <- \n new_points %>% \n bind_cols(mean_pred) %>% \n bind_cols(conf_int_pred)\n\n# and plot:\nggplot(plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, \n ymax = .pred_upper),\n width = .2) + \n labs(y = \"urchin size\")\n```\n\n## Model with a different engine {#new-engine}\n\nEvery one on your team is happy with that plot _except_ that one person who just read their first book on [Bayesian analysis](https://bayesian.org/what-is-bayesian-analysis/). They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a [_prior distribution_](https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7) needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). 
After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors _wide_ using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom).\n\nThe [documentation](https://mc-stan.org/rstanarm/articles/priors.html) on the rstanarm package shows us that the `stan_glm()` function can be used to estimate this model, and that the function arguments that need to be specified are called `prior` and `prior_intercept`. It turns out that `linear_reg()` has a [`stan` engine](https://parsnip.tidymodels.org/reference/linear_reg.html#details). Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to [`parsnip::set_engine()`](https://parsnip.tidymodels.org/reference/set_engine.html). After that, the same exact `fit()` call is used:\n\n```{r}\n#| label: \"go-stan\"\n#| message: false\n# set the prior distribution\nprior_dist <- rstanarm::student_t(df = 1)\n\nset.seed(123)\n\n# make the parsnip model\nbayes_mod <- \n linear_reg() %>% \n set_engine(\"stan\", \n prior_intercept = prior_dist, \n prior = prior_dist) \n\n# train the model\nbayes_fit <- \n bayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n\nprint(bayes_fit, digits = 5)\n```\n\nThis kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use `set.seed()` to ensure that the same (pseudo-)random numbers are generated each time we run this code. 
The number `123` isn't special or related to our data; it is just a \"seed\" used to choose random numbers.\n\nTo update the parameter table, the `tidy()` method is once again used: \n\n```{r}\n#| label: \"tidy-stan\"\ntidy(bayes_fit, conf.int = TRUE)\n```\n\nA goal of the tidymodels packages is that the **interfaces to common tasks are standardized** (as seen in the `tidy()` results above). The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax:\n\n```{r}\n#| label: \"stan-pred\"\nbayes_plot_data <- \n new_points %>% \n bind_cols(predict(bayes_fit, new_data = new_points)) %>% \n bind_cols(predict(bayes_fit, new_data = new_points, type = \"conf_int\"))\n\nggplot(bayes_plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + \n labs(y = \"urchin size\") + \n ggtitle(\"Bayesian model with t(1) prior distribution\")\n```\n\nThis isn't very different from the non-Bayesian results (except in interpretation). \n\n::: {.callout-note}\nThe [parsnip](https://parsnip.tidymodels.org/) package can work with many model types, engines, and arguments. Check out [tidymodels.org/find/parsnip](/find/parsnip/) to see what is available. \n:::\n\n## Why does it work that way? {#why}\n\nThe extra step of defining the model using a function like `linear_reg()` might seem superfluous since a call to `lm()` is much more succinct. However, the problem with standard modeling functions is that they don't separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can't recycle those computations. \n\nAlso, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using single function call). 
[Model tuning](/start/tuning/) with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if `linear_reg()` immediately fit the model. \n\nIf you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (`%>%`). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the _data_ as the first argument. For example: \n\n```{r}\n#| label: \"tidy-data\"\nurchins %>% \n group_by(food_regime) %>% \n summarize(med_vol = median(initial_volume))\n```\n\nwhereas the modeling code uses the pipe to pass around the _model object_:\n\n```{r}\n#| label: \"tidy-model\"\n#| eval: false\nbayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n```\n\nThis may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates:\n\n```{r}\n#| eval: false\nggplot(urchins,\n aes(initial_volume, width)) + # returns a ggplot object \n geom_jitter() + # same\n geom_smooth(method = lm, se = FALSE) + # same \n labs(x = \"Volume\", y = \"Width\") # etc\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(readr)\nlibrary(rstanarm)\nlibrary(tidymodels)\nlibrary(broom.mixed)\nlibrary(dotwhisker)\n\npkgs <- c(\"tidymodels\", \"readr\", \"rstanarm\", \"broom.mixed\", \"dotwhisker\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\n\n## Introduction {#intro}\n\nHow do you create a statistical model using tidymodels? In this article, we will walk you through the steps. 
We start with data for modeling, learn how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/), and understand why these functions are designed this way.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the parsnip package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(broom.mixed) # for converting bayesian models to tidy tibbles\nlibrary(dotwhisker) # for visualizing regression results\n```\n\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n## The Sea Urchins Data {#data}\n\nLet's use the data from [Constable (1993)](https://link.springer.com/article/10.1007/BF00349318) to explore how three different feeding regimes affect the size of sea urchins over time. The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed. \n\nTo start, let's read our urchins data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (\"https://tidymodels.org/start/models/urchins.csv\"):\n\n```{r}\n#| label: \"data\"\nurchins <-\n # Data were assembled for a tutorial \n # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html\n read_csv(\"https://tidymodels.org/start/models/urchins.csv\") %>% \n # Change the names to be a little more verbose\n setNames(c(\"food_regime\", \"initial_volume\", \"width\")) %>% \n # Factors are very helpful for modeling, so we convert one column\n mutate(food_regime = factor(food_regime, levels = c(\"Initial\", \"Low\", \"High\")))\n```\n\nLet's take a quick look at the data:\n\n```{r}\nurchins\n```\n\nThe urchins data is a [tibble](https://tibble.tidyverse.org/index.html). If you are new to tibbles, the best place to start is the [tibbles chapter](https://r4ds.had.co.nz/tibbles.html) in *R for Data Science*. 
For each of the `r nrow(urchins)` urchins, we know their:\n\n+ experimental feeding regime group (`food_regime`: either `Initial`, `Low`, or `High`),\n+ size in milliliters at the start of the experiment (`initial_volume`), and\n+ suture width at the end of the experiment (`width`).\n\nAs a first step in modeling, it's always a good idea to plot the data: \n\n```{r}\n#| label: \"urchin-plot\"\nggplot(urchins,\n aes(x = initial_volume, \n y = width, \n group = food_regime, \n col = food_regime)) + \n geom_point() + \n geom_smooth(method = lm, se = FALSE) +\n scale_color_viridis_d(option = \"plasma\", end = .7)\n```\n\nWe can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition.\n\n## Build and fit a model {#build-model}\n\nA standard two-way analysis of variance ([ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm)) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let's build a model that allows for two-way interactions. Specifying an R formula with our variables in this way: \n\n```{r}\n#| label: \"two-way-int\"\n#| eval: false\nwidth ~ initial_volume * food_regime\n```\n\nallows our regression model depending on initial volume to have separate slopes and intercepts for each food regime. \n\nFor this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the _functional form_ of the model that we want using the [parsnip package](https://parsnip.tidymodels.org/). Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is [\"linear regression\"](https://parsnip.tidymodels.org/reference/linear_reg.html). 
We can declare this with: \n\n\n```{r}\n#| label: \"lm-tm\"\nlinear_reg()\n```\n\nThat is pretty underwhelming since, on its own, it doesn't really do much. However, now that the type of model has been specified, we can think about a method for _fitting_ or training the model, the model **engine**. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for `linear_reg()` is `\"lm\"` for ordinary least squares, as you can see above. We could set a non-default option instead:\n\n```{r}\n#| label: \"lm-spec\"\nlinear_reg() %>% \n set_engine(\"keras\")\n```\n\nThe [documentation page for `linear_reg()`](https://parsnip.tidymodels.org/reference/linear_reg.html) lists all the possible engines. We'll save our model object using the default engine as `lm_mod`.\n\n```{r}\nlm_mod <- linear_reg()\n```\n\nFrom here, the model can be estimated or trained using the [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) function:\n\n```{r}\n#| label: \"lm-fit\"\nlm_fit <- \n lm_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\nlm_fit\n```\n\nPerhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the `summary()` function for `lm` objects can provide that, it gives the results back in an unwieldy format. Many models have a `tidy()` method that provides the summary results in a more predictable and useful format (e.g. 
a data frame with standard column names): \n\n```{r}\n#| label: \"lm-table\"\ntidy(lm_fit)\n```\n\nThis kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package:\n\n```{r}\n#| label: \"dwplot\"\ntidy(lm_fit) %>% \n dwplot(dot_args = list(size = 2, color = \"black\"),\n whisker_args = list(color = \"black\"),\n vline = geom_vline(xintercept = 0, colour = \"grey50\", linetype = 2))\n```\n\n\n## Use a model to predict {#predict-model}\n\nThis fitted object `lm_fit` has the `lm` model output built-in, which you can access with `lm_fit$fit`, but there are some benefits to using the fitted parsnip model object when it comes to predicting.\n\nSuppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph:\n\n```{r}\n#| label: \"new-points\"\nnew_points <- expand.grid(initial_volume = 20, \n food_regime = c(\"Initial\", \"Low\", \"High\"))\nnew_points\n```\n\nTo get our predicted results, we can use the `predict()` function to find the mean values at 20ml. \n\nIt is also important to communicate the variability, so we also need to find the predicted confidence intervals. If we had used `lm()` to fit the model directly, a few minutes of reading the [documentation page](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/predict.lm.html) for `predict.lm()` would explain how to do this. However, if we decide to use a different model to estimate urchin size (_spoiler:_ we will!), it is likely that a completely different syntax would be required. \n\nInstead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values. 
\n\nFirst, let's generate the mean body width values: \n\n```{r}\n#| label: \"lm-pred-mean\"\nmean_pred <- predict(lm_fit, new_data = new_points)\nmean_pred\n```\n\nWhen making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. This makes it easy to combine the original data and the predictions in a usable format: \n\n```{r}\n#| label: \"lm-all-pred\"\nconf_int_pred <- predict(lm_fit, \n new_data = new_points, \n type = \"conf_int\")\nconf_int_pred\n\n# Now combine: \nplot_data <- \n new_points %>% \n bind_cols(mean_pred) %>% \n bind_cols(conf_int_pred)\n\n# and plot:\nggplot(plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, \n ymax = .pred_upper),\n width = .2) + \n labs(y = \"urchin size\")\n```\n\n## Model with a different engine {#new-engine}\n\nEveryone on your team is happy with that plot _except_ that one person who just read their first book on [Bayesian analysis](https://bayesian.org/what-is-bayesian-analysis/). They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a [_prior distribution_](https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7) needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). 
After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors _wide_ using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom).\n\nThe [documentation](https://mc-stan.org/rstanarm/articles/priors.html) on the rstanarm package shows us that the `stan_glm()` function can be used to estimate this model, and that the function arguments that need to be specified are called `prior` and `prior_intercept`. It turns out that `linear_reg()` has a [`stan` engine](https://parsnip.tidymodels.org/reference/linear_reg.html#details). Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to [`parsnip::set_engine()`](https://parsnip.tidymodels.org/reference/set_engine.html). After that, the same exact `fit()` call is used:\n\n```{r}\n#| label: \"go-stan\"\n#| message: false\n# set the prior distribution\nprior_dist <- rstanarm::student_t(df = 1)\n\nset.seed(123)\n\n# make the parsnip model\nbayes_mod <- \n linear_reg() %>% \n set_engine(\"stan\", \n prior_intercept = prior_dist, \n prior = prior_dist) \n\n# train the model\nbayes_fit <- \n bayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n\nprint(bayes_fit, digits = 5)\n```\n\nThis kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use `set.seed()` to ensure that the same (pseudo-)random numbers are generated each time we run this code. 
The number `123` isn't special or related to our data; it is just a \"seed\" used to choose random numbers.\n\nTo update the parameter table, the `tidy()` method is once again used: \n\n```{r}\n#| label: \"tidy-stan\"\ntidy(bayes_fit, conf.int = TRUE)\n```\n\nA goal of the tidymodels packages is that the **interfaces to common tasks are standardized** (as seen in the `tidy()` results above). The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax:\n\n```{r}\n#| label: \"stan-pred\"\nbayes_plot_data <- \n new_points %>% \n bind_cols(predict(bayes_fit, new_data = new_points)) %>% \n bind_cols(predict(bayes_fit, new_data = new_points, type = \"conf_int\"))\n\nggplot(bayes_plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + \n labs(y = \"urchin size\") + \n ggtitle(\"Bayesian model with t(1) prior distribution\")\n```\n\nThis isn't very different from the non-Bayesian results (except in interpretation). \n\n::: {.callout-note}\nThe [parsnip](https://parsnip.tidymodels.org/) package can work with many model types, engines, and arguments. Check out [tidymodels.org/find/parsnip](/find/parsnip/) to see what is available. \n:::\n\n## Why does it work that way? {#why}\n\nThe extra step of defining the model using a function like `linear_reg()` might seem superfluous since a call to `lm()` is much more succinct. However, the problem with standard modeling functions is that they don't separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can't recycle those computations. \n\nAlso, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using a single function call). 
[Model tuning](/start/tuning/) with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if `linear_reg()` immediately fit the model. \n\nIf you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (`%>%`). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the _data_ as the first argument. For example: \n\n```{r}\n#| label: \"tidy-data\"\nurchins %>% \n group_by(food_regime) %>% \n summarize(med_vol = median(initial_volume))\n```\n\nwhereas the modeling code uses the pipe to pass around the _model object_:\n\n```{r}\n#| label: \"tidy-model\"\n#| eval: false\nbayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n```\n\nThis may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates:\n\n```{r}\n#| eval: false\nggplot(urchins,\n aes(initial_volume, width)) + # returns a ggplot object \n geom_jitter() + # same\n geom_smooth(method = lm, se = FALSE) + # same \n labs(x = \"Volume\", y = \"Width\") # etc\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: 
false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"css":["../styles.css"],"include-after-body":["../repo-actions-delete.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work 
as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Build a model","weight":1,"categories":["model fitting","parsnip","broom"],"description":"Get started by learning how to specify and train a model using 
tidymodels.\n","toc-location":"body","toc-title":""},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/start/recipes/index.qmd.json b/.quarto/idx/start/recipes/index.qmd.json new file mode 100644 index 00000000..30001819 --- /dev/null +++ b/.quarto/idx/start/recipes/index.qmd.json @@ -0,0 +1 @@ +{"title":"Preprocess your data with recipes","markdown":{"yaml":{"title":"Preprocess your data with recipes","weight":2,"categories":["pre-processing","recipes","parsnip","workflows","yardstick","broom"],"description":"Prepare data for modeling with modular preprocessing steps.\n","toc-location":"body","toc-depth":2,"toc-title":"","css":"../styles.css","include-after-body":"../repo-actions-delete.html"},"headingText":"Introduction","headingAttr":{"id":"intro","classes":[],"keyvalue":[]},"containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(nycflights13)\nlibrary(kableExtra)\nlibrary(skimr)\npkgs <- c(\"tidymodels\", \"nycflights13\", \"skimr\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n\nIn our [*Build a Model*](/start/models/) article, we learned how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/). In this article, we'll explore another tidymodels package, [recipes](https://recipes.tidymodels.org/), which is designed to help you preprocess your data *before* training your model. 
Recipes are built as a series of preprocessing steps, such as:\n\n- converting qualitative predictors to indicator variables (also known as dummy variables),\n\n- transforming data to be on a different scale (e.g., taking the logarithm of a variable),\n\n- transforming whole groups of predictors together,\n\n- extracting key features from raw variables (e.g., getting the day of the week out of a date variable),\n\nand so on. If you are familiar with R's formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the recipes package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(nycflights13) # for flight data\nlibrary(skimr) # for variable summaries\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The New York City flight data {#data}\n\n```{r}\n#| label: \"flight-start\"\n#| echo: false\nset.seed(123)\n\nflight_data <- \n flights %>% \n mutate(\n # Convert the arrival delay to a factor\n arr_delay = ifelse(arr_delay >= 30, \"late\", \"on_time\"),\n arr_delay = factor(arr_delay),\n # We will use the date (not date-time) in the recipe below\n date = lubridate::as_date(time_hour)\n ) %>% \n # Include the weather data\n inner_join(weather, by = c(\"origin\", \"time_hour\")) %>% \n # Only retain the specific columns we will use\n select(dep_time, flight, origin, dest, air_time, distance, \n carrier, date, arr_delay, time_hour) %>% \n # Exclude missing data\n na.omit() %>% \n # For creating models, it is better to have qualitative columns\n # encoded as factors (instead of character strings)\n mutate_if(is.character, as.factor)\n```\n\nLet's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 
minutes late. This data set contains information on `r scales::comma(nrow(flight_data))` flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:\n\n```{r}\n#| ref.label: \"flight-start\"\n\n```\n\nWe can see that about `r percent(mean(flight_data$arr_delay == \"late\"))` of the flights in this data set arrived more than 30 minutes late.\n\n```{r}\n#| label: \"count-delays\"\nflight_data %>% \n count(arr_delay) %>% \n mutate(prop = n/sum(n))\n```\n\nBefore we start building up our recipe, let's take a quick look at a few specific variables that will be important for both preprocessing and modeling.\n\nFirst, notice that the variable we created called `arr_delay` is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor.\n\n```{r}\n#| label: \"glimpse-flights\"\nglimpse(flight_data)\n```\n\nSecond, there are two variables that we don't want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are `flight`, a numeric value, and `time_hour`, a date-time value.\n\nThird, there are `r length(levels(flight_data$dest))` flight destinations contained in `dest` and `r length(levels(flight_data$carrier))` distinct `carrier`s.\n\n```{r}\n#| label: \"skim-flights\"\nflight_data %>% \n skimr::skim(dest, carrier) \n```\n\nBecause we'll be using a simple logistic regression model, the variables `dest` and `carrier` will be converted to [dummy variables](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html). However, some of these values do not occur very frequently and this could complicate our analysis. 
We'll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling.\n\n## Data splitting {#data-split}\n\nTo get started, let's split this single dataset into two: a *training* set and a *testing* set. We'll keep most of the rows in the original dataset (subset chosen randomly) in the *training* set. The training data will be used to *fit* the model, and the *testing* set will be used to measure model performance.\n\nTo do this, we can use the [rsample](https://rsample.tidymodels.org/) package to create an object that contains the information on *how* to split the data, and then two more rsample functions to create data frames for the training and testing sets:\n\n```{r}\n#| label: \"split\"\n# Fix the random numbers by setting the seed \n# This enables the analysis to be reproducible when random numbers are used \nset.seed(222)\n# Put 3/4 of the data into the training set \ndata_split <- initial_split(flight_data, prop = 3/4)\n\n# Create data frames for the two sets:\ntrain_data <- training(data_split)\ntest_data <- testing(data_split)\n```\n\n## Create recipe and roles {#recipe}\n\nTo get started, let's create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model.\n\nLet's initiate a new recipe:\n\n```{r}\n#| label: \"initial-recipe\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) \n```\n\nThe [`recipe()` function](https://recipes.tidymodels.org/reference/recipe.html) as we used it here has two arguments:\n\n- A **formula**. Any variable on the left-hand side of the tilde (`~`) is considered the model outcome (here, `arr_delay`). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (`.`) to indicate all other variables as predictors.\n\n- The **data**. A recipe is associated with the data set used to create the model. 
This will typically be the *training* set, so `data = train_data` here. Naming a data set doesn't actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc.\n\nNow we can add [roles](https://recipes.tidymodels.org/reference/roles.html) to this recipe. We can use the [`update_role()` function](https://recipes.tidymodels.org/reference/roles.html) to let recipes know that `flight` and `time_hour` are variables with a custom role that we called `\"ID\"` (a role can have any character value). Whereas our formula included all variables in the training set other than `arr_delay` as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors.\n\n```{r}\n#| label: \"recipe-roles\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") \n```\n\nThis step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong.\n\nTo get the current set of variables and roles, use the `summary()` function:\n\n```{r}\n#| label: \"summary\"\nsummary(flights_rec)\n```\n\n## Create features {#features}\n\nNow we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of **feature engineering** might go a long way to improving our model. How should the date be encoded into the model? 
The `date` column has an R `date` object so including that column \"as is\" will mean that the model will convert it to a numeric format equal to the number of days after a reference date:\n\n```{r}\n#| label: \"dates\"\nflight_data %>% \n distinct(date) %>% \n mutate(numeric_date = as.numeric(date)) \n```\n\nIt's possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. However, it might be better to add model terms *derived* from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single `date` variable:\n\n- the day of the week,\n\n- the month, and\n\n- whether or not the date corresponds to a holiday.\n\nLet's do all three of these by adding steps to our recipe:\n\n```{r}\n#| label: \"date-recipe\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE)\n```\n\nWhat do each of these steps do?\n\n- With [`step_date()`](https://recipes.tidymodels.org/reference/step_date.html), we created two new factor columns with the appropriate day of the week and the month.\n\n- With [`step_holiday()`](https://recipes.tidymodels.org/reference/step_holiday.html), we created a binary variable indicating whether the current date is a holiday or not. The argument value of `timeDate::listHolidays(\"US\")` uses the [timeDate package](https://cran.r-project.org/web/packages/timeDate/index.html) to list the `r length(timeDate::listHolidays(\"US\"))` standard US holidays.\n\n- With `keep_original_cols = FALSE`, we remove the original `date` variable since we no longer want it in the model. 
Many recipe steps that create new variables have this argument.\n\nNext, we'll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix).\n\nFor factors like `dest` and `origin`, [standard practice](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html) is to convert them into *dummy* or *indicator* variables to make them numeric. These are binary values for each level of the factor. For example, our `origin` variable has values of `\"EWR\"`, `\"JFK\"`, and `\"LGA\"`. The standard dummy variable encoding, shown below, will create *two* numeric columns of the data that are 1 when the originating airport is `\"JFK\"` or `\"LGA\"` and zero otherwise, respectively.\n\n```{r}\n#| label: \"calc-dummy\"\n#| include: false\nfour_origins <- \n train_data %>% \n select(origin, arr_delay) %>% \n slice(1:4)\n\norigin_dummies <- \n recipe(arr_delay ~ origin, data = train_data) %>% \n step_dummy(origin, keep_original_cols = TRUE) %>%\n prep(training = four_origins)\n```\n\n```{r}\n#| label: \"dummy-table\"\n#| echo: false\n# Get a row for each factor level\nbake(origin_dummies, new_data = NULL, origin, starts_with(\"origin\")) %>% \n distinct() %>% \n knitr::kable() %>% \n kable_styling(full_width = FALSE)\n```\n\nBut, unlike the standard model formula methods in R, a recipe **does not** automatically create these dummy variables for you; you'll need to tell your recipe to add this step. This is for two reasons. First, many models do not require [numeric predictors](https://bookdown.org/max/FES/categorical-trees.html), so dummy variables may not always be preferred. 
Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. For those reasons, you need to explicitly tell recipes to create dummy variables using `step_dummy()`:\n\n```{r}\n#| label: \"dummy\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors())\n```\n\nHere, we did something different than before: instead of applying a step to an individual variable, we used [selectors](https://recipes.tidymodels.org/reference/selections.html) to apply this recipe step to several variables at once, `all_nominal_predictors()`. The [selector functions](https://recipes.tidymodels.org/reference/selections.html) can be combined to select intersections of variables.\n\nAt this stage in the recipe, this step selects the `origin`, `dest`, and `carrier` variables. It also includes two new variables, `date_dow` and `date_month`, that were created by the earlier `step_date()`.\n\nMore generally, the recipe selectors mean that you don't always have to apply steps to individual variables one at a time. Since a recipe knows the *variable type* and *role* of each column, they can also be selected (or dropped) using this information.\n\nWe need one final step to add to our recipe. Since `carrier` and `dest` have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don't exist in the training set. 
For example, there is one destination that is only in the test set:\n\n```{r}\n#| label: \"zv-cols\"\ntest_data %>% \n distinct(dest) %>% \n anti_join(train_data)\n```\n\nWhen the recipe is applied to the training set, a column is made for `r dplyr::setdiff(test_data$dest, train_data$dest)` because the factor levels come from `flight_data` (not the training set), but this column will contain all zeros. This is a \"zero-variance predictor\" that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. `step_zv()` will remove columns from the data when the training set data have a single value, so it is added to the recipe *after* `step_dummy()`:\n\n```{r}\n#| label: \"zv\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors())\n```\n\nNow we've created a *specification* of what should be done with the data. How do we use the recipe we made?\n\n## Fit a model with a recipe {#fit-workflow}\n\nLet's use logistic regression to model the flight data. As we saw in [*Build a Model*](/start/models/), we start by [building a model specification](/start/models/#build-model) using the parsnip package:\n\n```{r}\n#| label: \"model\"\nlr_mod <- \n logistic_reg() %>% \n set_engine(\"glm\")\n```\n\nWe will want to use our recipe across several steps as we train and test our model. We will:\n\n1. **Process the recipe using the training set**: This involves any estimation or calculations based on the training set. 
For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal.\n\n2. **Apply the recipe to the training set**: We create the final predictor set on the training set.\n\n3. **Apply the recipe to the test set**: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set.\n\nTo simplify this process, we can use a *model workflow*, which pairs a model and recipe together. This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test *workflows*. We'll use the [workflows package](https://workflows.tidymodels.org/) from tidymodels to bundle our parsnip model (`lr_mod`) with our recipe (`flights_rec`).\n\n```{r}\n#| label: \"workflow\"\nflights_wflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(flights_rec)\n\nflights_wflow\n```\n\nNow, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors:\n\n```{r}\n#| label: \"fit\"\nflights_fit <- \n flights_wflow %>% \n fit(data = train_data)\n```\n\nThis object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions `extract_fit_parsnip()` and `extract_recipe()`. For example, here we pull the fitted model object then use the `broom::tidy()` function to get a tidy tibble of model coefficients:\n\n```{r}\n#| label: \"fit-glance\"\nflights_fit %>% \n extract_fit_parsnip() %>% \n tidy()\n```\n\n## Use a trained workflow to predict {#predict-workflow}\n\nOur goal was to predict whether a plane arrives more than 30 minutes late. 
We have just:\n\n1. Built the model (`lr_mod`),\n\n2. Created a preprocessing recipe (`flights_rec`),\n\n3. Bundled the model and recipe (`flights_wflow`), and\n\n4. Trained our workflow using a single call to `fit()`.\n\nThe next step is to use the trained workflow (`flights_fit`) to predict with the unseen test data, which we will do with a single call to `predict()`. The `predict()` method applies the recipe to the new data, then passes them to the fitted model.\n\n```{r}\n#| label: \"pred-class\"\npredict(flights_fit, test_data)\n```\n\nBecause our outcome variable here is a factor, the output from `predict()` returns the predicted class: `late` versus `on_time`. But, let's say we want the predicted class probabilities for each flight instead. To return those, we can specify `type = \"prob\"` when we use `predict()` or use `augment()` with the model plus test data to save them together:\n\n```{r}\n#| label: \"test-pred\"\nflights_aug <- \n augment(flights_fit, test_data)\n\n# The data look like: \nflights_aug %>%\n select(arr_delay, time_hour, flight, .pred_class, .pred_on_time)\n```\n\nNow that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of `.pred_on_time` are *p* \\> .50. But we also know that we have `r scales::comma(nrow(flights_aug))` rows total to predict. 
We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, `arr_delay`.\n\nLet's use the area under the [ROC curve](https://bookdown.org/max/FES/measuring-performance.html#class-metrics) as our metric, computed using `roc_curve()` and `roc_auc()` from the [yardstick package](https://yardstick.tidymodels.org/).\n\nTo generate a ROC curve, we need the predicted class probabilities for `late` and `on_time`, which we just calculated in the code chunk above. We can create the ROC curve with these values, using `roc_curve()` and then piping to the `autoplot()` method:\n\n```{r}\n#| label: \"roc-plot\"\nflights_aug %>% \n roc_curve(truth = arr_delay, .pred_late) %>% \n autoplot()\n```\n\nSimilarly, `roc_auc()` estimates the area under the curve:\n\n```{r}\n#| label: \"roc-auc\"\nflights_aug %>% \n roc_auc(truth = arr_delay, .pred_late)\n```\n\nNot too bad! We leave it to the reader to test out this workflow [*without*](https://workflows.tidymodels.org/reference/add_formula.html) this recipe. 
You can use `workflows::add_formula(arr_delay ~ .)` instead of `add_recipe()` (remember to remove the identification variables first!), and see whether our recipe improved our model's ability to predict late arrivals.\n\n```{r}\n#| eval: false\n#| include: false\nset.seed(555)\nflights_cens <- flight_data %>% \n select(-flight, -time_hour)\n\nflights_cens_split <- initial_split(flights_cens, prop = 3/4)\nflights_cens_train <- training(flights_cens_split)\nflights_cens_test <- testing(flights_cens_split)\n\nflights_wflow_raw <-\n workflow() %>% \n add_model(lr_mod) %>% \n add_formula(arr_delay ~ .)\n\nflights_fit_raw <- \n flights_wflow_raw %>% \n fit(data = flights_cens_train)\n\nflights_preds_raw <- \n predict(flights_fit_raw, \n flights_cens_test, \n type = \"prob\") %>% \n bind_cols(flights_cens_test %>% select(arr_delay)) \n\nflights_preds_raw %>% \n roc_auc(truth = arr_delay, .pred_late)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(nycflights13)\nlibrary(kableExtra)\nlibrary(skimr)\npkgs <- c(\"tidymodels\", \"nycflights13\", \"skimr\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction {#intro}\n\nIn our [*Build a Model*](/start/models/) article, we learned how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/). In this article, we'll explore another tidymodels package, [recipes](https://recipes.tidymodels.org/), which is designed to help you preprocess your data *before* training your model. 
Recipes are built as a series of preprocessing steps, such as:\n\n- converting qualitative predictors to indicator variables (also known as dummy variables),\n\n- transforming data to be on a different scale (e.g., taking the logarithm of a variable),\n\n- transforming whole groups of predictors together,\n\n- extracting key features from raw variables (e.g., getting the day of the week out of a date variable),\n\nand so on. If you are familiar with R's formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the recipes package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(nycflights13) # for flight data\nlibrary(skimr) # for variable summaries\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The New York City flight data {#data}\n\n```{r}\n#| label: \"flight-start\"\n#| echo: false\nset.seed(123)\n\nflight_data <- \n flights %>% \n mutate(\n # Convert the arrival delay to a factor\n arr_delay = ifelse(arr_delay >= 30, \"late\", \"on_time\"),\n arr_delay = factor(arr_delay),\n # We will use the date (not date-time) in the recipe below\n date = lubridate::as_date(time_hour)\n ) %>% \n # Include the weather data\n inner_join(weather, by = c(\"origin\", \"time_hour\")) %>% \n # Only retain the specific columns we will use\n select(dep_time, flight, origin, dest, air_time, distance, \n carrier, date, arr_delay, time_hour) %>% \n # Exclude missing data\n na.omit() %>% \n # For creating models, it is better to have qualitative columns\n # encoded as factors (instead of character strings)\n mutate_if(is.character, as.factor)\n```\n\nLet's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 
minutes late. This data set contains information on `r scales::comma(nrow(flight_data))` flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:\n\n```{r}\n#| ref.label: \"flight-start\"\n\n```\n\nWe can see that about `r percent(mean(flight_data$arr_delay == \"late\"))` of the flights in this data set arrived more than 30 minutes late.\n\n```{r}\n#| label: \"count-delays\"\nflight_data %>% \n count(arr_delay) %>% \n mutate(prop = n/sum(n))\n```\n\nBefore we start building up our recipe, let's take a quick look at a few specific variables that will be important for both preprocessing and modeling.\n\nFirst, notice that the variable we created called `arr_delay` is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor.\n\n```{r}\n#| label: \"glimpse-flights\"\nglimpse(flight_data)\n```\n\nSecond, there are two variables that we don't want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are `flight`, a numeric value, and `time_hour`, a date-time value.\n\nThird, there are `r length(levels(flight_data$dest))` flight destinations contained in `dest` and `r length(levels(flight_data$carrier))` distinct `carrier`s.\n\n```{r}\n#| label: \"skim-flights\"\nflight_data %>% \n skimr::skim(dest, carrier) \n```\n\nBecause we'll be using a simple logistic regression model, the variables `dest` and `carrier` will be converted to [dummy variables](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html). However, some of these values do not occur very frequently and this could complicate our analysis. 
We'll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling.\n\n## Data splitting {#data-split}\n\nTo get started, let's split this single dataset into two: a *training* set and a *testing* set. We'll keep most of the rows in the original dataset (subset chosen randomly) in the *training* set. The training data will be used to *fit* the model, and the *testing* set will be used to measure model performance.\n\nTo do this, we can use the [rsample](https://rsample.tidymodels.org/) package to create an object that contains the information on *how* to split the data, and then two more rsample functions to create data frames for the training and testing sets:\n\n```{r}\n#| label: \"split\"\n# Fix the random numbers by setting the seed \n# This enables the analysis to be reproducible when random numbers are used \nset.seed(222)\n# Put 3/4 of the data into the training set \ndata_split <- initial_split(flight_data, prop = 3/4)\n\n# Create data frames for the two sets:\ntrain_data <- training(data_split)\ntest_data <- testing(data_split)\n```\n\n## Create recipe and roles {#recipe}\n\nTo get started, let's create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model.\n\nLet's initiate a new recipe:\n\n```{r}\n#| label: \"initial-recipe\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) \n```\n\nThe [`recipe()` function](https://recipes.tidymodels.org/reference/recipe.html) as we used it here has two arguments:\n\n- A **formula**. Any variable on the left-hand side of the tilde (`~`) is considered the model outcome (here, `arr_delay`). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (`.`) to indicate all other variables as predictors.\n\n- The **data**. A recipe is associated with the data set used to create the model. 
This will typically be the *training* set, so `data = train_data` here. Naming a data set doesn't actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc.\n\nNow we can add [roles](https://recipes.tidymodels.org/reference/roles.html) to this recipe. We can use the [`update_role()` function](https://recipes.tidymodels.org/reference/roles.html) to let recipes know that `flight` and `time_hour` are variables with a custom role that we called `\"ID\"` (a role can have any character value). Whereas our formula included all variables in the training set other than `arr_delay` as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors.\n\n```{r}\n#| label: \"recipe-roles\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") \n```\n\nThis step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong.\n\nTo get the current set of variables and roles, use the `summary()` function:\n\n```{r}\n#| label: \"summary\"\nsummary(flights_rec)\n```\n\n## Create features {#features}\n\nNow we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of **feature engineering** might go a long way to improving our model. How should the date be encoded into the model? 
The `date` column has an R `date` object so including that column \"as is\" will mean that the model will convert it to a numeric format equal to the number of days after a reference date:\n\n```{r}\n#| label: \"dates\"\nflight_data %>% \n distinct(date) %>% \n mutate(numeric_date = as.numeric(date)) \n```\n\nIt's possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. However, it might be better to add model terms *derived* from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single `date` variable:\n\n- the day of the week,\n\n- the month, and\n\n- whether or not the date corresponds to a holiday.\n\nLet's do all three of these by adding steps to our recipe:\n\n```{r}\n#| label: \"date-recipe\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE)\n```\n\nWhat do each of these steps do?\n\n- With [`step_date()`](https://recipes.tidymodels.org/reference/step_date.html), we created two new factor columns with the appropriate day of the week and the month.\n\n- With [`step_holiday()`](https://recipes.tidymodels.org/reference/step_holiday.html), we created a binary variable indicating whether the current date is a holiday or not. The argument value of `timeDate::listHolidays(\"US\")` uses the [timeDate package](https://cran.r-project.org/web/packages/timeDate/index.html) to list the `r length(timeDate::listHolidays(\"US\"))` standard US holidays.\n\n- With `keep_original_cols = FALSE`, we remove the original `date` variable since we no longer want it in the model. 
Many recipe steps that create new variables have this argument.\n\nNext, we'll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix).\n\nFor factors like `dest` and `origin`, [standard practice](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html) is to convert them into *dummy* or *indicator* variables to make them numeric. These are binary values for each level of the factor. For example, our `origin` variable has values of `\"EWR\"`, `\"JFK\"`, and `\"LGA\"`. The standard dummy variable encoding, shown below, will create *two* numeric columns of the data that are 1 when the originating airport is `\"JFK\"` or `\"LGA\"` and zero otherwise, respectively.\n\n```{r}\n#| label: \"calc-dummy\"\n#| include: false\nfour_origins <- \n train_data %>% \n select(origin, arr_delay) %>% \n slice(1:4)\n\norigin_dummies <- \n recipe(arr_delay ~ origin, data = train_data) %>% \n step_dummy(origin, keep_original_cols = TRUE) %>%\n prep(training = four_origins)\n```\n\n```{r}\n#| label: \"dummy-table\"\n#| echo: false\n# Get a row for each factor level\nbake(origin_dummies, new_data = NULL, origin, starts_with(\"origin\")) %>% \n distinct() %>% \n knitr::kable() %>% \n kable_styling(full_width = FALSE)\n```\n\nBut, unlike the standard model formula methods in R, a recipe **does not** automatically create these dummy variables for you; you'll need to tell your recipe to add this step. This is for two reasons. First, many models do not require [numeric predictors](https://bookdown.org/max/FES/categorical-trees.html), so dummy variables may not always be preferred. 
Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. For those reasons, you need to explicitly tell recipes to create dummy variables using `step_dummy()`:\n\n```{r}\n#| label: \"dummy\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors())\n```\n\nHere, we did something different than before: instead of applying a step to an individual variable, we used [selectors](https://recipes.tidymodels.org/reference/selections.html) to apply this recipe step to several variables at once, `all_nominal_predictors()`. The [selector functions](https://recipes.tidymodels.org/reference/selections.html) can be combined to select intersections of variables.\n\nAt this stage in the recipe, this step selects the `origin`, `dest`, and `carrier` variables. It also includes two new variables, `date_dow` and `date_month`, that were created by the earlier `step_date()`.\n\nMore generally, the recipe selectors mean that you don't always have to apply steps to individual variables one at a time. Since a recipe knows the *variable type* and *role* of each column, they can also be selected (or dropped) using this information.\n\nWe need one final step to add to our recipe. Since `carrier` and `dest` have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don't exist in the training set. 
For example, there is one destination that is only in the test set:\n\n```{r}\n#| label: \"zv-cols\"\ntest_data %>% \n distinct(dest) %>% \n anti_join(train_data)\n```\n\nWhen the recipe is applied to the training set, a column is made for `r dplyr::setdiff(test_data$dest, train_data$dest)` because the factor levels come from `flight_data` (not the training set), but this column will contain all zeros. This is a \"zero-variance predictor\" that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. `step_zv()` will remove columns from the data when the training set data have a single value, so it is added to the recipe *after* `step_dummy()`:\n\n```{r}\n#| label: \"zv\"\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors())\n```\n\nNow we've created a *specification* of what should be done with the data. How do we use the recipe we made?\n\n## Fit a model with a recipe {#fit-workflow}\n\nLet's use logistic regression to model the flight data. As we saw in [*Build a Model*](/start/models/), we start by [building a model specification](/start/models/#build-model) using the parsnip package:\n\n```{r}\n#| label: \"model\"\nlr_mod <- \n logistic_reg() %>% \n set_engine(\"glm\")\n```\n\nWe will want to use our recipe across several steps as we train and test our model. We will:\n\n1. **Process the recipe using the training set**: This involves any estimation or calculations based on the training set. 
For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal.\n\n2. **Apply the recipe to the training set**: We create the final predictor set on the training set.\n\n3. **Apply the recipe to the test set**: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set.\n\nTo simplify this process, we can use a *model workflow*, which pairs a model and recipe together. This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test *workflows*. We'll use the [workflows package](https://workflows.tidymodels.org/) from tidymodels to bundle our parsnip model (`lr_mod`) with our recipe (`flights_rec`).\n\n```{r}\n#| label: \"workflow\"\nflights_wflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(flights_rec)\n\nflights_wflow\n```\n\nNow, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors:\n\n```{r}\n#| label: \"fit\"\nflights_fit <- \n flights_wflow %>% \n fit(data = train_data)\n```\n\nThis object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions `extract_fit_parsnip()` and `extract_recipe()`. For example, here we pull the fitted model object then use the `broom::tidy()` function to get a tidy tibble of model coefficients:\n\n```{r}\n#| label: \"fit-glance\"\nflights_fit %>% \n extract_fit_parsnip() %>% \n tidy()\n```\n\n## Use a trained workflow to predict {#predict-workflow}\n\nOur goal was to predict whether a plane arrives more than 30 minutes late. 
We have just:\n\n1. Built the model (`lr_mod`),\n\n2. Created a preprocessing recipe (`flights_rec`),\n\n3. Bundled the model and recipe (`flights_wflow`), and\n\n4. Trained our workflow using a single call to `fit()`.\n\nThe next step is to use the trained workflow (`flights_fit`) to predict with the unseen test data, which we will do with a single call to `predict()`. The `predict()` method applies the recipe to the new data, then passes them to the fitted model.\n\n```{r}\n#| label: \"pred-class\"\npredict(flights_fit, test_data)\n```\n\nBecause our outcome variable here is a factor, the output from `predict()` returns the predicted class: `late` versus `on_time`. But, let's say we want the predicted class probabilities for each flight instead. To return those, we can specify `type = \"prob\"` when we use `predict()` or use `augment()` with the model plus test data to save them together:\n\n```{r}\n#| label: \"test-pred\"\nflights_aug <- \n augment(flights_fit, test_data)\n\n# The data look like: \nflights_aug %>%\n select(arr_delay, time_hour, flight, .pred_class, .pred_on_time)\n```\n\nNow that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of `.pred_on_time` are *p* \\> .50. But we also know that we have `r scales::comma(nrow(flights_aug))` rows total to predict. 
We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, `arr_delay`.\n\nLet's use the area under the [ROC curve](https://bookdown.org/max/FES/measuring-performance.html#class-metrics) as our metric, computed using `roc_curve()` and `roc_auc()` from the [yardstick package](https://yardstick.tidymodels.org/).\n\nTo generate a ROC curve, we need the predicted class probabilities for `late` and `on_time`, which we just calculated in the code chunk above. We can create the ROC curve with these values, using `roc_curve()` and then piping to the `autoplot()` method:\n\n```{r}\n#| label: \"roc-plot\"\nflights_aug %>% \n roc_curve(truth = arr_delay, .pred_late) %>% \n autoplot()\n```\n\nSimilarly, `roc_auc()` estimates the area under the curve:\n\n```{r}\n#| label: \"roc-auc\"\nflights_aug %>% \n roc_auc(truth = arr_delay, .pred_late)\n```\n\nNot too bad! We leave it to the reader to test out this workflow [*without*](https://workflows.tidymodels.org/reference/add_formula.html) this recipe. 
You can use `workflows::add_formula(arr_delay ~ .)` instead of `add_recipe()` (remember to remove the identification variables first!), and see whether our recipe improved our model's ability to predict late arrivals.\n\n```{r}\n#| eval: false\n#| include: false\nset.seed(555)\nflights_cens <- flight_data %>% \n select(-flight, -time_hour)\n\nflights_cens_split <- initial_split(flights_cens, prop = 3/4)\nflights_cens_train <- training(flights_cens_split)\nflights_cens_test <- testing(flights_cens_split)\n\nflights_wflow_raw <-\n workflow() %>% \n add_model(lr_mod) %>% \n add_formula(arr_delay ~ .)\n\nflights_fit_raw <- \n flights_wflow_raw %>% \n fit(data = flights_cens_train)\n\nflights_preds_raw <- \n predict(flights_fit_raw, \n flights_cens_test, \n type = \"prob\") %>% \n bind_cols(flights_cens_test %>% select(arr_delay)) \n\nflights_preds_raw %>% \n roc_auc(truth = arr_delay, .pred_late)\n```\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex
-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"css":["../styles.css"],"include-after-body":["../repo-actions-delete.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching 
documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} 
min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Preprocess your data with recipes","weight":2,"categories":["pre-processing","recipes","parsnip","workflows","yardstick","broom"],"description":"Prepare data for modeling with modular preprocessing steps.\n","toc-location":"body","toc-title":""},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/start/resampling/index.qmd.json b/.quarto/idx/start/resampling/index.qmd.json new file mode 100644 index 00000000..69cf497c --- /dev/null +++ b/.quarto/idx/start/resampling/index.qmd.json @@ -0,0 +1 @@ +{"title":"Evaluate your model with resampling","markdown":{"yaml":{"title":"Evaluate your model with resampling","weight":3,"categories":["resampling","rsample","parsnip","tune","workflows","yardstick"],"description":"Measure model performance by generating different versions of the training data through resampling.\n","toc-location":"body","toc-depth":2,"toc-title":"","css":"../styles.css","include-after-body":"../repo-actions-delete.html"},"headingText":"Introduction","headingAttr":{"id":"intro","classes":[],"keyvalue":[]},"containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(ranger)\nlibrary(modeldata)\nlibrary(kableExtra)\n\ndata(cells, package = \"modeldata\")\n\npkgs <- c(\"tidymodels\", \"ranger\", \"modeldata\")\n\ntheme_set(theme_bw() + theme(legend.position = 
\"top\"))\n```\n\n\nSo far, we have [built a model](/start/models/) and [preprocessed data with a recipe](/start/recipes/). We also introduced [workflows](/start/recipes/#fit-workflow) as a way to bundle a [parsnip model](https://parsnip.tidymodels.org/) and [recipe](https://recipes.tidymodels.org/) together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on **resampling** statistics.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the rsample package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(modeldata) # for the cells data\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The cell image data {#data}\n\nLet's use data from [Hill, LaPan, Li, and Haney (2007)](http://www.biomedcentral.com/1471-2105/8/340), available in the [modeldata package](https://cran.r-project.org/web/packages/modeldata/index.html), to predict cell image segmentation quality with resampling. To start, we load this data into R:\n\n```{r}\n#| label: \"cell-import\"\ndata(cells, package = \"modeldata\")\ncells\n```\n\nWe have data for `r nrow(cells)` cells, with `r ncol(cells)` variables. The main outcome variable of interest for us here is called `class`, which you can see is a factor. But before we jump into predicting the `class` variable, we need to understand it better. Below is a brief primer on cell image segmentation.\n\n### Predicting image segmentation quality\n\nSome biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. 
Different parts of the cells can be colored so that the locations of a cell can be determined.\n\nFor example, in top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell.\n\n```{r}\n#| label: \"cell-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"70%\"\nknitr::include_graphics(\"img/cells.png\")\n```\n\nUsing these colors, the cells in an image can be *segmented* so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or \"oblongness\" of the cell.\n\nThe bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion.\n\nA cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (`PS`) or well-segmented (`WS`). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented.\n\n### Back to the cells data\n\nThe `cells` data has `class` labels for `r nrow(cells)` cells --- each cell is labeled as either poorly segmented (`PS`) or well-segmented (`WS`). Each also has a total of `r ncol(cells) - 2` predictors based on automated image analysis measurements. 
For example, `avg_inten_ch_1` is the mean intensity of the data contained in the nucleus, `area_ch_1` is the total size of the cell, and so on (some predictors are fairly arcane in nature).\n\n```{r}\n#| label: \"cells-show\"\ncells\n```\n\nThe rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells:\n\n```{r}\n#| label: \"rates\"\ncells %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n```\n\n## Data splitting {#data-split}\n\nIn our previous [*Preprocess your data with recipes*](/start/recipes/#data-split) article, we started by splitting our data. It is common when beginning a modeling project to [separate the data set](https://bookdown.org/max/FES/data-splitting.html) into two partitions:\n\n- The *training set* is used to estimate parameters, compare models and feature engineering techniques, tune models, etc.\n\n- The *test set* is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance.\n\nThere are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the [rsample](https://rsample.tidymodels.org/) package for this purpose.\n\nSince random sampling uses random numbers, it is important to set the random number seed. This ensures that the random numbers can be reproduced at a later time (if needed).\n\nThe function `rsample::initial_split()` takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column `case`. 
To demonstrate how to make a split, we'll remove this column before we make our own split:\n\n```{r}\n#| label: \"cell-split\"\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\n```\n\nHere we used the [`strata` argument](https://rsample.tidymodels.org/reference/initial_split.html), which conducts a stratified split. This ensures that, despite the imbalance we noticed in our `class` variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the `initial_split`, the `training()` and `testing()` functions return the actual data sets.\n\n```{r}\n#| label: \"cell-train-test\"\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n\nnrow(cell_train)\nnrow(cell_train)/nrow(cells)\n\n# training set proportions by class\ncell_train %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n\n# test set proportions by class\ncell_test %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n```\n\nThe majority of the modeling work is then conducted on the training set data.\n\n## Modeling\n\n[Random forest models](https://en.wikipedia.org/wiki/Random_forest) are [ensembles](https://en.wikipedia.org/wiki/Ensemble_learning) of [decision trees](https://en.wikipedia.org/wiki/Decision_tree). A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. 
For categorical outcome variables like `class` in our `cells` data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample.\n\nOne of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won't create a recipe for the `cells` data.\n\nAt the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute.\n\nTo fit a random forest model on the training set, let's use the [parsnip](https://parsnip.tidymodels.org/) package with the [ranger](https://cran.r-project.org/package=ranger) engine. We first define the model that we want to create:\n\n```{r}\n#| label: \"rf-def\"\nrf_mod <- \n rand_forest(trees = 1000) %>% \n set_engine(\"ranger\") %>% \n set_mode(\"classification\")\n```\n\nStarting with this parsnip model object, the `fit()` function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing:\n\n```{r}\n#| label: \"rf-fit\"\nset.seed(234)\nrf_fit <- \n rf_mod %>% \n fit(class ~ ., data = cell_train)\nrf_fit\n```\n\nThis new `rf_fit` object is our fitted model, trained on our training data set.\n\n## Estimating performance {#performance}\n\nDuring a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are:\n\n- the area under the Receiver Operating Characteristic (ROC) curve, and\n\n- overall classification accuracy.\n\nThe ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. 
The hard class predictions tell us whether our model predicted `PS` or `WS` for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented.\n\nThe [yardstick package](https://yardstick.tidymodels.org/) has functions for computing both of these measures called `roc_auc()` and `accuracy()`.\n\nAt first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let's see what happens if we try this. To evaluate performance based on the training set, we call the `predict()` method to get both types of predictions (i.e. probabilities and hard class predictions).\n\n```{r}\n#| label: \"rf-train-pred\"\nrf_training_pred <- \n predict(rf_fit, cell_train) %>% \n bind_cols(predict(rf_fit, cell_train, type = \"prob\")) %>% \n # Add the true outcome data back in\n bind_cols(cell_train %>% \n select(class))\n```\n\nUsing the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious:\n\n```{r}\n#| label: \"rf-train-perf\"\nrf_training_pred %>% # training set predictions\n roc_auc(truth = class, .pred_PS)\nrf_training_pred %>% # training set predictions\n accuracy(truth = class, .pred_class)\n```\n\nNow that we have this model with exceptional performance, we proceed to the test set. 
Unfortunately, we discover that, although our results aren't bad, they are certainly worse than what we initially thought based on predicting the training set:\n\n```{r}\n#| label: \"rf-test\"\nrf_testing_pred <- \n predict(rf_fit, cell_test) %>% \n bind_cols(predict(rf_fit, cell_test, type = \"prob\")) %>% \n bind_cols(cell_test %>% select(class))\n```\n\n```{r}\n#| label: \"rf-test-perf\"\nrf_testing_pred %>% # test set predictions\n roc_auc(truth = class, .pred_PS)\nrf_testing_pred %>% # test set predictions\n accuracy(truth = class, .pred_class)\n```\n\n### What happened here?\n\nThere are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic:\n\n- Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results.\n\n- The training set does not have the capacity to be a good arbiter of performance. It is not an independent piece of information; predicting the training set can only reflect what the model already knows.\n\nTo understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the *second* test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test.\n\n## Resampling to the rescue {#resampling}\n\nResampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the *training set*. 
This schematic from [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/resampling.html) illustrates data usage for resampling methods:\n\n```{r}\n#| label: \"resampling-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"85%\"\nknitr::include_graphics(\"img/resampling.svg\")\n```\n\nIn the first level of this diagram, you see what happens when you use `rsample::initial_split()`, which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out.\n\nLet's use 10-fold cross-validation (CV) in this example. This method randomly allocates the `r nrow(cell_train)` cells in the training set to 10 groups of roughly equal size, called \"folds\". For the first iteration of resampling, the first fold of about `r floor(nrow(cell_train)/10)` cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the *assessment set* in the tidymodels framework.\n\nThe other 90% of the data (about `r floor(nrow(cell_train) * .9)` cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the *analysis set*. This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions.\n\nIn this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. 
While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics.\n\n```{r}\n#| label: \"rf-rs\"\n#| include: false\nset.seed(345)\nfolds <- vfold_cv(cell_train)\n\nrf_wf <- \n workflow() %>%\n add_model(rf_mod) %>%\n add_formula(class ~ .)\n\nset.seed(456)\nrf_fit_rs <- fit_resamples(rf_wf, folds)\n\nassessment_size <- \n folds %>% \n tidy() %>% \n group_by(Fold, Data) %>% \n count() %>% \n ungroup() %>% \n filter(Data == \"Assessment\") %>% \n select(`assessment size` = n, id = Fold)\n\nassessment_stats <- \n collect_metrics(rf_fit_rs, summarize = FALSE) %>%\n select(id, .estimate, .metric) %>%\n pivot_wider(\n id_cols = c(id),\n names_from = c(.metric),\n values_from = c(.estimate)\n ) %>%\n full_join(assessment_size, by = \"id\") %>% \n dplyr::rename(resample = id)\n\nrs_stats <- collect_metrics(rf_fit_rs)\n```\n\nThe final resampling estimates for the model are the **averages** of the performance statistics replicates. For example, suppose for our data the results were:\n\n```{r}\n#| label: \"rs-table\"\n#| echo: false\n#| results: asis\nassessment_stats %>% \n kable() %>% \n kable_styling(full_width = FALSE)\n```\n\nFrom these resampling statistics, the final estimate of performance for this random forest model would be `r round(rs_stats$mean[rs_stats$.metric == \"roc_auc\"], 3)` for the area under the ROC curve and `r round(rs_stats$mean[rs_stats$.metric == \"accuracy\"], 3)` for accuracy.\n\nThese resampling statistics are an effective method for measuring model performance *without* predicting the training set directly as a whole.\n\n## Fit a model with resampling {#fit-resamples}\n\nTo generate these results, the first step is to create a resampling object using rsample. 
There are [several resampling methods](https://rsample.tidymodels.org/reference/index.html#section-resampling-methods) implemented in rsample; cross-validation folds can be created using `vfold_cv()`:\n\n```{r}\n#| label: \"folds\"\nset.seed(345)\nfolds <- vfold_cv(cell_train, v = 10)\nfolds\n```\n\nThe list column for `splits` contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called `analysis()` and `assessment()`.\n\nHowever, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling:\n\n- Resample a model specification preprocessed with a formula or [recipe](/start/recipes/), or\n\n- Resample a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and formula/recipe.\n\nFor this example, let's use a `workflow()` that bundles together the random forest model and a formula, since we are not using a recipe. Whichever of these options you use, the syntax to `fit_resamples()` is very similar to `fit()`:\n\n```{r}\n#| label: \"rs\"\n#| eval: false\nrf_wf <- \n workflow() %>%\n add_model(rf_mod) %>%\n add_formula(class ~ .)\n\nset.seed(456)\nrf_fit_rs <- \n rf_wf %>% \n fit_resamples(folds)\n```\n\n```{r}\n#| label: \"rs-show\"\nrf_fit_rs\n```\n\nThe results are similar to the `folds` results with some extra columns. The column `.metrics` contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data:\n\n```{r}\n#| label: \"metrics\"\ncollect_metrics(rf_fit_rs)\n```\n\nThink about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. 
lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. We have looked at this once already before we started using resampling, but let's remind ourselves of the results:\n\n```{r}\n#| ref.label: \"rf-test-perf\"\n\n```\n\nThe performance metrics from the test set are much closer to the performance metrics computed using resampling than our first (\"bad idea\") attempt. Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model's performance.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(ranger)\nlibrary(modeldata)\nlibrary(kableExtra)\n\ndata(cells, package = \"modeldata\")\n\npkgs <- c(\"tidymodels\", \"ranger\", \"modeldata\")\n\ntheme_set(theme_bw() + theme(legend.position = \"top\"))\n```\n\n## Introduction {#intro}\n\nSo far, we have [built a model](/start/models/) and [preprocessed data with a recipe](/start/recipes/). We also introduced [workflows](/start/recipes/#fit-workflow) as a way to bundle a [parsnip model](https://parsnip.tidymodels.org/) and [recipe](https://recipes.tidymodels.org/) together. Once we have a model trained, we need a way to measure how well that model predicts new data. 
This tutorial explains how to characterize model performance based on **resampling** statistics.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the rsample package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(modeldata) # for the cells data\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The cell image data {#data}\n\nLet's use data from [Hill, LaPan, Li, and Haney (2007)](http://www.biomedcentral.com/1471-2105/8/340), available in the [modeldata package](https://cran.r-project.org/web/packages/modeldata/index.html), to predict cell image segmentation quality with resampling. To start, we load this data into R:\n\n```{r}\n#| label: \"cell-import\"\ndata(cells, package = \"modeldata\")\ncells\n```\n\nWe have data for `r nrow(cells)` cells, with `r ncol(cells)` variables. The main outcome variable of interest for us here is called `class`, which you can see is a factor. But before we jump into predicting the `class` variable, we need to understand it better. Below is a brief primer on cell image segmentation.\n\n### Predicting image segmentation quality\n\nSome biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined.\n\nFor example, in top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell.\n\n```{r}\n#| label: \"cell-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"70%\"\nknitr::include_graphics(\"img/cells.png\")\n```\n\nUsing these colors, the cells in an image can be *segmented* so that we know which pixels belong to which cell. 
If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or \"oblongness\" of the cell.\n\nThe bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion.\n\nA cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (`PS`) or well-segmented (`WS`). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented.\n\n### Back to the cells data\n\nThe `cells` data has `class` labels for `r nrow(cells)` cells --- each cell is labeled as either poorly segmented (`PS`) or well-segmented (`WS`). Each also has a total of `r ncol(cells) - 2` predictors based on automated image analysis measurements. For example, `avg_inten_ch_1` is the mean intensity of the data contained in the nucleus, `area_ch_1` is the total size of the cell, and so on (some predictors are fairly arcane in nature).\n\n```{r}\n#| label: \"cells-show\"\ncells\n```\n\nThe rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells:\n\n```{r}\n#| label: \"rates\"\ncells %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n```\n\n## Data splitting {#data-split}\n\nIn our previous [*Preprocess your data with recipes*](/start/recipes/#data-split) article, we started by splitting our data. 
It is common when beginning a modeling project to [separate the data set](https://bookdown.org/max/FES/data-splitting.html) into two partitions:\n\n- The *training set* is used to estimate parameters, compare models and feature engineering techniques, tune models, etc.\n\n- The *test set* is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance.\n\nThere are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the [rsample](https://rsample.tidymodels.org/) package for this purpose.\n\nSince random sampling uses random numbers, it is important to set the random number seed. This ensures that the random numbers can be reproduced at a later time (if needed).\n\nThe function `rsample::initial_split()` takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column `case`. To demonstrate how to make a split, we'll remove this column before we make our own split:\n\n```{r}\n#| label: \"cell-split\"\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\n```\n\nHere we used the [`strata` argument](https://rsample.tidymodels.org/reference/initial_split.html), which conducts a stratified split. This ensures that, despite the imbalance we noticed in our `class` variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. 
After the `initial_split`, the `training()` and `testing()` functions return the actual data sets.\n\n```{r}\n#| label: \"cell-train-test\"\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n\nnrow(cell_train)\nnrow(cell_train)/nrow(cells)\n\n# training set proportions by class\ncell_train %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n\n# test set proportions by class\ncell_test %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n```\n\nThe majority of the modeling work is then conducted on the training set data.\n\n## Modeling\n\n[Random forest models](https://en.wikipedia.org/wiki/Random_forest) are [ensembles](https://en.wikipedia.org/wiki/Ensemble_learning) of [decision trees](https://en.wikipedia.org/wiki/Decision_tree). A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. For categorical outcome variables like `class` in our `cells` data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample.\n\nOne of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won't create a recipe for the `cells` data.\n\nAt the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute.\n\nTo fit a random forest model on the training set, let's use the [parsnip](https://parsnip.tidymodels.org/) package with the [ranger](https://cran.r-project.org/package=ranger) engine. 
We first define the model that we want to create:\n\n```{r}\n#| label: \"rf-def\"\nrf_mod <- \n rand_forest(trees = 1000) %>% \n set_engine(\"ranger\") %>% \n set_mode(\"classification\")\n```\n\nStarting with this parsnip model object, the `fit()` function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing:\n\n```{r}\n#| label: \"rf-fit\"\nset.seed(234)\nrf_fit <- \n rf_mod %>% \n fit(class ~ ., data = cell_train)\nrf_fit\n```\n\nThis new `rf_fit` object is our fitted model, trained on our training data set.\n\n## Estimating performance {#performance}\n\nDuring a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are:\n\n- the area under the Receiver Operating Characteristic (ROC) curve, and\n\n- overall classification accuracy.\n\nThe ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. The hard class predictions tell us whether our model predicted `PS` or `WS` for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented.\n\nThe [yardstick package](https://yardstick.tidymodels.org/) has functions for computing both of these measures called `roc_auc()` and `accuracy()`.\n\nAt first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let's see what happens if we try this. To evaluate performance based on the training set, we call the `predict()` method to get both types of predictions (i.e. 
probabilities and hard class predictions).\n\n```{r}\n#| label: \"rf-train-pred\"\nrf_training_pred <- \n predict(rf_fit, cell_train) %>% \n bind_cols(predict(rf_fit, cell_train, type = \"prob\")) %>% \n # Add the true outcome data back in\n bind_cols(cell_train %>% \n select(class))\n```\n\nUsing the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious:\n\n```{r}\n#| label: \"rf-train-perf\"\nrf_training_pred %>% # training set predictions\n roc_auc(truth = class, .pred_PS)\nrf_training_pred %>% # training set predictions\n accuracy(truth = class, .pred_class)\n```\n\nNow that we have this model with exceptional performance, we proceed to the test set. Unfortunately, we discover that, although our results aren't bad, they are certainly worse than what we initially thought based on predicting the training set:\n\n```{r}\n#| label: \"rf-test\"\nrf_testing_pred <- \n predict(rf_fit, cell_test) %>% \n bind_cols(predict(rf_fit, cell_test, type = \"prob\")) %>% \n bind_cols(cell_test %>% select(class))\n```\n\n```{r}\n#| label: \"rf-test-perf\"\nrf_testing_pred %>% # test set predictions\n roc_auc(truth = class, .pred_PS)\nrf_testing_pred %>% # test set predictions\n accuracy(truth = class, .pred_class)\n```\n\n### What happened here?\n\nThere are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic:\n\n- Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results.\n\n- The training set does not have the capacity to be a good arbiter of performance. It is not an independent piece of information; predicting the training set can only reflect what the model already knows.\n\nTo understand that second point better, think about an analogy from teaching. 
Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the *second* test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test.\n\n## Resampling to the rescue {#resampling}\n\nResampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the *training set*. This schematic from [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/resampling.html) illustrates data usage for resampling methods:\n\n```{r}\n#| label: \"resampling-fig\"\n#| echo: false\n#| fig-align: center\n#| out-width: \"85%\"\nknitr::include_graphics(\"img/resampling.svg\")\n```\n\nIn the first level of this diagram, you see what happens when you use `rsample::initial_split()`, which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out.\n\nLet's use 10-fold cross-validation (CV) in this example. This method randomly allocates the `r nrow(cell_train)` cells in the training set to 10 groups of roughly equal size, called \"folds\". For the first iteration of resampling, the first fold of about `r floor(nrow(cell_train)/10)` cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the *assessment set* in the tidymodels framework.\n\nThe other 90% of the data (about `r floor(nrow(cell_train) * .9)` cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the *analysis set*. 
This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions.\n\nIn this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics.\n\n```{r}\n#| label: \"rf-rs\"\n#| include: false\nset.seed(345)\nfolds <- vfold_cv(cell_train)\n\nrf_wf <- \n workflow() %>%\n add_model(rf_mod) %>%\n add_formula(class ~ .)\n\nset.seed(456)\nrf_fit_rs <- fit_resamples(rf_wf, folds)\n\nassessment_size <- \n folds %>% \n tidy() %>% \n group_by(Fold, Data) %>% \n count() %>% \n ungroup() %>% \n filter(Data == \"Assessment\") %>% \n select(`assessment size` = n, id = Fold)\n\nassessment_stats <- \n collect_metrics(rf_fit_rs, summarize = FALSE) %>%\n select(id, .estimate, .metric) %>%\n pivot_wider(\n id_cols = c(id),\n names_from = c(.metric),\n values_from = c(.estimate)\n ) %>%\n full_join(assessment_size, by = \"id\") %>% \n dplyr::rename(resample = id)\n\nrs_stats <- collect_metrics(rf_fit_rs)\n```\n\nThe final resampling estimates for the model are the **averages** of the performance statistics replicates. 
For example, suppose for our data the results were:\n\n```{r}\n#| label: \"rs-table\"\n#| echo: false\n#| results: asis\nassessment_stats %>% \n kable() %>% \n kable_styling(full_width = FALSE)\n```\n\nFrom these resampling statistics, the final estimate of performance for this random forest model would be `r round(rs_stats$mean[rs_stats$.metric == \"roc_auc\"], 3)` for the area under the ROC curve and `r round(rs_stats$mean[rs_stats$.metric == \"accuracy\"], 3)` for accuracy.\n\nThese resampling statistics are an effective method for measuring model performance *without* predicting the training set directly as a whole.\n\n## Fit a model with resampling {#fit-resamples}\n\nTo generate these results, the first step is to create a resampling object using rsample. There are [several resampling methods](https://rsample.tidymodels.org/reference/index.html#section-resampling-methods) implemented in rsample; cross-validation folds can be created using `vfold_cv()`:\n\n```{r}\n#| label: \"folds\"\nset.seed(345)\nfolds <- vfold_cv(cell_train, v = 10)\nfolds\n```\n\nThe list column for `splits` contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called `analysis()` and `assessment()`.\n\nHowever, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling:\n\n- Resample a model specification preprocessed with a formula or [recipe](/start/recipes/), or\n\n- Resample a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and formula/recipe.\n\nFor this example, let's use a `workflow()` that bundles together the random forest model and a formula, since we are not using a recipe. 
Whichever of these options you use, the syntax to `fit_resamples()` is very similar to `fit()`:\n\n```{r}\n#| label: \"rs\"\n#| eval: false\nrf_wf <- \n workflow() %>%\n add_model(rf_mod) %>%\n add_formula(class ~ .)\n\nset.seed(456)\nrf_fit_rs <- \n rf_wf %>% \n fit_resamples(folds)\n```\n\n```{r}\n#| label: \"rs-show\"\nrf_fit_rs\n```\n\nThe results are similar to the `folds` results with some extra columns. The column `.metrics` contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data:\n\n```{r}\n#| label: \"metrics\"\ncollect_metrics(rf_fit_rs)\n```\n\nThink about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. We have looked at this once already before we started using resampling, but let's remind ourselves of the results:\n\n```{r}\n#| ref.label: \"rf-test-perf\"\n\n```\n\nThe performance metrics from the test set are much closer to the performance metrics computed using resampling than our first (\"bad idea\") attempt. 
Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model's performance.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"css":["../styles.css"],"include-after-body":["../repo-actions-delete.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Evaluate your model with resampling","weight":3,"categories":["resampling","rsample","parsnip","tune","workflows","yardstick"],"description":"Measure model performance by generating different versions of the 
training data through resampling.\n","toc-location":"body","toc-title":""},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/start/tuning/index.qmd.json b/.quarto/idx/start/tuning/index.qmd.json new file mode 100644 index 00000000..ae7157e2 --- /dev/null +++ b/.quarto/idx/start/tuning/index.qmd.json @@ -0,0 +1 @@ +{"title":"Tune model parameters","markdown":{"yaml":{"title":"Tune model parameters","weight":4,"categories":["tuning","rsample","parsnip","tune","dials","workflows","yardstick"],"description":"Estimate the best values for hyperparameters that cannot be learned directly during model training.\n","toc-location":"body","toc-depth":2,"toc-title":"","css":"../styles.css","include-after-body":"../repo-actions-delete.html"},"headingText":"Introduction","headingAttr":{"id":"intro","classes":[],"keyvalue":[]},"containsRefs":false,"markdown":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(rpart)\nlibrary(rpart.plot)\nlibrary(kableExtra)\nlibrary(vip)\ntheme_set(theme_bw())\ndoParallel::registerDoParallel()\npkgs <- c(\"tidymodels\", \"rpart\", \"rpart.plot\", \"vip\")\n```\n\n\nSome model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called **hyperparameters**. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this `mtry` in tidymodels) or the learning rate in a boosted tree model (we call this `learn_rate`). Instead of learning these kinds of hyperparameters during model training, we can *estimate* the best values for these values by training many models on resampled data sets and exploring how well all these models perform. 
This process is called **tuning**.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the tune package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(rpart.plot) # for visualizing a decision tree\nlibrary(vip) # for variable importance plots\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The cell image data, revisited {#data}\n\nIn our previous [*Evaluate your model with resampling*](/start/resampling/) article, we introduced a data set of images of cells that were labeled by experts as well-segmented (`WS`) or poorly segmented (`PS`). We trained a [random forest model](/start/resampling/#modeling) to predict which images are segmented well vs. poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used [resampling](/start/resampling/#resampling) to estimate the performance of our model on this data.\n\n```{r}\n#| label: \"cell-import\"\ndata(cells, package = \"modeldata\")\ncells\n```\n\n## Predicting image segmentation, but better {#why-tune}\n\nRandom forest models are a tree-based ensemble method, and typically perform well with [default hyperparameters](https://bradleyboehmke.github.io/HOML/random-forest.html#out-of-the-box-performance). However, the accuracy of some other tree-based models, such as [boosted tree models](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) or [decision tree models](https://en.wikipedia.org/wiki/Decision_tree), can be sensitive to the values of hyperparameters. In this article, we will train a **decision tree** model. There are several hyperparameters for decision tree models that can be tuned for better performance. 
Let's explore:\n\n- the complexity parameter (which we call `cost_complexity` in tidymodels) for the tree, and\n- the maximum `tree_depth`.\n\nTuning these hyperparameters can improve model performance because decision tree models are prone to [overfitting](https://bookdown.org/max/FES/important-concepts.html#overfitting). This happens because single tree models tend to fit the training data *too well* --- so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data.\n\nWe will tune the model hyperparameters to avoid overfitting. Tuning the value of `cost_complexity` helps by [pruning](https://bradleyboehmke.github.io/HOML/DT.html#pruning) back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number of tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem---an underfit tree. Tuning `tree_depth`, on the other hand, helps by [stopping](https://bradleyboehmke.github.io/HOML/DT.html#early-stopping) our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation.\n\nBefore we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. 
As [before](/start/resampling/), we can use `strata = class` if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation.\n\n```{r}\n#| label: \"cell-split\"\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n```\n\nWe use the training data for tuning the model.\n\n## Tuning hyperparameters {#tuning}\n\nLet's start with the parsnip package, using a [`decision_tree()`](https://parsnip.tidymodels.org/reference/decision_tree.html) model with the [rpart](https://cran.r-project.org/web/packages/rpart/index.html) engine. To tune the decision tree hyperparameters `cost_complexity` and `tree_depth`, we create a model specification that identifies which hyperparameters we plan to tune.\n\n```{r}\n#| label: \"tune-spec\"\ntune_spec <- \n decision_tree(\n cost_complexity = tune(),\n tree_depth = tune()\n ) %>% \n set_engine(\"rpart\") %>% \n set_mode(\"classification\")\n\ntune_spec\n```\n\nThink of `tune()` here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will `tune()`.\n\nWe can't train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we *can* train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter:\n\n```{r}\n#| label: \"tree-grid\"\ntree_grid <- grid_regular(cost_complexity(),\n tree_depth(),\n levels = 5)\n```\n\nThe function [`grid_regular()`](https://dials.tidymodels.org/reference/grid_regular.html) is from the [dials](https://dials.tidymodels.org/) package. 
It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. Since we have two to tune, `grid_regular()` returns 5 $\\times$ 5 = 25 different possible tuning combinations to try in a tidy tibble format.\n\n```{r}\n#| label: \"tree-grid-tibble\"\ntree_grid\n```\n\nHere, you can see all 5 values of `cost_complexity` ranging up to `r max(tree_grid$cost_complexity)`. These values get repeated for each of the 5 values of `tree_depth`:\n\n```{r}\ntree_grid %>% \n count(tree_depth)\n```\n\nArmed with our grid filled with 25 candidate decision tree models, let's create [cross-validation folds](/start/resampling/) for tuning:\n\n```{r}\n#| label: \"cell-folds\"\n#| dependson: \"cell-split\"\nset.seed(234)\ncell_folds <- vfold_cv(cell_train)\n```\n\nTuning in tidymodels requires a resampled object created with the [rsample](https://rsample.tidymodels.org/) package.\n\n## Model tuning with a grid {#tune-grid}\n\nWe are ready to tune! Let's use [`tune_grid()`](https://tune.tidymodels.org/reference/tune_grid.html) to fit models at all the different values we chose for each tuned hyperparameter. 
There are several options for building the object for tuning:\n\n- Tune a model specification along with a recipe or model, or\n\n- Tune a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and a recipe or model preprocessor.\n\nHere we use a `workflow()` with a straightforward formula; if this model required more involved data preprocessing, we could use `add_recipe()` instead of `add_formula()`.\n\n```{r}\n#| label: \"tree-res\"\n#| dependson: c(\"tune-spec\", \"cell-folds\", \"tree-grid\")\n#| message: false\nset.seed(345)\n\ntree_wf <- workflow() %>%\n add_model(tune_spec) %>%\n add_formula(class ~ .)\n\ntree_res <- \n tree_wf %>% \n tune_grid(\n resamples = cell_folds,\n grid = tree_grid\n )\n\ntree_res\n```\n\nOnce we have our tuning results, we can both explore them through visualization and then select the best result. The function `collect_metrics()` gives us a tidy tibble with all the results. We had 25 candidate models and two metrics, `accuracy` and `roc_auc`, and we get a row for each `.metric` and model.\n\n```{r}\n#| label: \"collect-trees\"\n#| dependson: \"tree-res\"\ntree_res %>% \n collect_metrics()\n```\n\nWe might get more out of plotting these results:\n\n```{r}\n#| label: \"best-tree\"\n#| dependson: \"tree-res\"\n#| fig-width: 8\n#| fig-height: 7\ntree_res %>%\n collect_metrics() %>%\n mutate(tree_depth = factor(tree_depth)) %>%\n ggplot(aes(cost_complexity, mean, color = tree_depth)) +\n geom_line(size = 1.5, alpha = 0.6) +\n geom_point(size = 2) +\n facet_wrap(~ .metric, scales = \"free\", nrow = 2) +\n scale_x_log10(labels = scales::label_number()) +\n scale_color_viridis_d(option = \"plasma\", begin = .9, end = 0)\n```\n\nWe can see that our \"stubbiest\" tree, with a depth of `r min(tree_grid$tree_depth)`, is the worst model according to both metrics and across all candidate values of `cost_complexity`. Our deepest tree, with a depth of `r max(tree_grid$tree_depth)`, did better. 
However, the best tree seems to be between these values with a tree depth of 4. The [`show_best()`](https://tune.tidymodels.org/reference/show_best.html) function shows us the top 5 candidate models by default:\n\n```{r}\n#| label: \"show-best-tree\"\n#| dependson: \"tree-res\"\ntree_res %>%\n show_best(\"accuracy\")\n```\n\nWe can also use the [`select_best()`](https://tune.tidymodels.org/reference/show_best.html) function to pull out the single set of hyperparameter values for our best decision tree model:\n\n```{r}\n#| label: \"select-best-tree\"\n#| dependson: \"tree-res\"\nbest_tree <- tree_res %>%\n select_best(\"accuracy\")\n\nbest_tree\n```\n\nThese are the values for `tree_depth` and `cost_complexity` that maximize accuracy in this data set of cell images.\n\n## Finalizing our model {#final-model}\n\nWe can update (or \"finalize\") our workflow object `tree_wf` with the values from `select_best()`.\n\n```{r}\n#| label: \"final-wf\"\n#| dependson: \"best-tree\"\nfinal_wf <- \n tree_wf %>% \n finalize_workflow(best_tree)\n\nfinal_wf\n```\n\nOur tuning is done!\n\n### The last fit\n\nFinally, let's fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. 
We can use the function [`last_fit()`](https://tune.tidymodels.org/reference/last_fit.html) with our finalized model; this function *fits* the finalized model on the full training data set and *evaluates* the finalized model on the testing data.\n\n```{r}\n#| label: \"last-fit\"\n#| dependson: c(\"final-wf\", \"cell-split\")\nfinal_fit <- \n final_wf %>%\n last_fit(cell_split) \n\nfinal_fit %>%\n collect_metrics()\n\nfinal_fit %>%\n collect_predictions() %>% \n roc_curve(class, .pred_PS) %>% \n autoplot()\n```\n\nThe performance metrics from the test set indicate that we did not overfit during our tuning procedure.\n\nThe `final_fit` object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. You may want to extract this object, using [one of the `extract_` helper functions](https://tune.tidymodels.org/reference/extract-tune.html).\n\n```{r}\n#| label: \"last-fit-wf\"\n#| dependson: \"last-fit\"\nfinal_tree <- extract_workflow(final_fit)\nfinal_tree\n```\n\nWe can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit.\n\n```{r}\n#| label: \"rpart-plot\"\n#| dependson: \"last-fit-wf\"\n#| fig-width: 8\n#| fig-height: 5\nfinal_tree %>%\n extract_fit_engine() %>%\n rpart.plot(roundint = FALSE)\n```\n\nPerhaps we would also like to understand what variables are important in this final model. 
We can use the [vip](https://koalaverse.github.io/vip/) package to estimate variable importance [based on the model's structure](https://koalaverse.github.io/vip/reference/vi_model.html#details).\n\n```{r}\n#| label: \"vip\"\n#| dependson: \"final-tree\"\n#| fig-width: 6\n#| fig-height: 5\nlibrary(vip)\n\nfinal_tree %>% \n extract_fit_parsnip() %>% \n vip()\n```\n\nThese are the automated image analysis measurements that are the most important in driving segmentation quality predictions.\n\nWe leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the [reference docs](/find/parsnip/#models), or use the `args()` function to see which parsnip object arguments are available:\n\n```{r}\nargs(decision_tree)\n```\n\nYou could tune the other hyperparameter we didn't use here, `min_n`, which sets the minimum `n` to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this [searchable table](/find/parsnip/#model-args) to find the original argument for `min_n` in the rpart package ([hint](https://stat.ethz.ch/R-manual/R-devel/library/rpart/html/rpart.control.html)). 
See whether you can tune a different combination of hyperparameters and/or values to improve a tree's ability to predict cell segmentation quality.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n","srcMarkdownNoYaml":"\n\n```{r}\n#| label: \"setup\"\n#| include: false\n#| message: false\n#| warning: false\nsource(here::here(\"common.R\"))\n```\n\n```{r}\n#| label: \"load\"\n#| include: false\n#| message: false\n#| warning: false\nlibrary(tidymodels)\nlibrary(rpart)\nlibrary(rpart.plot)\nlibrary(kableExtra)\nlibrary(vip)\ntheme_set(theme_bw())\ndoParallel::registerDoParallel()\npkgs <- c(\"tidymodels\", \"rpart\", \"rpart.plot\", \"vip\")\n```\n\n## Introduction {#intro}\n\nSome model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called **hyperparameters**. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this `mtry` in tidymodels) or the learning rate in a boosted tree model (we call this `learn_rate`). Instead of learning these kinds of hyperparameters during model training, we can *estimate* the best values for these values by training many models on resampled data sets and exploring how well all these models perform. This process is called **tuning**.\n\n`r article_req_pkgs(pkgs)`\n\n```{r}\n#| eval: false\nlibrary(tidymodels) # for the tune package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(rpart.plot) # for visualizing a decision tree\nlibrary(vip) # for variable importance plots\n```\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n## The cell image data, revisited {#data}\n\nIn our previous [*Evaluate your model with resampling*](/start/resampling/) article, we introduced a data set of images of cells that were labeled by experts as well-segmented (`WS`) or poorly segmented (`PS`). 
We trained a [random forest model](/start/resampling/#modeling) to predict which images are segmented well vs. poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used [resampling](/start/resampling/#resampling) to estimate the performance of our model on this data.\n\n```{r}\n#| label: \"cell-import\"\ndata(cells, package = \"modeldata\")\ncells\n```\n\n## Predicting image segmentation, but better {#why-tune}\n\nRandom forest models are a tree-based ensemble method, and typically perform well with [default hyperparameters](https://bradleyboehmke.github.io/HOML/random-forest.html#out-of-the-box-performance). However, the accuracy of some other tree-based models, such as [boosted tree models](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) or [decision tree models](https://en.wikipedia.org/wiki/Decision_tree), can be sensitive to the values of hyperparameters. In this article, we will train a **decision tree** model. There are several hyperparameters for decision tree models that can be tuned for better performance. Let's explore:\n\n- the complexity parameter (which we call `cost_complexity` in tidymodels) for the tree, and\n- the maximum `tree_depth`.\n\nTuning these hyperparameters can improve model performance because decision tree models are prone to [overfitting](https://bookdown.org/max/FES/important-concepts.html#overfitting). This happens because single tree models tend to fit the training data *too well* --- so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data.\n\nWe will tune the model hyperparameters to avoid overfitting. Tuning the value of `cost_complexity` helps by [pruning](https://bradleyboehmke.github.io/HOML/DT.html#pruning) back our tree. 
It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem---an underfit tree. Tuning `tree_depth`, on the other hand, helps by [stopping](https://bradleyboehmke.github.io/HOML/DT.html#early-stopping) our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation.\n\nBefore we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As [before](/start/resampling/), we can use `strata = class` if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation.\n\n```{r}\n#| label: \"cell-split\"\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n```\n\nWe use the training data for tuning the model.\n\n## Tuning hyperparameters {#tuning}\n\nLet's start with the parsnip package, using a [`decision_tree()`](https://parsnip.tidymodels.org/reference/decision_tree.html) model with the [rpart](https://cran.r-project.org/web/packages/rpart/index.html) engine. To tune the decision tree hyperparameters `cost_complexity` and `tree_depth`, we create a model specification that identifies which hyperparameters we plan to tune.\n\n```{r}\n#| label: \"tune-spec\"\ntune_spec <- \n decision_tree(\n cost_complexity = tune(),\n tree_depth = tune()\n ) %>% \n set_engine(\"rpart\") %>% \n set_mode(\"classification\")\n\ntune_spec\n```\n\nThink of `tune()` here as a placeholder. 
After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will `tune()`.\n\nWe can't train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we *can* train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter:\n\n```{r}\n#| label: \"tree-grid\"\ntree_grid <- grid_regular(cost_complexity(),\n tree_depth(),\n levels = 5)\n```\n\nThe function [`grid_regular()`](https://dials.tidymodels.org/reference/grid_regular.html) is from the [dials](https://dials.tidymodels.org/) package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. Since we have two to tune, `grid_regular()` returns 5 $\\times$ 5 = 25 different possible tuning combinations to try in a tidy tibble format.\n\n```{r}\n#| label: \"tree-grid-tibble\"\ntree_grid\n```\n\nHere, you can see all 5 values of `cost_complexity` ranging up to `r max(tree_grid$cost_complexity)`. These values get repeated for each of the 5 values of `tree_depth`:\n\n```{r}\ntree_grid %>% \n count(tree_depth)\n```\n\nArmed with our grid filled with 25 candidate decision tree models, let's create [cross-validation folds](/start/resampling/) for tuning:\n\n```{r}\n#| label: \"cell-folds\"\n#| dependson: \"cell-split\"\nset.seed(234)\ncell_folds <- vfold_cv(cell_train)\n```\n\nTuning in tidymodels requires a resampled object created with the [rsample](https://rsample.tidymodels.org/) package.\n\n## Model tuning with a grid {#tune-grid}\n\nWe are ready to tune! Let's use [`tune_grid()`](https://tune.tidymodels.org/reference/tune_grid.html) to fit models at all the different values we chose for each tuned hyperparameter. 
There are several options for building the object for tuning:\n\n- Tune a model specification along with a recipe or model, or\n\n- Tune a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and a recipe or model preprocessor.\n\nHere we use a `workflow()` with a straightforward formula; if this model required more involved data preprocessing, we could use `add_recipe()` instead of `add_formula()`.\n\n```{r}\n#| label: \"tree-res\"\n#| dependson: c(\"tune-spec\", \"cell-folds\", \"tree-grid\")\n#| message: false\nset.seed(345)\n\ntree_wf <- workflow() %>%\n add_model(tune_spec) %>%\n add_formula(class ~ .)\n\ntree_res <- \n tree_wf %>% \n tune_grid(\n resamples = cell_folds,\n grid = tree_grid\n )\n\ntree_res\n```\n\nOnce we have our tuning results, we can both explore them through visualization and then select the best result. The function `collect_metrics()` gives us a tidy tibble with all the results. We had 25 candidate models and two metrics, `accuracy` and `roc_auc`, and we get a row for each `.metric` and model.\n\n```{r}\n#| label: \"collect-trees\"\n#| dependson: \"tree-res\"\ntree_res %>% \n collect_metrics()\n```\n\nWe might get more out of plotting these results:\n\n```{r}\n#| label: \"best-tree\"\n#| dependson: \"tree-res\"\n#| fig-width: 8\n#| fig-height: 7\ntree_res %>%\n collect_metrics() %>%\n mutate(tree_depth = factor(tree_depth)) %>%\n ggplot(aes(cost_complexity, mean, color = tree_depth)) +\n geom_line(size = 1.5, alpha = 0.6) +\n geom_point(size = 2) +\n facet_wrap(~ .metric, scales = \"free\", nrow = 2) +\n scale_x_log10(labels = scales::label_number()) +\n scale_color_viridis_d(option = \"plasma\", begin = .9, end = 0)\n```\n\nWe can see that our \"stubbiest\" tree, with a depth of `r min(tree_grid$tree_depth)`, is the worst model according to both metrics and across all candidate values of `cost_complexity`. Our deepest tree, with a depth of `r max(tree_grid$tree_depth)`, did better. 
However, the best tree seems to be between these values with a tree depth of 4. The [`show_best()`](https://tune.tidymodels.org/reference/show_best.html) function shows us the top 5 candidate models by default:\n\n```{r}\n#| label: \"show-best-tree\"\n#| dependson: \"tree-res\"\ntree_res %>%\n show_best(\"accuracy\")\n```\n\nWe can also use the [`select_best()`](https://tune.tidymodels.org/reference/show_best.html) function to pull out the single set of hyperparameter values for our best decision tree model:\n\n```{r}\n#| label: \"select-best-tree\"\n#| dependson: \"tree-res\"\nbest_tree <- tree_res %>%\n select_best(\"accuracy\")\n\nbest_tree\n```\n\nThese are the values for `tree_depth` and `cost_complexity` that maximize accuracy in this data set of cell images.\n\n## Finalizing our model {#final-model}\n\nWe can update (or \"finalize\") our workflow object `tree_wf` with the values from `select_best()`.\n\n```{r}\n#| label: \"final-wf\"\n#| dependson: \"best-tree\"\nfinal_wf <- \n tree_wf %>% \n finalize_workflow(best_tree)\n\nfinal_wf\n```\n\nOur tuning is done!\n\n### The last fit\n\nFinally, let's fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. 
We can use the function [`last_fit()`](https://tune.tidymodels.org/reference/last_fit.html) with our finalized model; this function *fits* the finalized model on the full training data set and *evaluates* the finalized model on the testing data.\n\n```{r}\n#| label: \"last-fit\"\n#| dependson: c(\"final-wf\", \"cell-split\")\nfinal_fit <- \n final_wf %>%\n last_fit(cell_split) \n\nfinal_fit %>%\n collect_metrics()\n\nfinal_fit %>%\n collect_predictions() %>% \n roc_curve(class, .pred_PS) %>% \n autoplot()\n```\n\nThe performance metrics from the test set indicate that we did not overfit during our tuning procedure.\n\nThe `final_fit` object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. You may want to extract this object, using [one of the `extract_` helper functions](https://tune.tidymodels.org/reference/extract-tune.html).\n\n```{r}\n#| label: \"last-fit-wf\"\n#| dependson: \"last-fit\"\nfinal_tree <- extract_workflow(final_fit)\nfinal_tree\n```\n\nWe can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit.\n\n```{r}\n#| label: \"rpart-plot\"\n#| dependson: \"last-fit-wf\"\n#| fig-width: 8\n#| fig-height: 5\nfinal_tree %>%\n extract_fit_engine() %>%\n rpart.plot(roundint = FALSE)\n```\n\nPerhaps we would also like to understand what variables are important in this final model. 
We can use the [vip](https://koalaverse.github.io/vip/) package to estimate variable importance [based on the model's structure](https://koalaverse.github.io/vip/reference/vi_model.html#details).\n\n```{r}\n#| label: \"vip\"\n#| dependson: \"final-tree\"\n#| fig-width: 6\n#| fig-height: 5\nlibrary(vip)\n\nfinal_tree %>% \n extract_fit_parsnip() %>% \n vip()\n```\n\nThese are the automated image analysis measurements that are the most important in driving segmentation quality predictions.\n\nWe leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the [reference docs](/find/parsnip/#models), or use the `args()` function to see which parsnip object arguments are available:\n\n```{r}\nargs(decision_tree)\n```\n\nYou could tune the other hyperparameter we didn't use here, `min_n`, which sets the minimum `n` to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this [searchable table](/find/parsnip/#model-args) to find the original argument for `min_n` in the rpart package ([hint](https://stat.ethz.ch/R-manual/R-devel/library/rpart/html/rpart.control.html)). 
See whether you can tune a different combination of hyperparameters and/or values to improve a tree's ability to predict cell segmentation quality.\n\n## Session information {#session-info}\n\n```{r}\n#| label: \"si\"\n#| echo: false\nsmall_session(pkgs)\n```\n"},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":"auto","echo":true,"output":true,"warning":true,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":"none","code-overflow":"scroll","code-link":false,"code-line-numbers":false,"code-tools":false,"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":false,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","toc":true,"toc-depth":2,"css":["../styles.css"],"include-after-body":["../repo-actions-delete.html"],"output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other 
Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","notebook-preview-download":"Download Notebook","notebook-preview-back":"Back to Article"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.4.92","theme":["cosmo","../../styles.scss","../../styles-frontpage.scss"],"quarto-required":">= 1.3.353","linestretch":1.6,"grid":{"body-width":"840px"},"title":"Tune model parameters","weight":4,"categories":["tuning","rsample","parsnip","tune","dials","workflows","yardstick"],"description":"Estimate the best values for hyperparameters that cannot be learned directly 
during model training.\n","toc-location":"body","toc-title":""},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/listing/listing-cache.json b/.quarto/listing/listing-cache.json new file mode 100644 index 00000000..7d4d8867 --- /dev/null +++ b/.quarto/listing/listing-cache.json @@ -0,0 +1,15 @@ +{ + "listingMap": { + "books/index.qmd": [ + "*.qmd" + ], + "learn/index.qmd": [ + "*.qmd", + "../start/models/index.qmd", + "../start/recipes/index.qmd", + "../start/resampling/index.qmd", + "../start/tuning/index.qmd", + "../start/case-study/index.qmd" + ] + } +} \ No newline at end of file diff --git a/.quarto/xref/02476cb8 b/.quarto/xref/02476cb8 new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/02476cb8 @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/034f56ee b/.quarto/xref/034f56ee new file mode 100644 index 00000000..577fb56d --- /dev/null +++ b/.quarto/xref/034f56ee @@ -0,0 +1 @@ +{"headings":["books"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/156f35b7 b/.quarto/xref/156f35b7 new file mode 100644 index 00000000..1d4ab516 --- /dev/null +++ b/.quarto/xref/156f35b7 @@ -0,0 +1 @@ +{"entries":[],"headings":["intro","data","data-split","recipe","features","fit-workflow","predict-workflow","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/1ffffadf b/.quarto/xref/1ffffadf new file mode 100644 index 00000000..5d591afa --- /dev/null +++ b/.quarto/xref/1ffffadf @@ -0,0 +1 @@ +{"headings":["introduction","a-new-step-definition","create-the-function","initialize-a-new-object","create-the-prep-method","create-the-bake-method","run-the-example","custom-check-operations","other-step-methods","a-print-method","methods-for-declaring-required-packages","a-tidy-method","methods-for-tuning-parameters","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/2b579da6 
b/.quarto/xref/2b579da6 new file mode 100644 index 00000000..d1d4faf4 --- /dev/null +++ b/.quarto/xref/2b579da6 @@ -0,0 +1 @@ +{"headings":["design-goals","answer-questions","file-issues","contribute-documentation","contribute-code"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/2efb7f01 b/.quarto/xref/2efb7f01 new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/2efb7f01 @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/38455488 b/.quarto/xref/38455488 new file mode 100644 index 00000000..5f18e5aa --- /dev/null +++ b/.quarto/xref/38455488 @@ -0,0 +1 @@ +{"headings":["introduction","how-does-k-means-work","clustering-in-r","exploratory-clustering","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/38db11f5 b/.quarto/xref/38db11f5 new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/38db11f5 @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/3d42fd62 b/.quarto/xref/3d42fd62 new file mode 100644 index 00000000..c33d194f --- /dev/null +++ b/.quarto/xref/3d42fd62 @@ -0,0 +1 @@ +{"headings":["introduction","example-data","time-series-resampling","model-performance","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/3ed825bc b/.quarto/xref/3ed825bc new file mode 100644 index 00000000..a878ad94 --- /dev/null +++ b/.quarto/xref/3ed825bc @@ -0,0 +1 @@ +{"headings":["introduction","test-of-independence","goodness-of-fit","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/43a00ced b/.quarto/xref/43a00ced new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/43a00ced @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/46ee07cc b/.quarto/xref/46ee07cc new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/46ee07cc @@ -0,0 
+1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/54df745b b/.quarto/xref/54df745b new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/54df745b @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/58bbc235 b/.quarto/xref/58bbc235 new file mode 100644 index 00000000..228d8ab3 --- /dev/null +++ b/.quarto/xref/58bbc235 @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","an-example-model","aspects-of-models","the-general-process","step-1.-register-the-model-modes-and-arguments","step-2.-create-the-model-function","step-3.-add-a-fit-module","step-4.-add-modules-for-prediction","does-it-work","add-an-engine","add-parsnip-models-to-another-package","your-model-tuning-parameters-and-you","pro-tips-what-ifs-exceptions-faq-and-minutiae","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/5de328d5 b/.quarto/xref/5de328d5 new file mode 100644 index 00000000..00350eba --- /dev/null +++ b/.quarto/xref/5de328d5 @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","quantitative-parameters","finalizing-parameters","qualitative-parameters","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/5f45f46a b/.quarto/xref/5f45f46a new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/5f45f46a @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/62b184de b/.quarto/xref/62b184de new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/62b184de @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/7732aaab b/.quarto/xref/7732aaab new file mode 100644 index 00000000..371d9814 --- /dev/null +++ b/.quarto/xref/7732aaab @@ -0,0 +1 @@ 
+{"headings":["introduction","linear-regression","a-single-model","resampled-or-tuned-models","more-complex-a-glmnet-model","using-glmnet-penalty-values","using-specific-penalty-values","tuning-a-glmnet-model","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/83748eba b/.quarto/xref/83748eba new file mode 100644 index 00000000..2eb45415 --- /dev/null +++ b/.quarto/xref/83748eba @@ -0,0 +1 @@ +{"entries":[],"headings":["installation-and-use","core-tidymodels","specialized-packages","perform-statistical-analysis","create-robust-models","tune-compare-and-work-with-your-models","develop-custom-modeling-tools"]} \ No newline at end of file diff --git a/.quarto/xref/95382162 b/.quarto/xref/95382162 new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/95382162 @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/9b0c8d96 b/.quarto/xref/9b0c8d96 new file mode 100644 index 00000000..8ee2d797 --- /dev/null +++ b/.quarto/xref/9b0c8d96 @@ -0,0 +1 @@ +{"headings":["asking-for-help","where-to-ask","resources"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/9c7fc66b b/.quarto/xref/9c7fc66b new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/9c7fc66b @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/INDEX b/.quarto/xref/INDEX new file mode 100644 index 00000000..d04d5c9b --- /dev/null +++ b/.quarto/xref/INDEX @@ -0,0 +1,140 @@ +{ + "about/davis/index.qmd": { + "index.html": "5f45f46a" + }, + "about/index.qmd": { + "index.html": "dda515d4" + }, + "about/max/index.qmd": { + "index.html": "95382162" + }, + "books/fes/index.qmd": { + "index.html": "43a00ced" + }, + "books/index.qmd": { + "index.html": "54df745b" + }, + "books/moderndive/index.qmd": { + "index.html": "2efb7f01" + }, + "books/smltar/index.qmd": { + "index.html": "38db11f5" + }, + "books/tidytext/index.qmd": { + 
"index.html": "f0fca32c" + }, + "books/tmwr/index.qmd": { + "index.html": "b307cd5a" + }, + "contribute/index.qmd": { + "index.html": "2b579da6" + }, + "find/all/index.qmd": { + "index.html": "46ee07cc" + }, + "find/broom/index.qmd": { + "index.html": "9c7fc66b" + }, + "find/index.qmd": { + "index.html": "034f56ee" + }, + "find/parsnip/index.qmd": { + "index.html": "cecba1e5" + }, + "find/recipes/index.qmd": { + "index.html": "02476cb8" + }, + "help/index.qmd": { + "index.html": "9b0c8d96" + }, + "index.qmd": { + "index.html": "62b184de" + }, + "learn/develop/broom/index.qmd": { + "index.html": "c43ce151" + }, + "learn/develop/metrics/index.qmd": { + "index.html": "db2a99f8" + }, + "learn/develop/models/index.qmd": { + "index.html": "58bbc235" + }, + "learn/develop/parameters/index.qmd": { + "index.html": "5de328d5" + }, + "learn/develop/recipes/index.qmd": { + "index.html": "1ffffadf" + }, + "learn/index.qmd": { + "index.html": "b7a06e62" + }, + "learn/models/coefficients/index.qmd": { + "index.html": "7732aaab" + }, + "learn/models/parsnip-nnet/index.qmd": { + "index.html": "a842be7c" + }, + "learn/models/parsnip-ranger-glmnet/index.qmd": { + "index.html": "ab559fdd" + }, + "learn/models/pls/index.qmd": { + "index.html": "ac075621" + }, + "learn/models/sub-sampling/index.qmd": { + "index.html": "c7932bc5" + }, + "learn/models/time-series/index.qmd": { + "index.html": "3d42fd62" + }, + "learn/statistics/bootstrap/index.qmd": { + "index.html": "ed21da6f" + }, + "learn/statistics/infer/index.qmd": { + "index.html": "a7c4cb7a" + }, + "learn/statistics/k-means/index.qmd": { + "index.html": "38455488" + }, + "learn/statistics/tidy-analysis/index.qmd": { + "index.html": "f8a3c266" + }, + "learn/statistics/xtabs/index.qmd": { + "index.html": "3ed825bc" + }, + "learn/work/bayes-opt/index.qmd": { + "index.html": "bd22a73c" + }, + "learn/work/case-weights/index.qmd": { + "index.html": "ba333c2e" + }, + "learn/work/nested-resampling/index.qmd": { + "index.html": "ac395285" + 
}, + "learn/work/tune-svm/index.qmd": { + "index.html": "cdaecce1" + }, + "learn/work/tune-text/index.qmd": { + "index.html": "ac429b7b" + }, + "packages/index.qmd": { + "index.html": "83748eba" + }, + "start/case-study/index.qmd": { + "index.html": "cccb19fd" + }, + "start/index.qmd": { + "index.html": "ca807105" + }, + "start/models/index.qmd": { + "index.html": "c43b39dc" + }, + "start/recipes/index.qmd": { + "index.html": "156f35b7" + }, + "start/resampling/index.qmd": { + "index.html": "ead09a15" + }, + "start/tuning/index.qmd": { + "index.html": "b1825ac5" + } +} \ No newline at end of file diff --git a/.quarto/xref/a7c4cb7a b/.quarto/xref/a7c4cb7a new file mode 100644 index 00000000..b127c073 --- /dev/null +++ b/.quarto/xref/a7c4cb7a @@ -0,0 +1 @@ +{"headings":["introduction","specify-variables","declare-the-hypothesis","generate-the-distribution","calculate-statistics","other-utilities","theoretical-methods","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/a842be7c b/.quarto/xref/a842be7c new file mode 100644 index 00000000..a3646987 --- /dev/null +++ b/.quarto/xref/a842be7c @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","fitting-a-neural-network","model-performance","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/ab559fdd b/.quarto/xref/ab559fdd new file mode 100644 index 00000000..c9a1ab51 --- /dev/null +++ b/.quarto/xref/ab559fdd @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","the-ames-housing-data","random-forest","regularized-regression","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/ac075621 b/.quarto/xref/ac075621 new file mode 100644 index 00000000..d7f13c05 --- /dev/null +++ b/.quarto/xref/ac075621 @@ -0,0 +1 @@ +{"headings":["introduction","preprocessing-the-data","partial-least-squares","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/ac395285 b/.quarto/xref/ac395285 new file mode 100644 index 00000000..0c8d2bd8 
--- /dev/null +++ b/.quarto/xref/ac395285 @@ -0,0 +1 @@ +{"headings":["introduction","resampling-models","nested-resampling","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/ac429b7b b/.quarto/xref/ac429b7b new file mode 100644 index 00000000..0b54de42 --- /dev/null +++ b/.quarto/xref/ac429b7b @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","text-as-data","inputs-for-the-search","resampling","grid-search","directed-search","extracted-results","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/b1825ac5 b/.quarto/xref/b1825ac5 new file mode 100644 index 00000000..6f527f0a --- /dev/null +++ b/.quarto/xref/b1825ac5 @@ -0,0 +1 @@ +{"entries":[],"headings":["intro","data","why-tune","tuning","tune-grid","final-model","the-last-fit","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/b307cd5a b/.quarto/xref/b307cd5a new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/b307cd5a @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/b7a06e62 b/.quarto/xref/b7a06e62 new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/b7a06e62 @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/ba333c2e b/.quarto/xref/ba333c2e new file mode 100644 index 00000000..2c989e6b --- /dev/null +++ b/.quarto/xref/ba333c2e @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","example-data","creating-weights","modeling","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/bd22a73c b/.quarto/xref/bd22a73c new file mode 100644 index 00000000..30b9a394 --- /dev/null +++ b/.quarto/xref/bd22a73c @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","cell-segmenting-revisited","the-tuning-scheme","sequential-tuning","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/c43b39dc b/.quarto/xref/c43b39dc new file mode 100644 index 00000000..e972e065 --- 
/dev/null +++ b/.quarto/xref/c43b39dc @@ -0,0 +1 @@ +{"entries":[],"headings":["intro","data","build-model","predict-model","new-engine","why","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/c43ce151 b/.quarto/xref/c43ce151 new file mode 100644 index 00000000..c4714798 --- /dev/null +++ b/.quarto/xref/c43ce151 @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","re-export-the-tidier-generics","implement-tidying-methods","implementing-the-tidy-method","implementing-the-glance-method","implementing-the-augment-method","document-the-new-methods","glossaries","arguments","column-names","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/c7932bc5 b/.quarto/xref/c7932bc5 new file mode 100644 index 00000000..eed98fb1 --- /dev/null +++ b/.quarto/xref/c7932bc5 @@ -0,0 +1 @@ +{"headings":["introduction","simulated-data","subsampling-the-data","model-performance","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/ca807105 b/.quarto/xref/ca807105 new file mode 100644 index 00000000..4d8d2d8f --- /dev/null +++ b/.quarto/xref/ca807105 @@ -0,0 +1 @@ +{"headings":["if-you-are-new-to-r-or-the-tidyverse"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/cccb19fd b/.quarto/xref/cccb19fd new file mode 100644 index 00000000..032812fb --- /dev/null +++ b/.quarto/xref/cccb19fd @@ -0,0 +1 @@ +{"entries":[],"headings":["intro","data","data-split","first-model","build-the-model","create-the-recipe","create-the-workflow","create-the-grid-for-tuning","train-and-tune-the-model","second-model","build-the-model-and-improve-training-time","create-the-recipe-and-workflow","train-and-tune-the-model-1","last-fit","next","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/cdaecce1 b/.quarto/xref/cdaecce1 new file mode 100644 index 00000000..8b0089c4 --- /dev/null +++ b/.quarto/xref/cdaecce1 @@ -0,0 +1 @@ 
+{"entries":[],"headings":["introduction","example-data","inputs-for-the-search","optional-inputs","executing-with-a-formula","executing-with-a-recipe","out-of-sample-predictions","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/cecba1e5 b/.quarto/xref/cecba1e5 new file mode 100644 index 00000000..7df77d53 --- /dev/null +++ b/.quarto/xref/cecba1e5 @@ -0,0 +1 @@ +{"headings":[],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/db2a99f8 b/.quarto/xref/db2a99f8 new file mode 100644 index 00000000..499c1621 --- /dev/null +++ b/.quarto/xref/db2a99f8 @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","numeric-example-mse","vector-implementation","data-frame-implementation","class-example-miss-rate","vector-implementation-1","supporting-multiclass-miss-rate","data-frame-implementation-1","using-custom-metrics","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/dda515d4 b/.quarto/xref/dda515d4 new file mode 100644 index 00000000..208374ba --- /dev/null +++ b/.quarto/xref/dda515d4 @@ -0,0 +1 @@ +{"entries":[],"headings":[]} \ No newline at end of file diff --git a/.quarto/xref/ead09a15 b/.quarto/xref/ead09a15 new file mode 100644 index 00000000..45a97444 --- /dev/null +++ b/.quarto/xref/ead09a15 @@ -0,0 +1 @@ +{"headings":["intro","data","predicting-image-segmentation-quality","back-to-the-cells-data","data-split","modeling","performance","what-happened-here","resampling","fit-resamples","session-info"],"entries":[]} \ No newline at end of file diff --git a/.quarto/xref/ed21da6f b/.quarto/xref/ed21da6f new file mode 100644 index 00000000..9e82f46a --- /dev/null +++ b/.quarto/xref/ed21da6f @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","bootstrapping-models","confidence-intervals","possible-model-fits","session-info"]} \ No newline at end of file diff --git a/.quarto/xref/f0fca32c b/.quarto/xref/f0fca32c new file mode 100644 index 00000000..208374ba --- /dev/null +++ b/.quarto/xref/f0fca32c @@ -0,0 +1 
@@ +{"entries":[],"headings":[]} \ No newline at end of file diff --git a/.quarto/xref/f8a3c266 b/.quarto/xref/f8a3c266 new file mode 100644 index 00000000..3d98fe67 --- /dev/null +++ b/.quarto/xref/f8a3c266 @@ -0,0 +1 @@ +{"entries":[],"headings":["introduction","correlation-analysis","regression-models","session-info"]} \ No newline at end of file diff --git a/STYLING.md b/STYLING.md new file mode 100644 index 00000000..3c907afd --- /dev/null +++ b/STYLING.md @@ -0,0 +1,7 @@ +# Styling + +The styling of this website is happening in a number of different places. some of the highlevel changes are set in the `format` section of [_quarto.yml](_quarto.yml), with the rest of the main styles set in [styles.scss](styles.scss). + +The front page includes a number of detailed styling, these are all located in [styles-frontpage.scss](styles-frontpage.scss). They are all wrapped in `#FrontPage` ID so they shouldn't affect anything not located in the front page. + +The sidebar for the [Get Started](start/) section has a unique style, and that is specified in the [start/styles.css](start/styles.css) file, that is loaded into each of these pages with either `css: styles.css` or `css: ../styles.css`. diff --git a/_extensions/quarto-ext/fontawesome/_extension.yml b/_extensions/quarto-ext/fontawesome/_extension.yml new file mode 100644 index 00000000..c0787a8c --- /dev/null +++ b/_extensions/quarto-ext/fontawesome/_extension.yml @@ -0,0 +1,7 @@ +title: Font Awesome support +author: Carlos Scheidegger +version: 1.1.0 +quarto-required: ">=1.2.269" +contributes: + shortcodes: + - fontawesome.lua diff --git a/_extensions/quarto-ext/fontawesome/assets/css/all.css b/_extensions/quarto-ext/fontawesome/assets/css/all.css new file mode 100644 index 00000000..9c2adee7 --- /dev/null +++ b/_extensions/quarto-ext/fontawesome/assets/css/all.css @@ -0,0 +1,7831 @@ +/*! 
+ * Font Awesome Free 6.1.1 by @fontawesome - https://fontawesome.com + * License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) + * Copyright 2022 Fonticons, Inc. + */ +.fa { + font-family: var(--fa-style-family, "Font Awesome 6 Free"); + font-weight: var(--fa-style, 900); } + +.fa, +.fas, +.fa-solid, +.far, +.fa-regular, +.fal, +.fa-light, +.fat, +.fa-thin, +.fad, +.fa-duotone, +.fab, +.fa-brands { + -moz-osx-font-smoothing: grayscale; + -webkit-font-smoothing: antialiased; + display: var(--fa-display, inline-block); + font-style: normal; + font-variant: normal; + line-height: 1; + text-rendering: auto; } + +.fa-1x { + font-size: 1em; } + +.fa-2x { + font-size: 2em; } + +.fa-3x { + font-size: 3em; } + +.fa-4x { + font-size: 4em; } + +.fa-5x { + font-size: 5em; } + +.fa-6x { + font-size: 6em; } + +.fa-7x { + font-size: 7em; } + +.fa-8x { + font-size: 8em; } + +.fa-9x { + font-size: 9em; } + +.fa-10x { + font-size: 10em; } + +.fa-2xs { + font-size: 0.625em; + line-height: 0.1em; + vertical-align: 0.225em; } + +.fa-xs { + font-size: 0.75em; + line-height: 0.08333em; + vertical-align: 0.125em; } + +.fa-sm { + font-size: 0.875em; + line-height: 0.07143em; + vertical-align: 0.05357em; } + +.fa-lg { + font-size: 1.25em; + line-height: 0.05em; + vertical-align: -0.075em; } + +.fa-xl { + font-size: 1.5em; + line-height: 0.04167em; + vertical-align: -0.125em; } + +.fa-2xl { + font-size: 2em; + line-height: 0.03125em; + vertical-align: -0.1875em; } + +.fa-fw { + text-align: center; + width: 1.25em; } + +.fa-ul { + list-style-type: none; + margin-left: var(--fa-li-margin, 2.5em); + padding-left: 0; } + .fa-ul > li { + position: relative; } + +.fa-li { + left: calc(var(--fa-li-width, 2em) * -1); + position: absolute; + text-align: center; + width: var(--fa-li-width, 2em); + line-height: inherit; } + +.fa-border { + border-color: var(--fa-border-color, #eee); + border-radius: var(--fa-border-radius, 0.1em); + border-style: 
var(--fa-border-style, solid); + border-width: var(--fa-border-width, 0.08em); + padding: var(--fa-border-padding, 0.2em 0.25em 0.15em); } + +.fa-pull-left { + float: left; + margin-right: var(--fa-pull-margin, 0.3em); } + +.fa-pull-right { + float: right; + margin-left: var(--fa-pull-margin, 0.3em); } + +.fa-beat { + -webkit-animation-name: fa-beat; + animation-name: fa-beat; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, ease-in-out); + animation-timing-function: var(--fa-animation-timing, ease-in-out); } + +.fa-bounce { + -webkit-animation-name: fa-bounce; + animation-name: fa-bounce; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.28, 0.84, 0.42, 1)); + animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.28, 0.84, 0.42, 1)); } + +.fa-fade { + -webkit-animation-name: fa-fade; + animation-name: fa-fade; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: 
var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); + animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); } + +.fa-beat-fade { + -webkit-animation-name: fa-beat-fade; + animation-name: fa-beat-fade; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); + animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); } + +.fa-flip { + -webkit-animation-name: fa-flip; + animation-name: fa-flip; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: 
var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, ease-in-out); + animation-timing-function: var(--fa-animation-timing, ease-in-out); } + +.fa-shake { + -webkit-animation-name: fa-shake; + animation-name: fa-shake; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, linear); + animation-timing-function: var(--fa-animation-timing, linear); } + +.fa-spin { + -webkit-animation-name: fa-spin; + animation-name: fa-spin; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 2s); + animation-duration: var(--fa-animation-duration, 2s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, linear); + animation-timing-function: var(--fa-animation-timing, linear); } + +.fa-spin-reverse { + --fa-animation-direction: reverse; } + +.fa-pulse, +.fa-spin-pulse { + -webkit-animation-name: fa-spin; + animation-name: fa-spin; + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: 
var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, steps(8)); + animation-timing-function: var(--fa-animation-timing, steps(8)); } + +@media (prefers-reduced-motion: reduce) { + .fa-beat, + .fa-bounce, + .fa-fade, + .fa-beat-fade, + .fa-flip, + .fa-pulse, + .fa-shake, + .fa-spin, + .fa-spin-pulse { + -webkit-animation-delay: -1ms; + animation-delay: -1ms; + -webkit-animation-duration: 1ms; + animation-duration: 1ms; + -webkit-animation-iteration-count: 1; + animation-iteration-count: 1; + transition-delay: 0s; + transition-duration: 0s; } } + +@-webkit-keyframes fa-beat { + 0%, 90% { + -webkit-transform: scale(1); + transform: scale(1); } + 45% { + -webkit-transform: scale(var(--fa-beat-scale, 1.25)); + transform: scale(var(--fa-beat-scale, 1.25)); } } + +@keyframes fa-beat { + 0%, 90% { + -webkit-transform: scale(1); + transform: scale(1); } + 45% { + -webkit-transform: scale(var(--fa-beat-scale, 1.25)); + transform: scale(var(--fa-beat-scale, 1.25)); } } + +@-webkit-keyframes fa-bounce { + 0% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 10% { + -webkit-transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); + transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); } + 30% { + -webkit-transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); + transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); } + 50% { + -webkit-transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) 
translateY(0); + transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) translateY(0); } + 57% { + -webkit-transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); + transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); } + 64% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 100% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } } + +@keyframes fa-bounce { + 0% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 10% { + -webkit-transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); + transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); } + 30% { + -webkit-transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); + transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); } + 50% { + -webkit-transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) translateY(0); + transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) translateY(0); } + 57% { + -webkit-transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); + transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); } + 64% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 100% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } } + +@-webkit-keyframes fa-fade { + 50% { + opacity: var(--fa-fade-opacity, 0.4); } } + +@keyframes fa-fade { + 50% { + opacity: var(--fa-fade-opacity, 0.4); } } + +@-webkit-keyframes fa-beat-fade { + 0%, 100% { + opacity: var(--fa-beat-fade-opacity, 0.4); + 
-webkit-transform: scale(1); + transform: scale(1); } + 50% { + opacity: 1; + -webkit-transform: scale(var(--fa-beat-fade-scale, 1.125)); + transform: scale(var(--fa-beat-fade-scale, 1.125)); } } + +@keyframes fa-beat-fade { + 0%, 100% { + opacity: var(--fa-beat-fade-opacity, 0.4); + -webkit-transform: scale(1); + transform: scale(1); } + 50% { + opacity: 1; + -webkit-transform: scale(var(--fa-beat-fade-scale, 1.125)); + transform: scale(var(--fa-beat-fade-scale, 1.125)); } } + +@-webkit-keyframes fa-flip { + 50% { + -webkit-transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); + transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); } } + +@keyframes fa-flip { + 50% { + -webkit-transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); + transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); } } + +@-webkit-keyframes fa-shake { + 0% { + -webkit-transform: rotate(-15deg); + transform: rotate(-15deg); } + 4% { + -webkit-transform: rotate(15deg); + transform: rotate(15deg); } + 8%, 24% { + -webkit-transform: rotate(-18deg); + transform: rotate(-18deg); } + 12%, 28% { + -webkit-transform: rotate(18deg); + transform: rotate(18deg); } + 16% { + -webkit-transform: rotate(-22deg); + transform: rotate(-22deg); } + 20% { + -webkit-transform: rotate(22deg); + transform: rotate(22deg); } + 32% { + -webkit-transform: rotate(-12deg); + transform: rotate(-12deg); } + 36% { + -webkit-transform: rotate(12deg); + transform: rotate(12deg); } + 40%, 100% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } } + +@keyframes fa-shake { + 0% { + -webkit-transform: rotate(-15deg); + transform: rotate(-15deg); } + 4% { + -webkit-transform: rotate(15deg); + transform: rotate(15deg); } + 8%, 24% { + -webkit-transform: rotate(-18deg); + transform: 
rotate(-18deg); } + 12%, 28% { + -webkit-transform: rotate(18deg); + transform: rotate(18deg); } + 16% { + -webkit-transform: rotate(-22deg); + transform: rotate(-22deg); } + 20% { + -webkit-transform: rotate(22deg); + transform: rotate(22deg); } + 32% { + -webkit-transform: rotate(-12deg); + transform: rotate(-12deg); } + 36% { + -webkit-transform: rotate(12deg); + transform: rotate(12deg); } + 40%, 100% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } } + +@-webkit-keyframes fa-spin { + 0% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } + 100% { + -webkit-transform: rotate(360deg); + transform: rotate(360deg); } } + +@keyframes fa-spin { + 0% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } + 100% { + -webkit-transform: rotate(360deg); + transform: rotate(360deg); } } + +.fa-rotate-90 { + -webkit-transform: rotate(90deg); + transform: rotate(90deg); } + +.fa-rotate-180 { + -webkit-transform: rotate(180deg); + transform: rotate(180deg); } + +.fa-rotate-270 { + -webkit-transform: rotate(270deg); + transform: rotate(270deg); } + +.fa-flip-horizontal { + -webkit-transform: scale(-1, 1); + transform: scale(-1, 1); } + +.fa-flip-vertical { + -webkit-transform: scale(1, -1); + transform: scale(1, -1); } + +.fa-flip-both, +.fa-flip-horizontal.fa-flip-vertical { + -webkit-transform: scale(-1, -1); + transform: scale(-1, -1); } + +.fa-rotate-by { + -webkit-transform: rotate(var(--fa-rotate-angle, none)); + transform: rotate(var(--fa-rotate-angle, none)); } + +.fa-stack { + display: inline-block; + height: 2em; + line-height: 2em; + position: relative; + vertical-align: middle; + width: 2.5em; } + +.fa-stack-1x, +.fa-stack-2x { + left: 0; + position: absolute; + text-align: center; + width: 100%; + z-index: var(--fa-stack-z-index, auto); } + +.fa-stack-1x { + line-height: inherit; } + +.fa-stack-2x { + font-size: 2em; } + +.fa-inverse { + color: var(--fa-inverse, #fff); } + +/* Font Awesome uses the Unicode Private Use 
Area (PUA) to ensure screen +readers do not read off random characters that represent icons */ +.fa-0::before { + content: "\30"; } + +.fa-1::before { + content: "\31"; } + +.fa-2::before { + content: "\32"; } + +.fa-3::before { + content: "\33"; } + +.fa-4::before { + content: "\34"; } + +.fa-5::before { + content: "\35"; } + +.fa-6::before { + content: "\36"; } + +.fa-7::before { + content: "\37"; } + +.fa-8::before { + content: "\38"; } + +.fa-9::before { + content: "\39"; } + +.fa-a::before { + content: "\41"; } + +.fa-address-book::before { + content: "\f2b9"; } + +.fa-contact-book::before { + content: "\f2b9"; } + +.fa-address-card::before { + content: "\f2bb"; } + +.fa-contact-card::before { + content: "\f2bb"; } + +.fa-vcard::before { + content: "\f2bb"; } + +.fa-align-center::before { + content: "\f037"; } + +.fa-align-justify::before { + content: "\f039"; } + +.fa-align-left::before { + content: "\f036"; } + +.fa-align-right::before { + content: "\f038"; } + +.fa-anchor::before { + content: "\f13d"; } + +.fa-anchor-circle-check::before { + content: "\e4aa"; } + +.fa-anchor-circle-exclamation::before { + content: "\e4ab"; } + +.fa-anchor-circle-xmark::before { + content: "\e4ac"; } + +.fa-anchor-lock::before { + content: "\e4ad"; } + +.fa-angle-down::before { + content: "\f107"; } + +.fa-angle-left::before { + content: "\f104"; } + +.fa-angle-right::before { + content: "\f105"; } + +.fa-angle-up::before { + content: "\f106"; } + +.fa-angles-down::before { + content: "\f103"; } + +.fa-angle-double-down::before { + content: "\f103"; } + +.fa-angles-left::before { + content: "\f100"; } + +.fa-angle-double-left::before { + content: "\f100"; } + +.fa-angles-right::before { + content: "\f101"; } + +.fa-angle-double-right::before { + content: "\f101"; } + +.fa-angles-up::before { + content: "\f102"; } + +.fa-angle-double-up::before { + content: "\f102"; } + +.fa-ankh::before { + content: "\f644"; } + +.fa-apple-whole::before { + content: "\f5d1"; } + 
+.fa-apple-alt::before { + content: "\f5d1"; } + +.fa-archway::before { + content: "\f557"; } + +.fa-arrow-down::before { + content: "\f063"; } + +.fa-arrow-down-1-9::before { + content: "\f162"; } + +.fa-sort-numeric-asc::before { + content: "\f162"; } + +.fa-sort-numeric-down::before { + content: "\f162"; } + +.fa-arrow-down-9-1::before { + content: "\f886"; } + +.fa-sort-numeric-desc::before { + content: "\f886"; } + +.fa-sort-numeric-down-alt::before { + content: "\f886"; } + +.fa-arrow-down-a-z::before { + content: "\f15d"; } + +.fa-sort-alpha-asc::before { + content: "\f15d"; } + +.fa-sort-alpha-down::before { + content: "\f15d"; } + +.fa-arrow-down-long::before { + content: "\f175"; } + +.fa-long-arrow-down::before { + content: "\f175"; } + +.fa-arrow-down-short-wide::before { + content: "\f884"; } + +.fa-sort-amount-desc::before { + content: "\f884"; } + +.fa-sort-amount-down-alt::before { + content: "\f884"; } + +.fa-arrow-down-up-across-line::before { + content: "\e4af"; } + +.fa-arrow-down-up-lock::before { + content: "\e4b0"; } + +.fa-arrow-down-wide-short::before { + content: "\f160"; } + +.fa-sort-amount-asc::before { + content: "\f160"; } + +.fa-sort-amount-down::before { + content: "\f160"; } + +.fa-arrow-down-z-a::before { + content: "\f881"; } + +.fa-sort-alpha-desc::before { + content: "\f881"; } + +.fa-sort-alpha-down-alt::before { + content: "\f881"; } + +.fa-arrow-left::before { + content: "\f060"; } + +.fa-arrow-left-long::before { + content: "\f177"; } + +.fa-long-arrow-left::before { + content: "\f177"; } + +.fa-arrow-pointer::before { + content: "\f245"; } + +.fa-mouse-pointer::before { + content: "\f245"; } + +.fa-arrow-right::before { + content: "\f061"; } + +.fa-arrow-right-arrow-left::before { + content: "\f0ec"; } + +.fa-exchange::before { + content: "\f0ec"; } + +.fa-arrow-right-from-bracket::before { + content: "\f08b"; } + +.fa-sign-out::before { + content: "\f08b"; } + +.fa-arrow-right-long::before { + content: "\f178"; } + 
+.fa-long-arrow-right::before { + content: "\f178"; } + +.fa-arrow-right-to-bracket::before { + content: "\f090"; } + +.fa-sign-in::before { + content: "\f090"; } + +.fa-arrow-right-to-city::before { + content: "\e4b3"; } + +.fa-arrow-rotate-left::before { + content: "\f0e2"; } + +.fa-arrow-left-rotate::before { + content: "\f0e2"; } + +.fa-arrow-rotate-back::before { + content: "\f0e2"; } + +.fa-arrow-rotate-backward::before { + content: "\f0e2"; } + +.fa-undo::before { + content: "\f0e2"; } + +.fa-arrow-rotate-right::before { + content: "\f01e"; } + +.fa-arrow-right-rotate::before { + content: "\f01e"; } + +.fa-arrow-rotate-forward::before { + content: "\f01e"; } + +.fa-redo::before { + content: "\f01e"; } + +.fa-arrow-trend-down::before { + content: "\e097"; } + +.fa-arrow-trend-up::before { + content: "\e098"; } + +.fa-arrow-turn-down::before { + content: "\f149"; } + +.fa-level-down::before { + content: "\f149"; } + +.fa-arrow-turn-up::before { + content: "\f148"; } + +.fa-level-up::before { + content: "\f148"; } + +.fa-arrow-up::before { + content: "\f062"; } + +.fa-arrow-up-1-9::before { + content: "\f163"; } + +.fa-sort-numeric-up::before { + content: "\f163"; } + +.fa-arrow-up-9-1::before { + content: "\f887"; } + +.fa-sort-numeric-up-alt::before { + content: "\f887"; } + +.fa-arrow-up-a-z::before { + content: "\f15e"; } + +.fa-sort-alpha-up::before { + content: "\f15e"; } + +.fa-arrow-up-from-bracket::before { + content: "\e09a"; } + +.fa-arrow-up-from-ground-water::before { + content: "\e4b5"; } + +.fa-arrow-up-from-water-pump::before { + content: "\e4b6"; } + +.fa-arrow-up-long::before { + content: "\f176"; } + +.fa-long-arrow-up::before { + content: "\f176"; } + +.fa-arrow-up-right-dots::before { + content: "\e4b7"; } + +.fa-arrow-up-right-from-square::before { + content: "\f08e"; } + +.fa-external-link::before { + content: "\f08e"; } + +.fa-arrow-up-short-wide::before { + content: "\f885"; } + +.fa-sort-amount-up-alt::before { + content: "\f885"; } + 
+.fa-arrow-up-wide-short::before { + content: "\f161"; } + +.fa-sort-amount-up::before { + content: "\f161"; } + +.fa-arrow-up-z-a::before { + content: "\f882"; } + +.fa-sort-alpha-up-alt::before { + content: "\f882"; } + +.fa-arrows-down-to-line::before { + content: "\e4b8"; } + +.fa-arrows-down-to-people::before { + content: "\e4b9"; } + +.fa-arrows-left-right::before { + content: "\f07e"; } + +.fa-arrows-h::before { + content: "\f07e"; } + +.fa-arrows-left-right-to-line::before { + content: "\e4ba"; } + +.fa-arrows-rotate::before { + content: "\f021"; } + +.fa-refresh::before { + content: "\f021"; } + +.fa-sync::before { + content: "\f021"; } + +.fa-arrows-spin::before { + content: "\e4bb"; } + +.fa-arrows-split-up-and-left::before { + content: "\e4bc"; } + +.fa-arrows-to-circle::before { + content: "\e4bd"; } + +.fa-arrows-to-dot::before { + content: "\e4be"; } + +.fa-arrows-to-eye::before { + content: "\e4bf"; } + +.fa-arrows-turn-right::before { + content: "\e4c0"; } + +.fa-arrows-turn-to-dots::before { + content: "\e4c1"; } + +.fa-arrows-up-down::before { + content: "\f07d"; } + +.fa-arrows-v::before { + content: "\f07d"; } + +.fa-arrows-up-down-left-right::before { + content: "\f047"; } + +.fa-arrows::before { + content: "\f047"; } + +.fa-arrows-up-to-line::before { + content: "\e4c2"; } + +.fa-asterisk::before { + content: "\2a"; } + +.fa-at::before { + content: "\40"; } + +.fa-atom::before { + content: "\f5d2"; } + +.fa-audio-description::before { + content: "\f29e"; } + +.fa-austral-sign::before { + content: "\e0a9"; } + +.fa-award::before { + content: "\f559"; } + +.fa-b::before { + content: "\42"; } + +.fa-baby::before { + content: "\f77c"; } + +.fa-baby-carriage::before { + content: "\f77d"; } + +.fa-carriage-baby::before { + content: "\f77d"; } + +.fa-backward::before { + content: "\f04a"; } + +.fa-backward-fast::before { + content: "\f049"; } + +.fa-fast-backward::before { + content: "\f049"; } + +.fa-backward-step::before { + content: "\f048"; } + 
+.fa-step-backward::before { + content: "\f048"; } + +.fa-bacon::before { + content: "\f7e5"; } + +.fa-bacteria::before { + content: "\e059"; } + +.fa-bacterium::before { + content: "\e05a"; } + +.fa-bag-shopping::before { + content: "\f290"; } + +.fa-shopping-bag::before { + content: "\f290"; } + +.fa-bahai::before { + content: "\f666"; } + +.fa-baht-sign::before { + content: "\e0ac"; } + +.fa-ban::before { + content: "\f05e"; } + +.fa-cancel::before { + content: "\f05e"; } + +.fa-ban-smoking::before { + content: "\f54d"; } + +.fa-smoking-ban::before { + content: "\f54d"; } + +.fa-bandage::before { + content: "\f462"; } + +.fa-band-aid::before { + content: "\f462"; } + +.fa-barcode::before { + content: "\f02a"; } + +.fa-bars::before { + content: "\f0c9"; } + +.fa-navicon::before { + content: "\f0c9"; } + +.fa-bars-progress::before { + content: "\f828"; } + +.fa-tasks-alt::before { + content: "\f828"; } + +.fa-bars-staggered::before { + content: "\f550"; } + +.fa-reorder::before { + content: "\f550"; } + +.fa-stream::before { + content: "\f550"; } + +.fa-baseball::before { + content: "\f433"; } + +.fa-baseball-ball::before { + content: "\f433"; } + +.fa-baseball-bat-ball::before { + content: "\f432"; } + +.fa-basket-shopping::before { + content: "\f291"; } + +.fa-shopping-basket::before { + content: "\f291"; } + +.fa-basketball::before { + content: "\f434"; } + +.fa-basketball-ball::before { + content: "\f434"; } + +.fa-bath::before { + content: "\f2cd"; } + +.fa-bathtub::before { + content: "\f2cd"; } + +.fa-battery-empty::before { + content: "\f244"; } + +.fa-battery-0::before { + content: "\f244"; } + +.fa-battery-full::before { + content: "\f240"; } + +.fa-battery::before { + content: "\f240"; } + +.fa-battery-5::before { + content: "\f240"; } + +.fa-battery-half::before { + content: "\f242"; } + +.fa-battery-3::before { + content: "\f242"; } + +.fa-battery-quarter::before { + content: "\f243"; } + +.fa-battery-2::before { + content: "\f243"; } + 
+.fa-battery-three-quarters::before { + content: "\f241"; } + +.fa-battery-4::before { + content: "\f241"; } + +.fa-bed::before { + content: "\f236"; } + +.fa-bed-pulse::before { + content: "\f487"; } + +.fa-procedures::before { + content: "\f487"; } + +.fa-beer-mug-empty::before { + content: "\f0fc"; } + +.fa-beer::before { + content: "\f0fc"; } + +.fa-bell::before { + content: "\f0f3"; } + +.fa-bell-concierge::before { + content: "\f562"; } + +.fa-concierge-bell::before { + content: "\f562"; } + +.fa-bell-slash::before { + content: "\f1f6"; } + +.fa-bezier-curve::before { + content: "\f55b"; } + +.fa-bicycle::before { + content: "\f206"; } + +.fa-binoculars::before { + content: "\f1e5"; } + +.fa-biohazard::before { + content: "\f780"; } + +.fa-bitcoin-sign::before { + content: "\e0b4"; } + +.fa-blender::before { + content: "\f517"; } + +.fa-blender-phone::before { + content: "\f6b6"; } + +.fa-blog::before { + content: "\f781"; } + +.fa-bold::before { + content: "\f032"; } + +.fa-bolt::before { + content: "\f0e7"; } + +.fa-zap::before { + content: "\f0e7"; } + +.fa-bolt-lightning::before { + content: "\e0b7"; } + +.fa-bomb::before { + content: "\f1e2"; } + +.fa-bone::before { + content: "\f5d7"; } + +.fa-bong::before { + content: "\f55c"; } + +.fa-book::before { + content: "\f02d"; } + +.fa-book-atlas::before { + content: "\f558"; } + +.fa-atlas::before { + content: "\f558"; } + +.fa-book-bible::before { + content: "\f647"; } + +.fa-bible::before { + content: "\f647"; } + +.fa-book-bookmark::before { + content: "\e0bb"; } + +.fa-book-journal-whills::before { + content: "\f66a"; } + +.fa-journal-whills::before { + content: "\f66a"; } + +.fa-book-medical::before { + content: "\f7e6"; } + +.fa-book-open::before { + content: "\f518"; } + +.fa-book-open-reader::before { + content: "\f5da"; } + +.fa-book-reader::before { + content: "\f5da"; } + +.fa-book-quran::before { + content: "\f687"; } + +.fa-quran::before { + content: "\f687"; } + +.fa-book-skull::before { + 
content: "\f6b7"; } + +.fa-book-dead::before { + content: "\f6b7"; } + +.fa-bookmark::before { + content: "\f02e"; } + +.fa-border-all::before { + content: "\f84c"; } + +.fa-border-none::before { + content: "\f850"; } + +.fa-border-top-left::before { + content: "\f853"; } + +.fa-border-style::before { + content: "\f853"; } + +.fa-bore-hole::before { + content: "\e4c3"; } + +.fa-bottle-droplet::before { + content: "\e4c4"; } + +.fa-bottle-water::before { + content: "\e4c5"; } + +.fa-bowl-food::before { + content: "\e4c6"; } + +.fa-bowl-rice::before { + content: "\e2eb"; } + +.fa-bowling-ball::before { + content: "\f436"; } + +.fa-box::before { + content: "\f466"; } + +.fa-box-archive::before { + content: "\f187"; } + +.fa-archive::before { + content: "\f187"; } + +.fa-box-open::before { + content: "\f49e"; } + +.fa-box-tissue::before { + content: "\e05b"; } + +.fa-boxes-packing::before { + content: "\e4c7"; } + +.fa-boxes-stacked::before { + content: "\f468"; } + +.fa-boxes::before { + content: "\f468"; } + +.fa-boxes-alt::before { + content: "\f468"; } + +.fa-braille::before { + content: "\f2a1"; } + +.fa-brain::before { + content: "\f5dc"; } + +.fa-brazilian-real-sign::before { + content: "\e46c"; } + +.fa-bread-slice::before { + content: "\f7ec"; } + +.fa-bridge::before { + content: "\e4c8"; } + +.fa-bridge-circle-check::before { + content: "\e4c9"; } + +.fa-bridge-circle-exclamation::before { + content: "\e4ca"; } + +.fa-bridge-circle-xmark::before { + content: "\e4cb"; } + +.fa-bridge-lock::before { + content: "\e4cc"; } + +.fa-bridge-water::before { + content: "\e4ce"; } + +.fa-briefcase::before { + content: "\f0b1"; } + +.fa-briefcase-medical::before { + content: "\f469"; } + +.fa-broom::before { + content: "\f51a"; } + +.fa-broom-ball::before { + content: "\f458"; } + +.fa-quidditch::before { + content: "\f458"; } + +.fa-quidditch-broom-ball::before { + content: "\f458"; } + +.fa-brush::before { + content: "\f55d"; } + +.fa-bucket::before { + content: 
"\e4cf"; } + +.fa-bug::before { + content: "\f188"; } + +.fa-bug-slash::before { + content: "\e490"; } + +.fa-bugs::before { + content: "\e4d0"; } + +.fa-building::before { + content: "\f1ad"; } + +.fa-building-circle-arrow-right::before { + content: "\e4d1"; } + +.fa-building-circle-check::before { + content: "\e4d2"; } + +.fa-building-circle-exclamation::before { + content: "\e4d3"; } + +.fa-building-circle-xmark::before { + content: "\e4d4"; } + +.fa-building-columns::before { + content: "\f19c"; } + +.fa-bank::before { + content: "\f19c"; } + +.fa-institution::before { + content: "\f19c"; } + +.fa-museum::before { + content: "\f19c"; } + +.fa-university::before { + content: "\f19c"; } + +.fa-building-flag::before { + content: "\e4d5"; } + +.fa-building-lock::before { + content: "\e4d6"; } + +.fa-building-ngo::before { + content: "\e4d7"; } + +.fa-building-shield::before { + content: "\e4d8"; } + +.fa-building-un::before { + content: "\e4d9"; } + +.fa-building-user::before { + content: "\e4da"; } + +.fa-building-wheat::before { + content: "\e4db"; } + +.fa-bullhorn::before { + content: "\f0a1"; } + +.fa-bullseye::before { + content: "\f140"; } + +.fa-burger::before { + content: "\f805"; } + +.fa-hamburger::before { + content: "\f805"; } + +.fa-burst::before { + content: "\e4dc"; } + +.fa-bus::before { + content: "\f207"; } + +.fa-bus-simple::before { + content: "\f55e"; } + +.fa-bus-alt::before { + content: "\f55e"; } + +.fa-business-time::before { + content: "\f64a"; } + +.fa-briefcase-clock::before { + content: "\f64a"; } + +.fa-c::before { + content: "\43"; } + +.fa-cake-candles::before { + content: "\f1fd"; } + +.fa-birthday-cake::before { + content: "\f1fd"; } + +.fa-cake::before { + content: "\f1fd"; } + +.fa-calculator::before { + content: "\f1ec"; } + +.fa-calendar::before { + content: "\f133"; } + +.fa-calendar-check::before { + content: "\f274"; } + +.fa-calendar-day::before { + content: "\f783"; } + +.fa-calendar-days::before { + content: "\f073"; } + 
+.fa-calendar-alt::before { + content: "\f073"; } + +.fa-calendar-minus::before { + content: "\f272"; } + +.fa-calendar-plus::before { + content: "\f271"; } + +.fa-calendar-week::before { + content: "\f784"; } + +.fa-calendar-xmark::before { + content: "\f273"; } + +.fa-calendar-times::before { + content: "\f273"; } + +.fa-camera::before { + content: "\f030"; } + +.fa-camera-alt::before { + content: "\f030"; } + +.fa-camera-retro::before { + content: "\f083"; } + +.fa-camera-rotate::before { + content: "\e0d8"; } + +.fa-campground::before { + content: "\f6bb"; } + +.fa-candy-cane::before { + content: "\f786"; } + +.fa-cannabis::before { + content: "\f55f"; } + +.fa-capsules::before { + content: "\f46b"; } + +.fa-car::before { + content: "\f1b9"; } + +.fa-automobile::before { + content: "\f1b9"; } + +.fa-car-battery::before { + content: "\f5df"; } + +.fa-battery-car::before { + content: "\f5df"; } + +.fa-car-burst::before { + content: "\f5e1"; } + +.fa-car-crash::before { + content: "\f5e1"; } + +.fa-car-on::before { + content: "\e4dd"; } + +.fa-car-rear::before { + content: "\f5de"; } + +.fa-car-alt::before { + content: "\f5de"; } + +.fa-car-side::before { + content: "\f5e4"; } + +.fa-car-tunnel::before { + content: "\e4de"; } + +.fa-caravan::before { + content: "\f8ff"; } + +.fa-caret-down::before { + content: "\f0d7"; } + +.fa-caret-left::before { + content: "\f0d9"; } + +.fa-caret-right::before { + content: "\f0da"; } + +.fa-caret-up::before { + content: "\f0d8"; } + +.fa-carrot::before { + content: "\f787"; } + +.fa-cart-arrow-down::before { + content: "\f218"; } + +.fa-cart-flatbed::before { + content: "\f474"; } + +.fa-dolly-flatbed::before { + content: "\f474"; } + +.fa-cart-flatbed-suitcase::before { + content: "\f59d"; } + +.fa-luggage-cart::before { + content: "\f59d"; } + +.fa-cart-plus::before { + content: "\f217"; } + +.fa-cart-shopping::before { + content: "\f07a"; } + +.fa-shopping-cart::before { + content: "\f07a"; } + +.fa-cash-register::before { + 
content: "\f788"; } + +.fa-cat::before { + content: "\f6be"; } + +.fa-cedi-sign::before { + content: "\e0df"; } + +.fa-cent-sign::before { + content: "\e3f5"; } + +.fa-certificate::before { + content: "\f0a3"; } + +.fa-chair::before { + content: "\f6c0"; } + +.fa-chalkboard::before { + content: "\f51b"; } + +.fa-blackboard::before { + content: "\f51b"; } + +.fa-chalkboard-user::before { + content: "\f51c"; } + +.fa-chalkboard-teacher::before { + content: "\f51c"; } + +.fa-champagne-glasses::before { + content: "\f79f"; } + +.fa-glass-cheers::before { + content: "\f79f"; } + +.fa-charging-station::before { + content: "\f5e7"; } + +.fa-chart-area::before { + content: "\f1fe"; } + +.fa-area-chart::before { + content: "\f1fe"; } + +.fa-chart-bar::before { + content: "\f080"; } + +.fa-bar-chart::before { + content: "\f080"; } + +.fa-chart-column::before { + content: "\e0e3"; } + +.fa-chart-gantt::before { + content: "\e0e4"; } + +.fa-chart-line::before { + content: "\f201"; } + +.fa-line-chart::before { + content: "\f201"; } + +.fa-chart-pie::before { + content: "\f200"; } + +.fa-pie-chart::before { + content: "\f200"; } + +.fa-chart-simple::before { + content: "\e473"; } + +.fa-check::before { + content: "\f00c"; } + +.fa-check-double::before { + content: "\f560"; } + +.fa-check-to-slot::before { + content: "\f772"; } + +.fa-vote-yea::before { + content: "\f772"; } + +.fa-cheese::before { + content: "\f7ef"; } + +.fa-chess::before { + content: "\f439"; } + +.fa-chess-bishop::before { + content: "\f43a"; } + +.fa-chess-board::before { + content: "\f43c"; } + +.fa-chess-king::before { + content: "\f43f"; } + +.fa-chess-knight::before { + content: "\f441"; } + +.fa-chess-pawn::before { + content: "\f443"; } + +.fa-chess-queen::before { + content: "\f445"; } + +.fa-chess-rook::before { + content: "\f447"; } + +.fa-chevron-down::before { + content: "\f078"; } + +.fa-chevron-left::before { + content: "\f053"; } + +.fa-chevron-right::before { + content: "\f054"; } + 
+.fa-chevron-up::before { + content: "\f077"; } + +.fa-child::before { + content: "\f1ae"; } + +.fa-child-dress::before { + content: "\e59c"; } + +.fa-child-reaching::before { + content: "\e59d"; } + +.fa-child-rifle::before { + content: "\e4e0"; } + +.fa-children::before { + content: "\e4e1"; } + +.fa-church::before { + content: "\f51d"; } + +.fa-circle::before { + content: "\f111"; } + +.fa-circle-arrow-down::before { + content: "\f0ab"; } + +.fa-arrow-circle-down::before { + content: "\f0ab"; } + +.fa-circle-arrow-left::before { + content: "\f0a8"; } + +.fa-arrow-circle-left::before { + content: "\f0a8"; } + +.fa-circle-arrow-right::before { + content: "\f0a9"; } + +.fa-arrow-circle-right::before { + content: "\f0a9"; } + +.fa-circle-arrow-up::before { + content: "\f0aa"; } + +.fa-arrow-circle-up::before { + content: "\f0aa"; } + +.fa-circle-check::before { + content: "\f058"; } + +.fa-check-circle::before { + content: "\f058"; } + +.fa-circle-chevron-down::before { + content: "\f13a"; } + +.fa-chevron-circle-down::before { + content: "\f13a"; } + +.fa-circle-chevron-left::before { + content: "\f137"; } + +.fa-chevron-circle-left::before { + content: "\f137"; } + +.fa-circle-chevron-right::before { + content: "\f138"; } + +.fa-chevron-circle-right::before { + content: "\f138"; } + +.fa-circle-chevron-up::before { + content: "\f139"; } + +.fa-chevron-circle-up::before { + content: "\f139"; } + +.fa-circle-dollar-to-slot::before { + content: "\f4b9"; } + +.fa-donate::before { + content: "\f4b9"; } + +.fa-circle-dot::before { + content: "\f192"; } + +.fa-dot-circle::before { + content: "\f192"; } + +.fa-circle-down::before { + content: "\f358"; } + +.fa-arrow-alt-circle-down::before { + content: "\f358"; } + +.fa-circle-exclamation::before { + content: "\f06a"; } + +.fa-exclamation-circle::before { + content: "\f06a"; } + +.fa-circle-h::before { + content: "\f47e"; } + +.fa-hospital-symbol::before { + content: "\f47e"; } + +.fa-circle-half-stroke::before { + 
content: "\f042"; } + +.fa-adjust::before { + content: "\f042"; } + +.fa-circle-info::before { + content: "\f05a"; } + +.fa-info-circle::before { + content: "\f05a"; } + +.fa-circle-left::before { + content: "\f359"; } + +.fa-arrow-alt-circle-left::before { + content: "\f359"; } + +.fa-circle-minus::before { + content: "\f056"; } + +.fa-minus-circle::before { + content: "\f056"; } + +.fa-circle-nodes::before { + content: "\e4e2"; } + +.fa-circle-notch::before { + content: "\f1ce"; } + +.fa-circle-pause::before { + content: "\f28b"; } + +.fa-pause-circle::before { + content: "\f28b"; } + +.fa-circle-play::before { + content: "\f144"; } + +.fa-play-circle::before { + content: "\f144"; } + +.fa-circle-plus::before { + content: "\f055"; } + +.fa-plus-circle::before { + content: "\f055"; } + +.fa-circle-question::before { + content: "\f059"; } + +.fa-question-circle::before { + content: "\f059"; } + +.fa-circle-radiation::before { + content: "\f7ba"; } + +.fa-radiation-alt::before { + content: "\f7ba"; } + +.fa-circle-right::before { + content: "\f35a"; } + +.fa-arrow-alt-circle-right::before { + content: "\f35a"; } + +.fa-circle-stop::before { + content: "\f28d"; } + +.fa-stop-circle::before { + content: "\f28d"; } + +.fa-circle-up::before { + content: "\f35b"; } + +.fa-arrow-alt-circle-up::before { + content: "\f35b"; } + +.fa-circle-user::before { + content: "\f2bd"; } + +.fa-user-circle::before { + content: "\f2bd"; } + +.fa-circle-xmark::before { + content: "\f057"; } + +.fa-times-circle::before { + content: "\f057"; } + +.fa-xmark-circle::before { + content: "\f057"; } + +.fa-city::before { + content: "\f64f"; } + +.fa-clapperboard::before { + content: "\e131"; } + +.fa-clipboard::before { + content: "\f328"; } + +.fa-clipboard-check::before { + content: "\f46c"; } + +.fa-clipboard-list::before { + content: "\f46d"; } + +.fa-clipboard-question::before { + content: "\e4e3"; } + +.fa-clipboard-user::before { + content: "\f7f3"; } + +.fa-clock::before { + content: 
"\f017"; } + +.fa-clock-four::before { + content: "\f017"; } + +.fa-clock-rotate-left::before { + content: "\f1da"; } + +.fa-history::before { + content: "\f1da"; } + +.fa-clone::before { + content: "\f24d"; } + +.fa-closed-captioning::before { + content: "\f20a"; } + +.fa-cloud::before { + content: "\f0c2"; } + +.fa-cloud-arrow-down::before { + content: "\f0ed"; } + +.fa-cloud-download::before { + content: "\f0ed"; } + +.fa-cloud-download-alt::before { + content: "\f0ed"; } + +.fa-cloud-arrow-up::before { + content: "\f0ee"; } + +.fa-cloud-upload::before { + content: "\f0ee"; } + +.fa-cloud-upload-alt::before { + content: "\f0ee"; } + +.fa-cloud-bolt::before { + content: "\f76c"; } + +.fa-thunderstorm::before { + content: "\f76c"; } + +.fa-cloud-meatball::before { + content: "\f73b"; } + +.fa-cloud-moon::before { + content: "\f6c3"; } + +.fa-cloud-moon-rain::before { + content: "\f73c"; } + +.fa-cloud-rain::before { + content: "\f73d"; } + +.fa-cloud-showers-heavy::before { + content: "\f740"; } + +.fa-cloud-showers-water::before { + content: "\e4e4"; } + +.fa-cloud-sun::before { + content: "\f6c4"; } + +.fa-cloud-sun-rain::before { + content: "\f743"; } + +.fa-clover::before { + content: "\e139"; } + +.fa-code::before { + content: "\f121"; } + +.fa-code-branch::before { + content: "\f126"; } + +.fa-code-commit::before { + content: "\f386"; } + +.fa-code-compare::before { + content: "\e13a"; } + +.fa-code-fork::before { + content: "\e13b"; } + +.fa-code-merge::before { + content: "\f387"; } + +.fa-code-pull-request::before { + content: "\e13c"; } + +.fa-coins::before { + content: "\f51e"; } + +.fa-colon-sign::before { + content: "\e140"; } + +.fa-comment::before { + content: "\f075"; } + +.fa-comment-dollar::before { + content: "\f651"; } + +.fa-comment-dots::before { + content: "\f4ad"; } + +.fa-commenting::before { + content: "\f4ad"; } + +.fa-comment-medical::before { + content: "\f7f5"; } + +.fa-comment-slash::before { + content: "\f4b3"; } + 
+.fa-comment-sms::before { + content: "\f7cd"; } + +.fa-sms::before { + content: "\f7cd"; } + +.fa-comments::before { + content: "\f086"; } + +.fa-comments-dollar::before { + content: "\f653"; } + +.fa-compact-disc::before { + content: "\f51f"; } + +.fa-compass::before { + content: "\f14e"; } + +.fa-compass-drafting::before { + content: "\f568"; } + +.fa-drafting-compass::before { + content: "\f568"; } + +.fa-compress::before { + content: "\f066"; } + +.fa-computer::before { + content: "\e4e5"; } + +.fa-computer-mouse::before { + content: "\f8cc"; } + +.fa-mouse::before { + content: "\f8cc"; } + +.fa-cookie::before { + content: "\f563"; } + +.fa-cookie-bite::before { + content: "\f564"; } + +.fa-copy::before { + content: "\f0c5"; } + +.fa-copyright::before { + content: "\f1f9"; } + +.fa-couch::before { + content: "\f4b8"; } + +.fa-cow::before { + content: "\f6c8"; } + +.fa-credit-card::before { + content: "\f09d"; } + +.fa-credit-card-alt::before { + content: "\f09d"; } + +.fa-crop::before { + content: "\f125"; } + +.fa-crop-simple::before { + content: "\f565"; } + +.fa-crop-alt::before { + content: "\f565"; } + +.fa-cross::before { + content: "\f654"; } + +.fa-crosshairs::before { + content: "\f05b"; } + +.fa-crow::before { + content: "\f520"; } + +.fa-crown::before { + content: "\f521"; } + +.fa-crutch::before { + content: "\f7f7"; } + +.fa-cruzeiro-sign::before { + content: "\e152"; } + +.fa-cube::before { + content: "\f1b2"; } + +.fa-cubes::before { + content: "\f1b3"; } + +.fa-cubes-stacked::before { + content: "\e4e6"; } + +.fa-d::before { + content: "\44"; } + +.fa-database::before { + content: "\f1c0"; } + +.fa-delete-left::before { + content: "\f55a"; } + +.fa-backspace::before { + content: "\f55a"; } + +.fa-democrat::before { + content: "\f747"; } + +.fa-desktop::before { + content: "\f390"; } + +.fa-desktop-alt::before { + content: "\f390"; } + +.fa-dharmachakra::before { + content: "\f655"; } + +.fa-diagram-next::before { + content: "\e476"; } + 
+.fa-diagram-predecessor::before { + content: "\e477"; } + +.fa-diagram-project::before { + content: "\f542"; } + +.fa-project-diagram::before { + content: "\f542"; } + +.fa-diagram-successor::before { + content: "\e47a"; } + +.fa-diamond::before { + content: "\f219"; } + +.fa-diamond-turn-right::before { + content: "\f5eb"; } + +.fa-directions::before { + content: "\f5eb"; } + +.fa-dice::before { + content: "\f522"; } + +.fa-dice-d20::before { + content: "\f6cf"; } + +.fa-dice-d6::before { + content: "\f6d1"; } + +.fa-dice-five::before { + content: "\f523"; } + +.fa-dice-four::before { + content: "\f524"; } + +.fa-dice-one::before { + content: "\f525"; } + +.fa-dice-six::before { + content: "\f526"; } + +.fa-dice-three::before { + content: "\f527"; } + +.fa-dice-two::before { + content: "\f528"; } + +.fa-disease::before { + content: "\f7fa"; } + +.fa-display::before { + content: "\e163"; } + +.fa-divide::before { + content: "\f529"; } + +.fa-dna::before { + content: "\f471"; } + +.fa-dog::before { + content: "\f6d3"; } + +.fa-dollar-sign::before { + content: "\24"; } + +.fa-dollar::before { + content: "\24"; } + +.fa-usd::before { + content: "\24"; } + +.fa-dolly::before { + content: "\f472"; } + +.fa-dolly-box::before { + content: "\f472"; } + +.fa-dong-sign::before { + content: "\e169"; } + +.fa-door-closed::before { + content: "\f52a"; } + +.fa-door-open::before { + content: "\f52b"; } + +.fa-dove::before { + content: "\f4ba"; } + +.fa-down-left-and-up-right-to-center::before { + content: "\f422"; } + +.fa-compress-alt::before { + content: "\f422"; } + +.fa-down-long::before { + content: "\f309"; } + +.fa-long-arrow-alt-down::before { + content: "\f309"; } + +.fa-download::before { + content: "\f019"; } + +.fa-dragon::before { + content: "\f6d5"; } + +.fa-draw-polygon::before { + content: "\f5ee"; } + +.fa-droplet::before { + content: "\f043"; } + +.fa-tint::before { + content: "\f043"; } + +.fa-droplet-slash::before { + content: "\f5c7"; } + 
+.fa-tint-slash::before { + content: "\f5c7"; } + +.fa-drum::before { + content: "\f569"; } + +.fa-drum-steelpan::before { + content: "\f56a"; } + +.fa-drumstick-bite::before { + content: "\f6d7"; } + +.fa-dumbbell::before { + content: "\f44b"; } + +.fa-dumpster::before { + content: "\f793"; } + +.fa-dumpster-fire::before { + content: "\f794"; } + +.fa-dungeon::before { + content: "\f6d9"; } + +.fa-e::before { + content: "\45"; } + +.fa-ear-deaf::before { + content: "\f2a4"; } + +.fa-deaf::before { + content: "\f2a4"; } + +.fa-deafness::before { + content: "\f2a4"; } + +.fa-hard-of-hearing::before { + content: "\f2a4"; } + +.fa-ear-listen::before { + content: "\f2a2"; } + +.fa-assistive-listening-systems::before { + content: "\f2a2"; } + +.fa-earth-africa::before { + content: "\f57c"; } + +.fa-globe-africa::before { + content: "\f57c"; } + +.fa-earth-americas::before { + content: "\f57d"; } + +.fa-earth::before { + content: "\f57d"; } + +.fa-earth-america::before { + content: "\f57d"; } + +.fa-globe-americas::before { + content: "\f57d"; } + +.fa-earth-asia::before { + content: "\f57e"; } + +.fa-globe-asia::before { + content: "\f57e"; } + +.fa-earth-europe::before { + content: "\f7a2"; } + +.fa-globe-europe::before { + content: "\f7a2"; } + +.fa-earth-oceania::before { + content: "\e47b"; } + +.fa-globe-oceania::before { + content: "\e47b"; } + +.fa-egg::before { + content: "\f7fb"; } + +.fa-eject::before { + content: "\f052"; } + +.fa-elevator::before { + content: "\e16d"; } + +.fa-ellipsis::before { + content: "\f141"; } + +.fa-ellipsis-h::before { + content: "\f141"; } + +.fa-ellipsis-vertical::before { + content: "\f142"; } + +.fa-ellipsis-v::before { + content: "\f142"; } + +.fa-envelope::before { + content: "\f0e0"; } + +.fa-envelope-circle-check::before { + content: "\e4e8"; } + +.fa-envelope-open::before { + content: "\f2b6"; } + +.fa-envelope-open-text::before { + content: "\f658"; } + +.fa-envelopes-bulk::before { + content: "\f674"; } + 
+.fa-mail-bulk::before { + content: "\f674"; } + +.fa-equals::before { + content: "\3d"; } + +.fa-eraser::before { + content: "\f12d"; } + +.fa-ethernet::before { + content: "\f796"; } + +.fa-euro-sign::before { + content: "\f153"; } + +.fa-eur::before { + content: "\f153"; } + +.fa-euro::before { + content: "\f153"; } + +.fa-exclamation::before { + content: "\21"; } + +.fa-expand::before { + content: "\f065"; } + +.fa-explosion::before { + content: "\e4e9"; } + +.fa-eye::before { + content: "\f06e"; } + +.fa-eye-dropper::before { + content: "\f1fb"; } + +.fa-eye-dropper-empty::before { + content: "\f1fb"; } + +.fa-eyedropper::before { + content: "\f1fb"; } + +.fa-eye-low-vision::before { + content: "\f2a8"; } + +.fa-low-vision::before { + content: "\f2a8"; } + +.fa-eye-slash::before { + content: "\f070"; } + +.fa-f::before { + content: "\46"; } + +.fa-face-angry::before { + content: "\f556"; } + +.fa-angry::before { + content: "\f556"; } + +.fa-face-dizzy::before { + content: "\f567"; } + +.fa-dizzy::before { + content: "\f567"; } + +.fa-face-flushed::before { + content: "\f579"; } + +.fa-flushed::before { + content: "\f579"; } + +.fa-face-frown::before { + content: "\f119"; } + +.fa-frown::before { + content: "\f119"; } + +.fa-face-frown-open::before { + content: "\f57a"; } + +.fa-frown-open::before { + content: "\f57a"; } + +.fa-face-grimace::before { + content: "\f57f"; } + +.fa-grimace::before { + content: "\f57f"; } + +.fa-face-grin::before { + content: "\f580"; } + +.fa-grin::before { + content: "\f580"; } + +.fa-face-grin-beam::before { + content: "\f582"; } + +.fa-grin-beam::before { + content: "\f582"; } + +.fa-face-grin-beam-sweat::before { + content: "\f583"; } + +.fa-grin-beam-sweat::before { + content: "\f583"; } + +.fa-face-grin-hearts::before { + content: "\f584"; } + +.fa-grin-hearts::before { + content: "\f584"; } + +.fa-face-grin-squint::before { + content: "\f585"; } + +.fa-grin-squint::before { + content: "\f585"; } + 
+.fa-face-grin-squint-tears::before { + content: "\f586"; } + +.fa-grin-squint-tears::before { + content: "\f586"; } + +.fa-face-grin-stars::before { + content: "\f587"; } + +.fa-grin-stars::before { + content: "\f587"; } + +.fa-face-grin-tears::before { + content: "\f588"; } + +.fa-grin-tears::before { + content: "\f588"; } + +.fa-face-grin-tongue::before { + content: "\f589"; } + +.fa-grin-tongue::before { + content: "\f589"; } + +.fa-face-grin-tongue-squint::before { + content: "\f58a"; } + +.fa-grin-tongue-squint::before { + content: "\f58a"; } + +.fa-face-grin-tongue-wink::before { + content: "\f58b"; } + +.fa-grin-tongue-wink::before { + content: "\f58b"; } + +.fa-face-grin-wide::before { + content: "\f581"; } + +.fa-grin-alt::before { + content: "\f581"; } + +.fa-face-grin-wink::before { + content: "\f58c"; } + +.fa-grin-wink::before { + content: "\f58c"; } + +.fa-face-kiss::before { + content: "\f596"; } + +.fa-kiss::before { + content: "\f596"; } + +.fa-face-kiss-beam::before { + content: "\f597"; } + +.fa-kiss-beam::before { + content: "\f597"; } + +.fa-face-kiss-wink-heart::before { + content: "\f598"; } + +.fa-kiss-wink-heart::before { + content: "\f598"; } + +.fa-face-laugh::before { + content: "\f599"; } + +.fa-laugh::before { + content: "\f599"; } + +.fa-face-laugh-beam::before { + content: "\f59a"; } + +.fa-laugh-beam::before { + content: "\f59a"; } + +.fa-face-laugh-squint::before { + content: "\f59b"; } + +.fa-laugh-squint::before { + content: "\f59b"; } + +.fa-face-laugh-wink::before { + content: "\f59c"; } + +.fa-laugh-wink::before { + content: "\f59c"; } + +.fa-face-meh::before { + content: "\f11a"; } + +.fa-meh::before { + content: "\f11a"; } + +.fa-face-meh-blank::before { + content: "\f5a4"; } + +.fa-meh-blank::before { + content: "\f5a4"; } + +.fa-face-rolling-eyes::before { + content: "\f5a5"; } + +.fa-meh-rolling-eyes::before { + content: "\f5a5"; } + +.fa-face-sad-cry::before { + content: "\f5b3"; } + +.fa-sad-cry::before { + content: 
"\f5b3"; } + +.fa-face-sad-tear::before { + content: "\f5b4"; } + +.fa-sad-tear::before { + content: "\f5b4"; } + +.fa-face-smile::before { + content: "\f118"; } + +.fa-smile::before { + content: "\f118"; } + +.fa-face-smile-beam::before { + content: "\f5b8"; } + +.fa-smile-beam::before { + content: "\f5b8"; } + +.fa-face-smile-wink::before { + content: "\f4da"; } + +.fa-smile-wink::before { + content: "\f4da"; } + +.fa-face-surprise::before { + content: "\f5c2"; } + +.fa-surprise::before { + content: "\f5c2"; } + +.fa-face-tired::before { + content: "\f5c8"; } + +.fa-tired::before { + content: "\f5c8"; } + +.fa-fan::before { + content: "\f863"; } + +.fa-faucet::before { + content: "\e005"; } + +.fa-faucet-drip::before { + content: "\e006"; } + +.fa-fax::before { + content: "\f1ac"; } + +.fa-feather::before { + content: "\f52d"; } + +.fa-feather-pointed::before { + content: "\f56b"; } + +.fa-feather-alt::before { + content: "\f56b"; } + +.fa-ferry::before { + content: "\e4ea"; } + +.fa-file::before { + content: "\f15b"; } + +.fa-file-arrow-down::before { + content: "\f56d"; } + +.fa-file-download::before { + content: "\f56d"; } + +.fa-file-arrow-up::before { + content: "\f574"; } + +.fa-file-upload::before { + content: "\f574"; } + +.fa-file-audio::before { + content: "\f1c7"; } + +.fa-file-circle-check::before { + content: "\e493"; } + +.fa-file-circle-exclamation::before { + content: "\e4eb"; } + +.fa-file-circle-minus::before { + content: "\e4ed"; } + +.fa-file-circle-plus::before { + content: "\e4ee"; } + +.fa-file-circle-question::before { + content: "\e4ef"; } + +.fa-file-circle-xmark::before { + content: "\e494"; } + +.fa-file-code::before { + content: "\f1c9"; } + +.fa-file-contract::before { + content: "\f56c"; } + +.fa-file-csv::before { + content: "\f6dd"; } + +.fa-file-excel::before { + content: "\f1c3"; } + +.fa-file-export::before { + content: "\f56e"; } + +.fa-arrow-right-from-file::before { + content: "\f56e"; } + +.fa-file-image::before { + 
content: "\f1c5"; } + +.fa-file-import::before { + content: "\f56f"; } + +.fa-arrow-right-to-file::before { + content: "\f56f"; } + +.fa-file-invoice::before { + content: "\f570"; } + +.fa-file-invoice-dollar::before { + content: "\f571"; } + +.fa-file-lines::before { + content: "\f15c"; } + +.fa-file-alt::before { + content: "\f15c"; } + +.fa-file-text::before { + content: "\f15c"; } + +.fa-file-medical::before { + content: "\f477"; } + +.fa-file-pdf::before { + content: "\f1c1"; } + +.fa-file-pen::before { + content: "\f31c"; } + +.fa-file-edit::before { + content: "\f31c"; } + +.fa-file-powerpoint::before { + content: "\f1c4"; } + +.fa-file-prescription::before { + content: "\f572"; } + +.fa-file-shield::before { + content: "\e4f0"; } + +.fa-file-signature::before { + content: "\f573"; } + +.fa-file-video::before { + content: "\f1c8"; } + +.fa-file-waveform::before { + content: "\f478"; } + +.fa-file-medical-alt::before { + content: "\f478"; } + +.fa-file-word::before { + content: "\f1c2"; } + +.fa-file-zipper::before { + content: "\f1c6"; } + +.fa-file-archive::before { + content: "\f1c6"; } + +.fa-fill::before { + content: "\f575"; } + +.fa-fill-drip::before { + content: "\f576"; } + +.fa-film::before { + content: "\f008"; } + +.fa-filter::before { + content: "\f0b0"; } + +.fa-filter-circle-dollar::before { + content: "\f662"; } + +.fa-funnel-dollar::before { + content: "\f662"; } + +.fa-filter-circle-xmark::before { + content: "\e17b"; } + +.fa-fingerprint::before { + content: "\f577"; } + +.fa-fire::before { + content: "\f06d"; } + +.fa-fire-burner::before { + content: "\e4f1"; } + +.fa-fire-extinguisher::before { + content: "\f134"; } + +.fa-fire-flame-curved::before { + content: "\f7e4"; } + +.fa-fire-alt::before { + content: "\f7e4"; } + +.fa-fire-flame-simple::before { + content: "\f46a"; } + +.fa-burn::before { + content: "\f46a"; } + +.fa-fish::before { + content: "\f578"; } + +.fa-fish-fins::before { + content: "\e4f2"; } + +.fa-flag::before { + 
content: "\f024"; } + +.fa-flag-checkered::before { + content: "\f11e"; } + +.fa-flag-usa::before { + content: "\f74d"; } + +.fa-flask::before { + content: "\f0c3"; } + +.fa-flask-vial::before { + content: "\e4f3"; } + +.fa-floppy-disk::before { + content: "\f0c7"; } + +.fa-save::before { + content: "\f0c7"; } + +.fa-florin-sign::before { + content: "\e184"; } + +.fa-folder::before { + content: "\f07b"; } + +.fa-folder-blank::before { + content: "\f07b"; } + +.fa-folder-closed::before { + content: "\e185"; } + +.fa-folder-minus::before { + content: "\f65d"; } + +.fa-folder-open::before { + content: "\f07c"; } + +.fa-folder-plus::before { + content: "\f65e"; } + +.fa-folder-tree::before { + content: "\f802"; } + +.fa-font::before { + content: "\f031"; } + +.fa-football::before { + content: "\f44e"; } + +.fa-football-ball::before { + content: "\f44e"; } + +.fa-forward::before { + content: "\f04e"; } + +.fa-forward-fast::before { + content: "\f050"; } + +.fa-fast-forward::before { + content: "\f050"; } + +.fa-forward-step::before { + content: "\f051"; } + +.fa-step-forward::before { + content: "\f051"; } + +.fa-franc-sign::before { + content: "\e18f"; } + +.fa-frog::before { + content: "\f52e"; } + +.fa-futbol::before { + content: "\f1e3"; } + +.fa-futbol-ball::before { + content: "\f1e3"; } + +.fa-soccer-ball::before { + content: "\f1e3"; } + +.fa-g::before { + content: "\47"; } + +.fa-gamepad::before { + content: "\f11b"; } + +.fa-gas-pump::before { + content: "\f52f"; } + +.fa-gauge::before { + content: "\f624"; } + +.fa-dashboard::before { + content: "\f624"; } + +.fa-gauge-med::before { + content: "\f624"; } + +.fa-tachometer-alt-average::before { + content: "\f624"; } + +.fa-gauge-high::before { + content: "\f625"; } + +.fa-tachometer-alt::before { + content: "\f625"; } + +.fa-tachometer-alt-fast::before { + content: "\f625"; } + +.fa-gauge-simple::before { + content: "\f629"; } + +.fa-gauge-simple-med::before { + content: "\f629"; } + 
+.fa-tachometer-average::before { + content: "\f629"; } + +.fa-gauge-simple-high::before { + content: "\f62a"; } + +.fa-tachometer::before { + content: "\f62a"; } + +.fa-tachometer-fast::before { + content: "\f62a"; } + +.fa-gavel::before { + content: "\f0e3"; } + +.fa-legal::before { + content: "\f0e3"; } + +.fa-gear::before { + content: "\f013"; } + +.fa-cog::before { + content: "\f013"; } + +.fa-gears::before { + content: "\f085"; } + +.fa-cogs::before { + content: "\f085"; } + +.fa-gem::before { + content: "\f3a5"; } + +.fa-genderless::before { + content: "\f22d"; } + +.fa-ghost::before { + content: "\f6e2"; } + +.fa-gift::before { + content: "\f06b"; } + +.fa-gifts::before { + content: "\f79c"; } + +.fa-glass-water::before { + content: "\e4f4"; } + +.fa-glass-water-droplet::before { + content: "\e4f5"; } + +.fa-glasses::before { + content: "\f530"; } + +.fa-globe::before { + content: "\f0ac"; } + +.fa-golf-ball-tee::before { + content: "\f450"; } + +.fa-golf-ball::before { + content: "\f450"; } + +.fa-gopuram::before { + content: "\f664"; } + +.fa-graduation-cap::before { + content: "\f19d"; } + +.fa-mortar-board::before { + content: "\f19d"; } + +.fa-greater-than::before { + content: "\3e"; } + +.fa-greater-than-equal::before { + content: "\f532"; } + +.fa-grip::before { + content: "\f58d"; } + +.fa-grip-horizontal::before { + content: "\f58d"; } + +.fa-grip-lines::before { + content: "\f7a4"; } + +.fa-grip-lines-vertical::before { + content: "\f7a5"; } + +.fa-grip-vertical::before { + content: "\f58e"; } + +.fa-group-arrows-rotate::before { + content: "\e4f6"; } + +.fa-guarani-sign::before { + content: "\e19a"; } + +.fa-guitar::before { + content: "\f7a6"; } + +.fa-gun::before { + content: "\e19b"; } + +.fa-h::before { + content: "\48"; } + +.fa-hammer::before { + content: "\f6e3"; } + +.fa-hamsa::before { + content: "\f665"; } + +.fa-hand::before { + content: "\f256"; } + +.fa-hand-paper::before { + content: "\f256"; } + +.fa-hand-back-fist::before { + 
content: "\f255"; } + +.fa-hand-rock::before { + content: "\f255"; } + +.fa-hand-dots::before { + content: "\f461"; } + +.fa-allergies::before { + content: "\f461"; } + +.fa-hand-fist::before { + content: "\f6de"; } + +.fa-fist-raised::before { + content: "\f6de"; } + +.fa-hand-holding::before { + content: "\f4bd"; } + +.fa-hand-holding-dollar::before { + content: "\f4c0"; } + +.fa-hand-holding-usd::before { + content: "\f4c0"; } + +.fa-hand-holding-droplet::before { + content: "\f4c1"; } + +.fa-hand-holding-water::before { + content: "\f4c1"; } + +.fa-hand-holding-hand::before { + content: "\e4f7"; } + +.fa-hand-holding-heart::before { + content: "\f4be"; } + +.fa-hand-holding-medical::before { + content: "\e05c"; } + +.fa-hand-lizard::before { + content: "\f258"; } + +.fa-hand-middle-finger::before { + content: "\f806"; } + +.fa-hand-peace::before { + content: "\f25b"; } + +.fa-hand-point-down::before { + content: "\f0a7"; } + +.fa-hand-point-left::before { + content: "\f0a5"; } + +.fa-hand-point-right::before { + content: "\f0a4"; } + +.fa-hand-point-up::before { + content: "\f0a6"; } + +.fa-hand-pointer::before { + content: "\f25a"; } + +.fa-hand-scissors::before { + content: "\f257"; } + +.fa-hand-sparkles::before { + content: "\e05d"; } + +.fa-hand-spock::before { + content: "\f259"; } + +.fa-handcuffs::before { + content: "\e4f8"; } + +.fa-hands::before { + content: "\f2a7"; } + +.fa-sign-language::before { + content: "\f2a7"; } + +.fa-signing::before { + content: "\f2a7"; } + +.fa-hands-asl-interpreting::before { + content: "\f2a3"; } + +.fa-american-sign-language-interpreting::before { + content: "\f2a3"; } + +.fa-asl-interpreting::before { + content: "\f2a3"; } + +.fa-hands-american-sign-language-interpreting::before { + content: "\f2a3"; } + +.fa-hands-bound::before { + content: "\e4f9"; } + +.fa-hands-bubbles::before { + content: "\e05e"; } + +.fa-hands-wash::before { + content: "\e05e"; } + +.fa-hands-clapping::before { + content: "\e1a8"; } + 
+.fa-hands-holding::before { + content: "\f4c2"; } + +.fa-hands-holding-child::before { + content: "\e4fa"; } + +.fa-hands-holding-circle::before { + content: "\e4fb"; } + +.fa-hands-praying::before { + content: "\f684"; } + +.fa-praying-hands::before { + content: "\f684"; } + +.fa-handshake::before { + content: "\f2b5"; } + +.fa-handshake-angle::before { + content: "\f4c4"; } + +.fa-hands-helping::before { + content: "\f4c4"; } + +.fa-handshake-simple::before { + content: "\f4c6"; } + +.fa-handshake-alt::before { + content: "\f4c6"; } + +.fa-handshake-simple-slash::before { + content: "\e05f"; } + +.fa-handshake-alt-slash::before { + content: "\e05f"; } + +.fa-handshake-slash::before { + content: "\e060"; } + +.fa-hanukiah::before { + content: "\f6e6"; } + +.fa-hard-drive::before { + content: "\f0a0"; } + +.fa-hdd::before { + content: "\f0a0"; } + +.fa-hashtag::before { + content: "\23"; } + +.fa-hat-cowboy::before { + content: "\f8c0"; } + +.fa-hat-cowboy-side::before { + content: "\f8c1"; } + +.fa-hat-wizard::before { + content: "\f6e8"; } + +.fa-head-side-cough::before { + content: "\e061"; } + +.fa-head-side-cough-slash::before { + content: "\e062"; } + +.fa-head-side-mask::before { + content: "\e063"; } + +.fa-head-side-virus::before { + content: "\e064"; } + +.fa-heading::before { + content: "\f1dc"; } + +.fa-header::before { + content: "\f1dc"; } + +.fa-headphones::before { + content: "\f025"; } + +.fa-headphones-simple::before { + content: "\f58f"; } + +.fa-headphones-alt::before { + content: "\f58f"; } + +.fa-headset::before { + content: "\f590"; } + +.fa-heart::before { + content: "\f004"; } + +.fa-heart-circle-bolt::before { + content: "\e4fc"; } + +.fa-heart-circle-check::before { + content: "\e4fd"; } + +.fa-heart-circle-exclamation::before { + content: "\e4fe"; } + +.fa-heart-circle-minus::before { + content: "\e4ff"; } + +.fa-heart-circle-plus::before { + content: "\e500"; } + +.fa-heart-circle-xmark::before { + content: "\e501"; } + 
+.fa-heart-crack::before { + content: "\f7a9"; } + +.fa-heart-broken::before { + content: "\f7a9"; } + +.fa-heart-pulse::before { + content: "\f21e"; } + +.fa-heartbeat::before { + content: "\f21e"; } + +.fa-helicopter::before { + content: "\f533"; } + +.fa-helicopter-symbol::before { + content: "\e502"; } + +.fa-helmet-safety::before { + content: "\f807"; } + +.fa-hard-hat::before { + content: "\f807"; } + +.fa-hat-hard::before { + content: "\f807"; } + +.fa-helmet-un::before { + content: "\e503"; } + +.fa-highlighter::before { + content: "\f591"; } + +.fa-hill-avalanche::before { + content: "\e507"; } + +.fa-hill-rockslide::before { + content: "\e508"; } + +.fa-hippo::before { + content: "\f6ed"; } + +.fa-hockey-puck::before { + content: "\f453"; } + +.fa-holly-berry::before { + content: "\f7aa"; } + +.fa-horse::before { + content: "\f6f0"; } + +.fa-horse-head::before { + content: "\f7ab"; } + +.fa-hospital::before { + content: "\f0f8"; } + +.fa-hospital-alt::before { + content: "\f0f8"; } + +.fa-hospital-wide::before { + content: "\f0f8"; } + +.fa-hospital-user::before { + content: "\f80d"; } + +.fa-hot-tub-person::before { + content: "\f593"; } + +.fa-hot-tub::before { + content: "\f593"; } + +.fa-hotdog::before { + content: "\f80f"; } + +.fa-hotel::before { + content: "\f594"; } + +.fa-hourglass::before { + content: "\f254"; } + +.fa-hourglass-2::before { + content: "\f254"; } + +.fa-hourglass-half::before { + content: "\f254"; } + +.fa-hourglass-empty::before { + content: "\f252"; } + +.fa-hourglass-end::before { + content: "\f253"; } + +.fa-hourglass-3::before { + content: "\f253"; } + +.fa-hourglass-start::before { + content: "\f251"; } + +.fa-hourglass-1::before { + content: "\f251"; } + +.fa-house::before { + content: "\f015"; } + +.fa-home::before { + content: "\f015"; } + +.fa-home-alt::before { + content: "\f015"; } + +.fa-home-lg-alt::before { + content: "\f015"; } + +.fa-house-chimney::before { + content: "\e3af"; } + +.fa-home-lg::before { + 
content: "\e3af"; } + +.fa-house-chimney-crack::before { + content: "\f6f1"; } + +.fa-house-damage::before { + content: "\f6f1"; } + +.fa-house-chimney-medical::before { + content: "\f7f2"; } + +.fa-clinic-medical::before { + content: "\f7f2"; } + +.fa-house-chimney-user::before { + content: "\e065"; } + +.fa-house-chimney-window::before { + content: "\e00d"; } + +.fa-house-circle-check::before { + content: "\e509"; } + +.fa-house-circle-exclamation::before { + content: "\e50a"; } + +.fa-house-circle-xmark::before { + content: "\e50b"; } + +.fa-house-crack::before { + content: "\e3b1"; } + +.fa-house-fire::before { + content: "\e50c"; } + +.fa-house-flag::before { + content: "\e50d"; } + +.fa-house-flood-water::before { + content: "\e50e"; } + +.fa-house-flood-water-circle-arrow-right::before { + content: "\e50f"; } + +.fa-house-laptop::before { + content: "\e066"; } + +.fa-laptop-house::before { + content: "\e066"; } + +.fa-house-lock::before { + content: "\e510"; } + +.fa-house-medical::before { + content: "\e3b2"; } + +.fa-house-medical-circle-check::before { + content: "\e511"; } + +.fa-house-medical-circle-exclamation::before { + content: "\e512"; } + +.fa-house-medical-circle-xmark::before { + content: "\e513"; } + +.fa-house-medical-flag::before { + content: "\e514"; } + +.fa-house-signal::before { + content: "\e012"; } + +.fa-house-tsunami::before { + content: "\e515"; } + +.fa-house-user::before { + content: "\e1b0"; } + +.fa-home-user::before { + content: "\e1b0"; } + +.fa-hryvnia-sign::before { + content: "\f6f2"; } + +.fa-hryvnia::before { + content: "\f6f2"; } + +.fa-hurricane::before { + content: "\f751"; } + +.fa-i::before { + content: "\49"; } + +.fa-i-cursor::before { + content: "\f246"; } + +.fa-ice-cream::before { + content: "\f810"; } + +.fa-icicles::before { + content: "\f7ad"; } + +.fa-icons::before { + content: "\f86d"; } + +.fa-heart-music-camera-bolt::before { + content: "\f86d"; } + +.fa-id-badge::before { + content: "\f2c1"; } + 
+.fa-id-card::before { + content: "\f2c2"; } + +.fa-drivers-license::before { + content: "\f2c2"; } + +.fa-id-card-clip::before { + content: "\f47f"; } + +.fa-id-card-alt::before { + content: "\f47f"; } + +.fa-igloo::before { + content: "\f7ae"; } + +.fa-image::before { + content: "\f03e"; } + +.fa-image-portrait::before { + content: "\f3e0"; } + +.fa-portrait::before { + content: "\f3e0"; } + +.fa-images::before { + content: "\f302"; } + +.fa-inbox::before { + content: "\f01c"; } + +.fa-indent::before { + content: "\f03c"; } + +.fa-indian-rupee-sign::before { + content: "\e1bc"; } + +.fa-indian-rupee::before { + content: "\e1bc"; } + +.fa-inr::before { + content: "\e1bc"; } + +.fa-industry::before { + content: "\f275"; } + +.fa-infinity::before { + content: "\f534"; } + +.fa-info::before { + content: "\f129"; } + +.fa-italic::before { + content: "\f033"; } + +.fa-j::before { + content: "\4a"; } + +.fa-jar::before { + content: "\e516"; } + +.fa-jar-wheat::before { + content: "\e517"; } + +.fa-jedi::before { + content: "\f669"; } + +.fa-jet-fighter::before { + content: "\f0fb"; } + +.fa-fighter-jet::before { + content: "\f0fb"; } + +.fa-jet-fighter-up::before { + content: "\e518"; } + +.fa-joint::before { + content: "\f595"; } + +.fa-jug-detergent::before { + content: "\e519"; } + +.fa-k::before { + content: "\4b"; } + +.fa-kaaba::before { + content: "\f66b"; } + +.fa-key::before { + content: "\f084"; } + +.fa-keyboard::before { + content: "\f11c"; } + +.fa-khanda::before { + content: "\f66d"; } + +.fa-kip-sign::before { + content: "\e1c4"; } + +.fa-kit-medical::before { + content: "\f479"; } + +.fa-first-aid::before { + content: "\f479"; } + +.fa-kitchen-set::before { + content: "\e51a"; } + +.fa-kiwi-bird::before { + content: "\f535"; } + +.fa-l::before { + content: "\4c"; } + +.fa-land-mine-on::before { + content: "\e51b"; } + +.fa-landmark::before { + content: "\f66f"; } + +.fa-landmark-dome::before { + content: "\f752"; } + +.fa-landmark-alt::before { + 
content: "\f752"; } + +.fa-landmark-flag::before { + content: "\e51c"; } + +.fa-language::before { + content: "\f1ab"; } + +.fa-laptop::before { + content: "\f109"; } + +.fa-laptop-code::before { + content: "\f5fc"; } + +.fa-laptop-file::before { + content: "\e51d"; } + +.fa-laptop-medical::before { + content: "\f812"; } + +.fa-lari-sign::before { + content: "\e1c8"; } + +.fa-layer-group::before { + content: "\f5fd"; } + +.fa-leaf::before { + content: "\f06c"; } + +.fa-left-long::before { + content: "\f30a"; } + +.fa-long-arrow-alt-left::before { + content: "\f30a"; } + +.fa-left-right::before { + content: "\f337"; } + +.fa-arrows-alt-h::before { + content: "\f337"; } + +.fa-lemon::before { + content: "\f094"; } + +.fa-less-than::before { + content: "\3c"; } + +.fa-less-than-equal::before { + content: "\f537"; } + +.fa-life-ring::before { + content: "\f1cd"; } + +.fa-lightbulb::before { + content: "\f0eb"; } + +.fa-lines-leaning::before { + content: "\e51e"; } + +.fa-link::before { + content: "\f0c1"; } + +.fa-chain::before { + content: "\f0c1"; } + +.fa-link-slash::before { + content: "\f127"; } + +.fa-chain-broken::before { + content: "\f127"; } + +.fa-chain-slash::before { + content: "\f127"; } + +.fa-unlink::before { + content: "\f127"; } + +.fa-lira-sign::before { + content: "\f195"; } + +.fa-list::before { + content: "\f03a"; } + +.fa-list-squares::before { + content: "\f03a"; } + +.fa-list-check::before { + content: "\f0ae"; } + +.fa-tasks::before { + content: "\f0ae"; } + +.fa-list-ol::before { + content: "\f0cb"; } + +.fa-list-1-2::before { + content: "\f0cb"; } + +.fa-list-numeric::before { + content: "\f0cb"; } + +.fa-list-ul::before { + content: "\f0ca"; } + +.fa-list-dots::before { + content: "\f0ca"; } + +.fa-litecoin-sign::before { + content: "\e1d3"; } + +.fa-location-arrow::before { + content: "\f124"; } + +.fa-location-crosshairs::before { + content: "\f601"; } + +.fa-location::before { + content: "\f601"; } + +.fa-location-dot::before { + 
content: "\f3c5"; } + +.fa-map-marker-alt::before { + content: "\f3c5"; } + +.fa-location-pin::before { + content: "\f041"; } + +.fa-map-marker::before { + content: "\f041"; } + +.fa-location-pin-lock::before { + content: "\e51f"; } + +.fa-lock::before { + content: "\f023"; } + +.fa-lock-open::before { + content: "\f3c1"; } + +.fa-locust::before { + content: "\e520"; } + +.fa-lungs::before { + content: "\f604"; } + +.fa-lungs-virus::before { + content: "\e067"; } + +.fa-m::before { + content: "\4d"; } + +.fa-magnet::before { + content: "\f076"; } + +.fa-magnifying-glass::before { + content: "\f002"; } + +.fa-search::before { + content: "\f002"; } + +.fa-magnifying-glass-arrow-right::before { + content: "\e521"; } + +.fa-magnifying-glass-chart::before { + content: "\e522"; } + +.fa-magnifying-glass-dollar::before { + content: "\f688"; } + +.fa-search-dollar::before { + content: "\f688"; } + +.fa-magnifying-glass-location::before { + content: "\f689"; } + +.fa-search-location::before { + content: "\f689"; } + +.fa-magnifying-glass-minus::before { + content: "\f010"; } + +.fa-search-minus::before { + content: "\f010"; } + +.fa-magnifying-glass-plus::before { + content: "\f00e"; } + +.fa-search-plus::before { + content: "\f00e"; } + +.fa-manat-sign::before { + content: "\e1d5"; } + +.fa-map::before { + content: "\f279"; } + +.fa-map-location::before { + content: "\f59f"; } + +.fa-map-marked::before { + content: "\f59f"; } + +.fa-map-location-dot::before { + content: "\f5a0"; } + +.fa-map-marked-alt::before { + content: "\f5a0"; } + +.fa-map-pin::before { + content: "\f276"; } + +.fa-marker::before { + content: "\f5a1"; } + +.fa-mars::before { + content: "\f222"; } + +.fa-mars-and-venus::before { + content: "\f224"; } + +.fa-mars-and-venus-burst::before { + content: "\e523"; } + +.fa-mars-double::before { + content: "\f227"; } + +.fa-mars-stroke::before { + content: "\f229"; } + +.fa-mars-stroke-right::before { + content: "\f22b"; } + +.fa-mars-stroke-h::before { + 
content: "\f22b"; } + +.fa-mars-stroke-up::before { + content: "\f22a"; } + +.fa-mars-stroke-v::before { + content: "\f22a"; } + +.fa-martini-glass::before { + content: "\f57b"; } + +.fa-glass-martini-alt::before { + content: "\f57b"; } + +.fa-martini-glass-citrus::before { + content: "\f561"; } + +.fa-cocktail::before { + content: "\f561"; } + +.fa-martini-glass-empty::before { + content: "\f000"; } + +.fa-glass-martini::before { + content: "\f000"; } + +.fa-mask::before { + content: "\f6fa"; } + +.fa-mask-face::before { + content: "\e1d7"; } + +.fa-mask-ventilator::before { + content: "\e524"; } + +.fa-masks-theater::before { + content: "\f630"; } + +.fa-theater-masks::before { + content: "\f630"; } + +.fa-mattress-pillow::before { + content: "\e525"; } + +.fa-maximize::before { + content: "\f31e"; } + +.fa-expand-arrows-alt::before { + content: "\f31e"; } + +.fa-medal::before { + content: "\f5a2"; } + +.fa-memory::before { + content: "\f538"; } + +.fa-menorah::before { + content: "\f676"; } + +.fa-mercury::before { + content: "\f223"; } + +.fa-message::before { + content: "\f27a"; } + +.fa-comment-alt::before { + content: "\f27a"; } + +.fa-meteor::before { + content: "\f753"; } + +.fa-microchip::before { + content: "\f2db"; } + +.fa-microphone::before { + content: "\f130"; } + +.fa-microphone-lines::before { + content: "\f3c9"; } + +.fa-microphone-alt::before { + content: "\f3c9"; } + +.fa-microphone-lines-slash::before { + content: "\f539"; } + +.fa-microphone-alt-slash::before { + content: "\f539"; } + +.fa-microphone-slash::before { + content: "\f131"; } + +.fa-microscope::before { + content: "\f610"; } + +.fa-mill-sign::before { + content: "\e1ed"; } + +.fa-minimize::before { + content: "\f78c"; } + +.fa-compress-arrows-alt::before { + content: "\f78c"; } + +.fa-minus::before { + content: "\f068"; } + +.fa-subtract::before { + content: "\f068"; } + +.fa-mitten::before { + content: "\f7b5"; } + +.fa-mobile::before { + content: "\f3ce"; } + 
+.fa-mobile-android::before { + content: "\f3ce"; } + +.fa-mobile-phone::before { + content: "\f3ce"; } + +.fa-mobile-button::before { + content: "\f10b"; } + +.fa-mobile-retro::before { + content: "\e527"; } + +.fa-mobile-screen::before { + content: "\f3cf"; } + +.fa-mobile-android-alt::before { + content: "\f3cf"; } + +.fa-mobile-screen-button::before { + content: "\f3cd"; } + +.fa-mobile-alt::before { + content: "\f3cd"; } + +.fa-money-bill::before { + content: "\f0d6"; } + +.fa-money-bill-1::before { + content: "\f3d1"; } + +.fa-money-bill-alt::before { + content: "\f3d1"; } + +.fa-money-bill-1-wave::before { + content: "\f53b"; } + +.fa-money-bill-wave-alt::before { + content: "\f53b"; } + +.fa-money-bill-transfer::before { + content: "\e528"; } + +.fa-money-bill-trend-up::before { + content: "\e529"; } + +.fa-money-bill-wave::before { + content: "\f53a"; } + +.fa-money-bill-wheat::before { + content: "\e52a"; } + +.fa-money-bills::before { + content: "\e1f3"; } + +.fa-money-check::before { + content: "\f53c"; } + +.fa-money-check-dollar::before { + content: "\f53d"; } + +.fa-money-check-alt::before { + content: "\f53d"; } + +.fa-monument::before { + content: "\f5a6"; } + +.fa-moon::before { + content: "\f186"; } + +.fa-mortar-pestle::before { + content: "\f5a7"; } + +.fa-mosque::before { + content: "\f678"; } + +.fa-mosquito::before { + content: "\e52b"; } + +.fa-mosquito-net::before { + content: "\e52c"; } + +.fa-motorcycle::before { + content: "\f21c"; } + +.fa-mound::before { + content: "\e52d"; } + +.fa-mountain::before { + content: "\f6fc"; } + +.fa-mountain-city::before { + content: "\e52e"; } + +.fa-mountain-sun::before { + content: "\e52f"; } + +.fa-mug-hot::before { + content: "\f7b6"; } + +.fa-mug-saucer::before { + content: "\f0f4"; } + +.fa-coffee::before { + content: "\f0f4"; } + +.fa-music::before { + content: "\f001"; } + +.fa-n::before { + content: "\4e"; } + +.fa-naira-sign::before { + content: "\e1f6"; } + +.fa-network-wired::before { + 
content: "\f6ff"; } + +.fa-neuter::before { + content: "\f22c"; } + +.fa-newspaper::before { + content: "\f1ea"; } + +.fa-not-equal::before { + content: "\f53e"; } + +.fa-note-sticky::before { + content: "\f249"; } + +.fa-sticky-note::before { + content: "\f249"; } + +.fa-notes-medical::before { + content: "\f481"; } + +.fa-o::before { + content: "\4f"; } + +.fa-object-group::before { + content: "\f247"; } + +.fa-object-ungroup::before { + content: "\f248"; } + +.fa-oil-can::before { + content: "\f613"; } + +.fa-oil-well::before { + content: "\e532"; } + +.fa-om::before { + content: "\f679"; } + +.fa-otter::before { + content: "\f700"; } + +.fa-outdent::before { + content: "\f03b"; } + +.fa-dedent::before { + content: "\f03b"; } + +.fa-p::before { + content: "\50"; } + +.fa-pager::before { + content: "\f815"; } + +.fa-paint-roller::before { + content: "\f5aa"; } + +.fa-paintbrush::before { + content: "\f1fc"; } + +.fa-paint-brush::before { + content: "\f1fc"; } + +.fa-palette::before { + content: "\f53f"; } + +.fa-pallet::before { + content: "\f482"; } + +.fa-panorama::before { + content: "\e209"; } + +.fa-paper-plane::before { + content: "\f1d8"; } + +.fa-paperclip::before { + content: "\f0c6"; } + +.fa-parachute-box::before { + content: "\f4cd"; } + +.fa-paragraph::before { + content: "\f1dd"; } + +.fa-passport::before { + content: "\f5ab"; } + +.fa-paste::before { + content: "\f0ea"; } + +.fa-file-clipboard::before { + content: "\f0ea"; } + +.fa-pause::before { + content: "\f04c"; } + +.fa-paw::before { + content: "\f1b0"; } + +.fa-peace::before { + content: "\f67c"; } + +.fa-pen::before { + content: "\f304"; } + +.fa-pen-clip::before { + content: "\f305"; } + +.fa-pen-alt::before { + content: "\f305"; } + +.fa-pen-fancy::before { + content: "\f5ac"; } + +.fa-pen-nib::before { + content: "\f5ad"; } + +.fa-pen-ruler::before { + content: "\f5ae"; } + +.fa-pencil-ruler::before { + content: "\f5ae"; } + +.fa-pen-to-square::before { + content: "\f044"; } + 
+.fa-edit::before { + content: "\f044"; } + +.fa-pencil::before { + content: "\f303"; } + +.fa-pencil-alt::before { + content: "\f303"; } + +.fa-people-arrows-left-right::before { + content: "\e068"; } + +.fa-people-arrows::before { + content: "\e068"; } + +.fa-people-carry-box::before { + content: "\f4ce"; } + +.fa-people-carry::before { + content: "\f4ce"; } + +.fa-people-group::before { + content: "\e533"; } + +.fa-people-line::before { + content: "\e534"; } + +.fa-people-pulling::before { + content: "\e535"; } + +.fa-people-robbery::before { + content: "\e536"; } + +.fa-people-roof::before { + content: "\e537"; } + +.fa-pepper-hot::before { + content: "\f816"; } + +.fa-percent::before { + content: "\25"; } + +.fa-percentage::before { + content: "\25"; } + +.fa-person::before { + content: "\f183"; } + +.fa-male::before { + content: "\f183"; } + +.fa-person-arrow-down-to-line::before { + content: "\e538"; } + +.fa-person-arrow-up-from-line::before { + content: "\e539"; } + +.fa-person-biking::before { + content: "\f84a"; } + +.fa-biking::before { + content: "\f84a"; } + +.fa-person-booth::before { + content: "\f756"; } + +.fa-person-breastfeeding::before { + content: "\e53a"; } + +.fa-person-burst::before { + content: "\e53b"; } + +.fa-person-cane::before { + content: "\e53c"; } + +.fa-person-chalkboard::before { + content: "\e53d"; } + +.fa-person-circle-check::before { + content: "\e53e"; } + +.fa-person-circle-exclamation::before { + content: "\e53f"; } + +.fa-person-circle-minus::before { + content: "\e540"; } + +.fa-person-circle-plus::before { + content: "\e541"; } + +.fa-person-circle-question::before { + content: "\e542"; } + +.fa-person-circle-xmark::before { + content: "\e543"; } + +.fa-person-digging::before { + content: "\f85e"; } + +.fa-digging::before { + content: "\f85e"; } + +.fa-person-dots-from-line::before { + content: "\f470"; } + +.fa-diagnoses::before { + content: "\f470"; } + +.fa-person-dress::before { + content: "\f182"; } + 
+.fa-female::before { + content: "\f182"; } + +.fa-person-dress-burst::before { + content: "\e544"; } + +.fa-person-drowning::before { + content: "\e545"; } + +.fa-person-falling::before { + content: "\e546"; } + +.fa-person-falling-burst::before { + content: "\e547"; } + +.fa-person-half-dress::before { + content: "\e548"; } + +.fa-person-harassing::before { + content: "\e549"; } + +.fa-person-hiking::before { + content: "\f6ec"; } + +.fa-hiking::before { + content: "\f6ec"; } + +.fa-person-military-pointing::before { + content: "\e54a"; } + +.fa-person-military-rifle::before { + content: "\e54b"; } + +.fa-person-military-to-person::before { + content: "\e54c"; } + +.fa-person-praying::before { + content: "\f683"; } + +.fa-pray::before { + content: "\f683"; } + +.fa-person-pregnant::before { + content: "\e31e"; } + +.fa-person-rays::before { + content: "\e54d"; } + +.fa-person-rifle::before { + content: "\e54e"; } + +.fa-person-running::before { + content: "\f70c"; } + +.fa-running::before { + content: "\f70c"; } + +.fa-person-shelter::before { + content: "\e54f"; } + +.fa-person-skating::before { + content: "\f7c5"; } + +.fa-skating::before { + content: "\f7c5"; } + +.fa-person-skiing::before { + content: "\f7c9"; } + +.fa-skiing::before { + content: "\f7c9"; } + +.fa-person-skiing-nordic::before { + content: "\f7ca"; } + +.fa-skiing-nordic::before { + content: "\f7ca"; } + +.fa-person-snowboarding::before { + content: "\f7ce"; } + +.fa-snowboarding::before { + content: "\f7ce"; } + +.fa-person-swimming::before { + content: "\f5c4"; } + +.fa-swimmer::before { + content: "\f5c4"; } + +.fa-person-through-window::before { + content: "\e433"; } + +.fa-person-walking::before { + content: "\f554"; } + +.fa-walking::before { + content: "\f554"; } + +.fa-person-walking-arrow-loop-left::before { + content: "\e551"; } + +.fa-person-walking-arrow-right::before { + content: "\e552"; } + +.fa-person-walking-dashed-line-arrow-right::before { + content: "\e553"; } + 
+.fa-person-walking-luggage::before { + content: "\e554"; } + +.fa-person-walking-with-cane::before { + content: "\f29d"; } + +.fa-blind::before { + content: "\f29d"; } + +.fa-peseta-sign::before { + content: "\e221"; } + +.fa-peso-sign::before { + content: "\e222"; } + +.fa-phone::before { + content: "\f095"; } + +.fa-phone-flip::before { + content: "\f879"; } + +.fa-phone-alt::before { + content: "\f879"; } + +.fa-phone-slash::before { + content: "\f3dd"; } + +.fa-phone-volume::before { + content: "\f2a0"; } + +.fa-volume-control-phone::before { + content: "\f2a0"; } + +.fa-photo-film::before { + content: "\f87c"; } + +.fa-photo-video::before { + content: "\f87c"; } + +.fa-piggy-bank::before { + content: "\f4d3"; } + +.fa-pills::before { + content: "\f484"; } + +.fa-pizza-slice::before { + content: "\f818"; } + +.fa-place-of-worship::before { + content: "\f67f"; } + +.fa-plane::before { + content: "\f072"; } + +.fa-plane-arrival::before { + content: "\f5af"; } + +.fa-plane-circle-check::before { + content: "\e555"; } + +.fa-plane-circle-exclamation::before { + content: "\e556"; } + +.fa-plane-circle-xmark::before { + content: "\e557"; } + +.fa-plane-departure::before { + content: "\f5b0"; } + +.fa-plane-lock::before { + content: "\e558"; } + +.fa-plane-slash::before { + content: "\e069"; } + +.fa-plane-up::before { + content: "\e22d"; } + +.fa-plant-wilt::before { + content: "\e43b"; } + +.fa-plate-wheat::before { + content: "\e55a"; } + +.fa-play::before { + content: "\f04b"; } + +.fa-plug::before { + content: "\f1e6"; } + +.fa-plug-circle-bolt::before { + content: "\e55b"; } + +.fa-plug-circle-check::before { + content: "\e55c"; } + +.fa-plug-circle-exclamation::before { + content: "\e55d"; } + +.fa-plug-circle-minus::before { + content: "\e55e"; } + +.fa-plug-circle-plus::before { + content: "\e55f"; } + +.fa-plug-circle-xmark::before { + content: "\e560"; } + +.fa-plus::before { + content: "\2b"; } + +.fa-add::before { + content: "\2b"; } + 
+.fa-plus-minus::before { + content: "\e43c"; } + +.fa-podcast::before { + content: "\f2ce"; } + +.fa-poo::before { + content: "\f2fe"; } + +.fa-poo-storm::before { + content: "\f75a"; } + +.fa-poo-bolt::before { + content: "\f75a"; } + +.fa-poop::before { + content: "\f619"; } + +.fa-power-off::before { + content: "\f011"; } + +.fa-prescription::before { + content: "\f5b1"; } + +.fa-prescription-bottle::before { + content: "\f485"; } + +.fa-prescription-bottle-medical::before { + content: "\f486"; } + +.fa-prescription-bottle-alt::before { + content: "\f486"; } + +.fa-print::before { + content: "\f02f"; } + +.fa-pump-medical::before { + content: "\e06a"; } + +.fa-pump-soap::before { + content: "\e06b"; } + +.fa-puzzle-piece::before { + content: "\f12e"; } + +.fa-q::before { + content: "\51"; } + +.fa-qrcode::before { + content: "\f029"; } + +.fa-question::before { + content: "\3f"; } + +.fa-quote-left::before { + content: "\f10d"; } + +.fa-quote-left-alt::before { + content: "\f10d"; } + +.fa-quote-right::before { + content: "\f10e"; } + +.fa-quote-right-alt::before { + content: "\f10e"; } + +.fa-r::before { + content: "\52"; } + +.fa-radiation::before { + content: "\f7b9"; } + +.fa-radio::before { + content: "\f8d7"; } + +.fa-rainbow::before { + content: "\f75b"; } + +.fa-ranking-star::before { + content: "\e561"; } + +.fa-receipt::before { + content: "\f543"; } + +.fa-record-vinyl::before { + content: "\f8d9"; } + +.fa-rectangle-ad::before { + content: "\f641"; } + +.fa-ad::before { + content: "\f641"; } + +.fa-rectangle-list::before { + content: "\f022"; } + +.fa-list-alt::before { + content: "\f022"; } + +.fa-rectangle-xmark::before { + content: "\f410"; } + +.fa-rectangle-times::before { + content: "\f410"; } + +.fa-times-rectangle::before { + content: "\f410"; } + +.fa-window-close::before { + content: "\f410"; } + +.fa-recycle::before { + content: "\f1b8"; } + +.fa-registered::before { + content: "\f25d"; } + +.fa-repeat::before { + content: "\f363"; } + 
+.fa-reply::before { + content: "\f3e5"; } + +.fa-mail-reply::before { + content: "\f3e5"; } + +.fa-reply-all::before { + content: "\f122"; } + +.fa-mail-reply-all::before { + content: "\f122"; } + +.fa-republican::before { + content: "\f75e"; } + +.fa-restroom::before { + content: "\f7bd"; } + +.fa-retweet::before { + content: "\f079"; } + +.fa-ribbon::before { + content: "\f4d6"; } + +.fa-right-from-bracket::before { + content: "\f2f5"; } + +.fa-sign-out-alt::before { + content: "\f2f5"; } + +.fa-right-left::before { + content: "\f362"; } + +.fa-exchange-alt::before { + content: "\f362"; } + +.fa-right-long::before { + content: "\f30b"; } + +.fa-long-arrow-alt-right::before { + content: "\f30b"; } + +.fa-right-to-bracket::before { + content: "\f2f6"; } + +.fa-sign-in-alt::before { + content: "\f2f6"; } + +.fa-ring::before { + content: "\f70b"; } + +.fa-road::before { + content: "\f018"; } + +.fa-road-barrier::before { + content: "\e562"; } + +.fa-road-bridge::before { + content: "\e563"; } + +.fa-road-circle-check::before { + content: "\e564"; } + +.fa-road-circle-exclamation::before { + content: "\e565"; } + +.fa-road-circle-xmark::before { + content: "\e566"; } + +.fa-road-lock::before { + content: "\e567"; } + +.fa-road-spikes::before { + content: "\e568"; } + +.fa-robot::before { + content: "\f544"; } + +.fa-rocket::before { + content: "\f135"; } + +.fa-rotate::before { + content: "\f2f1"; } + +.fa-sync-alt::before { + content: "\f2f1"; } + +.fa-rotate-left::before { + content: "\f2ea"; } + +.fa-rotate-back::before { + content: "\f2ea"; } + +.fa-rotate-backward::before { + content: "\f2ea"; } + +.fa-undo-alt::before { + content: "\f2ea"; } + +.fa-rotate-right::before { + content: "\f2f9"; } + +.fa-redo-alt::before { + content: "\f2f9"; } + +.fa-rotate-forward::before { + content: "\f2f9"; } + +.fa-route::before { + content: "\f4d7"; } + +.fa-rss::before { + content: "\f09e"; } + +.fa-feed::before { + content: "\f09e"; } + +.fa-ruble-sign::before { + content: 
"\f158"; } + +.fa-rouble::before { + content: "\f158"; } + +.fa-rub::before { + content: "\f158"; } + +.fa-ruble::before { + content: "\f158"; } + +.fa-rug::before { + content: "\e569"; } + +.fa-ruler::before { + content: "\f545"; } + +.fa-ruler-combined::before { + content: "\f546"; } + +.fa-ruler-horizontal::before { + content: "\f547"; } + +.fa-ruler-vertical::before { + content: "\f548"; } + +.fa-rupee-sign::before { + content: "\f156"; } + +.fa-rupee::before { + content: "\f156"; } + +.fa-rupiah-sign::before { + content: "\e23d"; } + +.fa-s::before { + content: "\53"; } + +.fa-sack-dollar::before { + content: "\f81d"; } + +.fa-sack-xmark::before { + content: "\e56a"; } + +.fa-sailboat::before { + content: "\e445"; } + +.fa-satellite::before { + content: "\f7bf"; } + +.fa-satellite-dish::before { + content: "\f7c0"; } + +.fa-scale-balanced::before { + content: "\f24e"; } + +.fa-balance-scale::before { + content: "\f24e"; } + +.fa-scale-unbalanced::before { + content: "\f515"; } + +.fa-balance-scale-left::before { + content: "\f515"; } + +.fa-scale-unbalanced-flip::before { + content: "\f516"; } + +.fa-balance-scale-right::before { + content: "\f516"; } + +.fa-school::before { + content: "\f549"; } + +.fa-school-circle-check::before { + content: "\e56b"; } + +.fa-school-circle-exclamation::before { + content: "\e56c"; } + +.fa-school-circle-xmark::before { + content: "\e56d"; } + +.fa-school-flag::before { + content: "\e56e"; } + +.fa-school-lock::before { + content: "\e56f"; } + +.fa-scissors::before { + content: "\f0c4"; } + +.fa-cut::before { + content: "\f0c4"; } + +.fa-screwdriver::before { + content: "\f54a"; } + +.fa-screwdriver-wrench::before { + content: "\f7d9"; } + +.fa-tools::before { + content: "\f7d9"; } + +.fa-scroll::before { + content: "\f70e"; } + +.fa-scroll-torah::before { + content: "\f6a0"; } + +.fa-torah::before { + content: "\f6a0"; } + +.fa-sd-card::before { + content: "\f7c2"; } + +.fa-section::before { + content: "\e447"; } + 
+.fa-seedling::before { + content: "\f4d8"; } + +.fa-sprout::before { + content: "\f4d8"; } + +.fa-server::before { + content: "\f233"; } + +.fa-shapes::before { + content: "\f61f"; } + +.fa-triangle-circle-square::before { + content: "\f61f"; } + +.fa-share::before { + content: "\f064"; } + +.fa-arrow-turn-right::before { + content: "\f064"; } + +.fa-mail-forward::before { + content: "\f064"; } + +.fa-share-from-square::before { + content: "\f14d"; } + +.fa-share-square::before { + content: "\f14d"; } + +.fa-share-nodes::before { + content: "\f1e0"; } + +.fa-share-alt::before { + content: "\f1e0"; } + +.fa-sheet-plastic::before { + content: "\e571"; } + +.fa-shekel-sign::before { + content: "\f20b"; } + +.fa-ils::before { + content: "\f20b"; } + +.fa-shekel::before { + content: "\f20b"; } + +.fa-sheqel::before { + content: "\f20b"; } + +.fa-sheqel-sign::before { + content: "\f20b"; } + +.fa-shield::before { + content: "\f132"; } + +.fa-shield-blank::before { + content: "\f132"; } + +.fa-shield-cat::before { + content: "\e572"; } + +.fa-shield-dog::before { + content: "\e573"; } + +.fa-shield-halved::before { + content: "\f3ed"; } + +.fa-shield-alt::before { + content: "\f3ed"; } + +.fa-shield-heart::before { + content: "\e574"; } + +.fa-shield-virus::before { + content: "\e06c"; } + +.fa-ship::before { + content: "\f21a"; } + +.fa-shirt::before { + content: "\f553"; } + +.fa-t-shirt::before { + content: "\f553"; } + +.fa-tshirt::before { + content: "\f553"; } + +.fa-shoe-prints::before { + content: "\f54b"; } + +.fa-shop::before { + content: "\f54f"; } + +.fa-store-alt::before { + content: "\f54f"; } + +.fa-shop-lock::before { + content: "\e4a5"; } + +.fa-shop-slash::before { + content: "\e070"; } + +.fa-store-alt-slash::before { + content: "\e070"; } + +.fa-shower::before { + content: "\f2cc"; } + +.fa-shrimp::before { + content: "\e448"; } + +.fa-shuffle::before { + content: "\f074"; } + +.fa-random::before { + content: "\f074"; } + +.fa-shuttle-space::before { 
+ content: "\f197"; } + +.fa-space-shuttle::before { + content: "\f197"; } + +.fa-sign-hanging::before { + content: "\f4d9"; } + +.fa-sign::before { + content: "\f4d9"; } + +.fa-signal::before { + content: "\f012"; } + +.fa-signal-5::before { + content: "\f012"; } + +.fa-signal-perfect::before { + content: "\f012"; } + +.fa-signature::before { + content: "\f5b7"; } + +.fa-signs-post::before { + content: "\f277"; } + +.fa-map-signs::before { + content: "\f277"; } + +.fa-sim-card::before { + content: "\f7c4"; } + +.fa-sink::before { + content: "\e06d"; } + +.fa-sitemap::before { + content: "\f0e8"; } + +.fa-skull::before { + content: "\f54c"; } + +.fa-skull-crossbones::before { + content: "\f714"; } + +.fa-slash::before { + content: "\f715"; } + +.fa-sleigh::before { + content: "\f7cc"; } + +.fa-sliders::before { + content: "\f1de"; } + +.fa-sliders-h::before { + content: "\f1de"; } + +.fa-smog::before { + content: "\f75f"; } + +.fa-smoking::before { + content: "\f48d"; } + +.fa-snowflake::before { + content: "\f2dc"; } + +.fa-snowman::before { + content: "\f7d0"; } + +.fa-snowplow::before { + content: "\f7d2"; } + +.fa-soap::before { + content: "\e06e"; } + +.fa-socks::before { + content: "\f696"; } + +.fa-solar-panel::before { + content: "\f5ba"; } + +.fa-sort::before { + content: "\f0dc"; } + +.fa-unsorted::before { + content: "\f0dc"; } + +.fa-sort-down::before { + content: "\f0dd"; } + +.fa-sort-desc::before { + content: "\f0dd"; } + +.fa-sort-up::before { + content: "\f0de"; } + +.fa-sort-asc::before { + content: "\f0de"; } + +.fa-spa::before { + content: "\f5bb"; } + +.fa-spaghetti-monster-flying::before { + content: "\f67b"; } + +.fa-pastafarianism::before { + content: "\f67b"; } + +.fa-spell-check::before { + content: "\f891"; } + +.fa-spider::before { + content: "\f717"; } + +.fa-spinner::before { + content: "\f110"; } + +.fa-splotch::before { + content: "\f5bc"; } + +.fa-spoon::before { + content: "\f2e5"; } + +.fa-utensil-spoon::before { + content: 
"\f2e5"; } + +.fa-spray-can::before { + content: "\f5bd"; } + +.fa-spray-can-sparkles::before { + content: "\f5d0"; } + +.fa-air-freshener::before { + content: "\f5d0"; } + +.fa-square::before { + content: "\f0c8"; } + +.fa-square-arrow-up-right::before { + content: "\f14c"; } + +.fa-external-link-square::before { + content: "\f14c"; } + +.fa-square-caret-down::before { + content: "\f150"; } + +.fa-caret-square-down::before { + content: "\f150"; } + +.fa-square-caret-left::before { + content: "\f191"; } + +.fa-caret-square-left::before { + content: "\f191"; } + +.fa-square-caret-right::before { + content: "\f152"; } + +.fa-caret-square-right::before { + content: "\f152"; } + +.fa-square-caret-up::before { + content: "\f151"; } + +.fa-caret-square-up::before { + content: "\f151"; } + +.fa-square-check::before { + content: "\f14a"; } + +.fa-check-square::before { + content: "\f14a"; } + +.fa-square-envelope::before { + content: "\f199"; } + +.fa-envelope-square::before { + content: "\f199"; } + +.fa-square-full::before { + content: "\f45c"; } + +.fa-square-h::before { + content: "\f0fd"; } + +.fa-h-square::before { + content: "\f0fd"; } + +.fa-square-minus::before { + content: "\f146"; } + +.fa-minus-square::before { + content: "\f146"; } + +.fa-square-nfi::before { + content: "\e576"; } + +.fa-square-parking::before { + content: "\f540"; } + +.fa-parking::before { + content: "\f540"; } + +.fa-square-pen::before { + content: "\f14b"; } + +.fa-pen-square::before { + content: "\f14b"; } + +.fa-pencil-square::before { + content: "\f14b"; } + +.fa-square-person-confined::before { + content: "\e577"; } + +.fa-square-phone::before { + content: "\f098"; } + +.fa-phone-square::before { + content: "\f098"; } + +.fa-square-phone-flip::before { + content: "\f87b"; } + +.fa-phone-square-alt::before { + content: "\f87b"; } + +.fa-square-plus::before { + content: "\f0fe"; } + +.fa-plus-square::before { + content: "\f0fe"; } + +.fa-square-poll-horizontal::before { + content: 
"\f682"; } + +.fa-poll-h::before { + content: "\f682"; } + +.fa-square-poll-vertical::before { + content: "\f681"; } + +.fa-poll::before { + content: "\f681"; } + +.fa-square-root-variable::before { + content: "\f698"; } + +.fa-square-root-alt::before { + content: "\f698"; } + +.fa-square-rss::before { + content: "\f143"; } + +.fa-rss-square::before { + content: "\f143"; } + +.fa-square-share-nodes::before { + content: "\f1e1"; } + +.fa-share-alt-square::before { + content: "\f1e1"; } + +.fa-square-up-right::before { + content: "\f360"; } + +.fa-external-link-square-alt::before { + content: "\f360"; } + +.fa-square-virus::before { + content: "\e578"; } + +.fa-square-xmark::before { + content: "\f2d3"; } + +.fa-times-square::before { + content: "\f2d3"; } + +.fa-xmark-square::before { + content: "\f2d3"; } + +.fa-staff-aesculapius::before { + content: "\e579"; } + +.fa-rod-asclepius::before { + content: "\e579"; } + +.fa-rod-snake::before { + content: "\e579"; } + +.fa-staff-snake::before { + content: "\e579"; } + +.fa-stairs::before { + content: "\e289"; } + +.fa-stamp::before { + content: "\f5bf"; } + +.fa-star::before { + content: "\f005"; } + +.fa-star-and-crescent::before { + content: "\f699"; } + +.fa-star-half::before { + content: "\f089"; } + +.fa-star-half-stroke::before { + content: "\f5c0"; } + +.fa-star-half-alt::before { + content: "\f5c0"; } + +.fa-star-of-david::before { + content: "\f69a"; } + +.fa-star-of-life::before { + content: "\f621"; } + +.fa-sterling-sign::before { + content: "\f154"; } + +.fa-gbp::before { + content: "\f154"; } + +.fa-pound-sign::before { + content: "\f154"; } + +.fa-stethoscope::before { + content: "\f0f1"; } + +.fa-stop::before { + content: "\f04d"; } + +.fa-stopwatch::before { + content: "\f2f2"; } + +.fa-stopwatch-20::before { + content: "\e06f"; } + +.fa-store::before { + content: "\f54e"; } + +.fa-store-slash::before { + content: "\e071"; } + +.fa-street-view::before { + content: "\f21d"; } + +.fa-strikethrough::before 
{ + content: "\f0cc"; } + +.fa-stroopwafel::before { + content: "\f551"; } + +.fa-subscript::before { + content: "\f12c"; } + +.fa-suitcase::before { + content: "\f0f2"; } + +.fa-suitcase-medical::before { + content: "\f0fa"; } + +.fa-medkit::before { + content: "\f0fa"; } + +.fa-suitcase-rolling::before { + content: "\f5c1"; } + +.fa-sun::before { + content: "\f185"; } + +.fa-sun-plant-wilt::before { + content: "\e57a"; } + +.fa-superscript::before { + content: "\f12b"; } + +.fa-swatchbook::before { + content: "\f5c3"; } + +.fa-synagogue::before { + content: "\f69b"; } + +.fa-syringe::before { + content: "\f48e"; } + +.fa-t::before { + content: "\54"; } + +.fa-table::before { + content: "\f0ce"; } + +.fa-table-cells::before { + content: "\f00a"; } + +.fa-th::before { + content: "\f00a"; } + +.fa-table-cells-large::before { + content: "\f009"; } + +.fa-th-large::before { + content: "\f009"; } + +.fa-table-columns::before { + content: "\f0db"; } + +.fa-columns::before { + content: "\f0db"; } + +.fa-table-list::before { + content: "\f00b"; } + +.fa-th-list::before { + content: "\f00b"; } + +.fa-table-tennis-paddle-ball::before { + content: "\f45d"; } + +.fa-ping-pong-paddle-ball::before { + content: "\f45d"; } + +.fa-table-tennis::before { + content: "\f45d"; } + +.fa-tablet::before { + content: "\f3fb"; } + +.fa-tablet-android::before { + content: "\f3fb"; } + +.fa-tablet-button::before { + content: "\f10a"; } + +.fa-tablet-screen-button::before { + content: "\f3fa"; } + +.fa-tablet-alt::before { + content: "\f3fa"; } + +.fa-tablets::before { + content: "\f490"; } + +.fa-tachograph-digital::before { + content: "\f566"; } + +.fa-digital-tachograph::before { + content: "\f566"; } + +.fa-tag::before { + content: "\f02b"; } + +.fa-tags::before { + content: "\f02c"; } + +.fa-tape::before { + content: "\f4db"; } + +.fa-tarp::before { + content: "\e57b"; } + +.fa-tarp-droplet::before { + content: "\e57c"; } + +.fa-taxi::before { + content: "\f1ba"; } + +.fa-cab::before { + 
content: "\f1ba"; } + +.fa-teeth::before { + content: "\f62e"; } + +.fa-teeth-open::before { + content: "\f62f"; } + +.fa-temperature-arrow-down::before { + content: "\e03f"; } + +.fa-temperature-down::before { + content: "\e03f"; } + +.fa-temperature-arrow-up::before { + content: "\e040"; } + +.fa-temperature-up::before { + content: "\e040"; } + +.fa-temperature-empty::before { + content: "\f2cb"; } + +.fa-temperature-0::before { + content: "\f2cb"; } + +.fa-thermometer-0::before { + content: "\f2cb"; } + +.fa-thermometer-empty::before { + content: "\f2cb"; } + +.fa-temperature-full::before { + content: "\f2c7"; } + +.fa-temperature-4::before { + content: "\f2c7"; } + +.fa-thermometer-4::before { + content: "\f2c7"; } + +.fa-thermometer-full::before { + content: "\f2c7"; } + +.fa-temperature-half::before { + content: "\f2c9"; } + +.fa-temperature-2::before { + content: "\f2c9"; } + +.fa-thermometer-2::before { + content: "\f2c9"; } + +.fa-thermometer-half::before { + content: "\f2c9"; } + +.fa-temperature-high::before { + content: "\f769"; } + +.fa-temperature-low::before { + content: "\f76b"; } + +.fa-temperature-quarter::before { + content: "\f2ca"; } + +.fa-temperature-1::before { + content: "\f2ca"; } + +.fa-thermometer-1::before { + content: "\f2ca"; } + +.fa-thermometer-quarter::before { + content: "\f2ca"; } + +.fa-temperature-three-quarters::before { + content: "\f2c8"; } + +.fa-temperature-3::before { + content: "\f2c8"; } + +.fa-thermometer-3::before { + content: "\f2c8"; } + +.fa-thermometer-three-quarters::before { + content: "\f2c8"; } + +.fa-tenge-sign::before { + content: "\f7d7"; } + +.fa-tenge::before { + content: "\f7d7"; } + +.fa-tent::before { + content: "\e57d"; } + +.fa-tent-arrow-down-to-line::before { + content: "\e57e"; } + +.fa-tent-arrow-left-right::before { + content: "\e57f"; } + +.fa-tent-arrow-turn-left::before { + content: "\e580"; } + +.fa-tent-arrows-down::before { + content: "\e581"; } + +.fa-tents::before { + content: "\e582"; } 
+ +.fa-terminal::before { + content: "\f120"; } + +.fa-text-height::before { + content: "\f034"; } + +.fa-text-slash::before { + content: "\f87d"; } + +.fa-remove-format::before { + content: "\f87d"; } + +.fa-text-width::before { + content: "\f035"; } + +.fa-thermometer::before { + content: "\f491"; } + +.fa-thumbs-down::before { + content: "\f165"; } + +.fa-thumbs-up::before { + content: "\f164"; } + +.fa-thumbtack::before { + content: "\f08d"; } + +.fa-thumb-tack::before { + content: "\f08d"; } + +.fa-ticket::before { + content: "\f145"; } + +.fa-ticket-simple::before { + content: "\f3ff"; } + +.fa-ticket-alt::before { + content: "\f3ff"; } + +.fa-timeline::before { + content: "\e29c"; } + +.fa-toggle-off::before { + content: "\f204"; } + +.fa-toggle-on::before { + content: "\f205"; } + +.fa-toilet::before { + content: "\f7d8"; } + +.fa-toilet-paper::before { + content: "\f71e"; } + +.fa-toilet-paper-slash::before { + content: "\e072"; } + +.fa-toilet-portable::before { + content: "\e583"; } + +.fa-toilets-portable::before { + content: "\e584"; } + +.fa-toolbox::before { + content: "\f552"; } + +.fa-tooth::before { + content: "\f5c9"; } + +.fa-torii-gate::before { + content: "\f6a1"; } + +.fa-tornado::before { + content: "\f76f"; } + +.fa-tower-broadcast::before { + content: "\f519"; } + +.fa-broadcast-tower::before { + content: "\f519"; } + +.fa-tower-cell::before { + content: "\e585"; } + +.fa-tower-observation::before { + content: "\e586"; } + +.fa-tractor::before { + content: "\f722"; } + +.fa-trademark::before { + content: "\f25c"; } + +.fa-traffic-light::before { + content: "\f637"; } + +.fa-trailer::before { + content: "\e041"; } + +.fa-train::before { + content: "\f238"; } + +.fa-train-subway::before { + content: "\f239"; } + +.fa-subway::before { + content: "\f239"; } + +.fa-train-tram::before { + content: "\f7da"; } + +.fa-tram::before { + content: "\f7da"; } + +.fa-transgender::before { + content: "\f225"; } + +.fa-transgender-alt::before { + content: 
"\f225"; } + +.fa-trash::before { + content: "\f1f8"; } + +.fa-trash-arrow-up::before { + content: "\f829"; } + +.fa-trash-restore::before { + content: "\f829"; } + +.fa-trash-can::before { + content: "\f2ed"; } + +.fa-trash-alt::before { + content: "\f2ed"; } + +.fa-trash-can-arrow-up::before { + content: "\f82a"; } + +.fa-trash-restore-alt::before { + content: "\f82a"; } + +.fa-tree::before { + content: "\f1bb"; } + +.fa-tree-city::before { + content: "\e587"; } + +.fa-triangle-exclamation::before { + content: "\f071"; } + +.fa-exclamation-triangle::before { + content: "\f071"; } + +.fa-warning::before { + content: "\f071"; } + +.fa-trophy::before { + content: "\f091"; } + +.fa-trowel::before { + content: "\e589"; } + +.fa-trowel-bricks::before { + content: "\e58a"; } + +.fa-truck::before { + content: "\f0d1"; } + +.fa-truck-arrow-right::before { + content: "\e58b"; } + +.fa-truck-droplet::before { + content: "\e58c"; } + +.fa-truck-fast::before { + content: "\f48b"; } + +.fa-shipping-fast::before { + content: "\f48b"; } + +.fa-truck-field::before { + content: "\e58d"; } + +.fa-truck-field-un::before { + content: "\e58e"; } + +.fa-truck-front::before { + content: "\e2b7"; } + +.fa-truck-medical::before { + content: "\f0f9"; } + +.fa-ambulance::before { + content: "\f0f9"; } + +.fa-truck-monster::before { + content: "\f63b"; } + +.fa-truck-moving::before { + content: "\f4df"; } + +.fa-truck-pickup::before { + content: "\f63c"; } + +.fa-truck-plane::before { + content: "\e58f"; } + +.fa-truck-ramp-box::before { + content: "\f4de"; } + +.fa-truck-loading::before { + content: "\f4de"; } + +.fa-tty::before { + content: "\f1e4"; } + +.fa-teletype::before { + content: "\f1e4"; } + +.fa-turkish-lira-sign::before { + content: "\e2bb"; } + +.fa-try::before { + content: "\e2bb"; } + +.fa-turkish-lira::before { + content: "\e2bb"; } + +.fa-turn-down::before { + content: "\f3be"; } + +.fa-level-down-alt::before { + content: "\f3be"; } + +.fa-turn-up::before { + content: 
"\f3bf"; } + +.fa-level-up-alt::before { + content: "\f3bf"; } + +.fa-tv::before { + content: "\f26c"; } + +.fa-television::before { + content: "\f26c"; } + +.fa-tv-alt::before { + content: "\f26c"; } + +.fa-u::before { + content: "\55"; } + +.fa-umbrella::before { + content: "\f0e9"; } + +.fa-umbrella-beach::before { + content: "\f5ca"; } + +.fa-underline::before { + content: "\f0cd"; } + +.fa-universal-access::before { + content: "\f29a"; } + +.fa-unlock::before { + content: "\f09c"; } + +.fa-unlock-keyhole::before { + content: "\f13e"; } + +.fa-unlock-alt::before { + content: "\f13e"; } + +.fa-up-down::before { + content: "\f338"; } + +.fa-arrows-alt-v::before { + content: "\f338"; } + +.fa-up-down-left-right::before { + content: "\f0b2"; } + +.fa-arrows-alt::before { + content: "\f0b2"; } + +.fa-up-long::before { + content: "\f30c"; } + +.fa-long-arrow-alt-up::before { + content: "\f30c"; } + +.fa-up-right-and-down-left-from-center::before { + content: "\f424"; } + +.fa-expand-alt::before { + content: "\f424"; } + +.fa-up-right-from-square::before { + content: "\f35d"; } + +.fa-external-link-alt::before { + content: "\f35d"; } + +.fa-upload::before { + content: "\f093"; } + +.fa-user::before { + content: "\f007"; } + +.fa-user-astronaut::before { + content: "\f4fb"; } + +.fa-user-check::before { + content: "\f4fc"; } + +.fa-user-clock::before { + content: "\f4fd"; } + +.fa-user-doctor::before { + content: "\f0f0"; } + +.fa-user-md::before { + content: "\f0f0"; } + +.fa-user-gear::before { + content: "\f4fe"; } + +.fa-user-cog::before { + content: "\f4fe"; } + +.fa-user-graduate::before { + content: "\f501"; } + +.fa-user-group::before { + content: "\f500"; } + +.fa-user-friends::before { + content: "\f500"; } + +.fa-user-injured::before { + content: "\f728"; } + +.fa-user-large::before { + content: "\f406"; } + +.fa-user-alt::before { + content: "\f406"; } + +.fa-user-large-slash::before { + content: "\f4fa"; } + +.fa-user-alt-slash::before { + content: 
"\f4fa"; } + +.fa-user-lock::before { + content: "\f502"; } + +.fa-user-minus::before { + content: "\f503"; } + +.fa-user-ninja::before { + content: "\f504"; } + +.fa-user-nurse::before { + content: "\f82f"; } + +.fa-user-pen::before { + content: "\f4ff"; } + +.fa-user-edit::before { + content: "\f4ff"; } + +.fa-user-plus::before { + content: "\f234"; } + +.fa-user-secret::before { + content: "\f21b"; } + +.fa-user-shield::before { + content: "\f505"; } + +.fa-user-slash::before { + content: "\f506"; } + +.fa-user-tag::before { + content: "\f507"; } + +.fa-user-tie::before { + content: "\f508"; } + +.fa-user-xmark::before { + content: "\f235"; } + +.fa-user-times::before { + content: "\f235"; } + +.fa-users::before { + content: "\f0c0"; } + +.fa-users-between-lines::before { + content: "\e591"; } + +.fa-users-gear::before { + content: "\f509"; } + +.fa-users-cog::before { + content: "\f509"; } + +.fa-users-line::before { + content: "\e592"; } + +.fa-users-rays::before { + content: "\e593"; } + +.fa-users-rectangle::before { + content: "\e594"; } + +.fa-users-slash::before { + content: "\e073"; } + +.fa-users-viewfinder::before { + content: "\e595"; } + +.fa-utensils::before { + content: "\f2e7"; } + +.fa-cutlery::before { + content: "\f2e7"; } + +.fa-v::before { + content: "\56"; } + +.fa-van-shuttle::before { + content: "\f5b6"; } + +.fa-shuttle-van::before { + content: "\f5b6"; } + +.fa-vault::before { + content: "\e2c5"; } + +.fa-vector-square::before { + content: "\f5cb"; } + +.fa-venus::before { + content: "\f221"; } + +.fa-venus-double::before { + content: "\f226"; } + +.fa-venus-mars::before { + content: "\f228"; } + +.fa-vest::before { + content: "\e085"; } + +.fa-vest-patches::before { + content: "\e086"; } + +.fa-vial::before { + content: "\f492"; } + +.fa-vial-circle-check::before { + content: "\e596"; } + +.fa-vial-virus::before { + content: "\e597"; } + +.fa-vials::before { + content: "\f493"; } + +.fa-video::before { + content: "\f03d"; } + 
+.fa-video-camera::before { + content: "\f03d"; } + +.fa-video-slash::before { + content: "\f4e2"; } + +.fa-vihara::before { + content: "\f6a7"; } + +.fa-virus::before { + content: "\e074"; } + +.fa-virus-covid::before { + content: "\e4a8"; } + +.fa-virus-covid-slash::before { + content: "\e4a9"; } + +.fa-virus-slash::before { + content: "\e075"; } + +.fa-viruses::before { + content: "\e076"; } + +.fa-voicemail::before { + content: "\f897"; } + +.fa-volcano::before { + content: "\f770"; } + +.fa-volleyball::before { + content: "\f45f"; } + +.fa-volleyball-ball::before { + content: "\f45f"; } + +.fa-volume-high::before { + content: "\f028"; } + +.fa-volume-up::before { + content: "\f028"; } + +.fa-volume-low::before { + content: "\f027"; } + +.fa-volume-down::before { + content: "\f027"; } + +.fa-volume-off::before { + content: "\f026"; } + +.fa-volume-xmark::before { + content: "\f6a9"; } + +.fa-volume-mute::before { + content: "\f6a9"; } + +.fa-volume-times::before { + content: "\f6a9"; } + +.fa-vr-cardboard::before { + content: "\f729"; } + +.fa-w::before { + content: "\57"; } + +.fa-walkie-talkie::before { + content: "\f8ef"; } + +.fa-wallet::before { + content: "\f555"; } + +.fa-wand-magic::before { + content: "\f0d0"; } + +.fa-magic::before { + content: "\f0d0"; } + +.fa-wand-magic-sparkles::before { + content: "\e2ca"; } + +.fa-magic-wand-sparkles::before { + content: "\e2ca"; } + +.fa-wand-sparkles::before { + content: "\f72b"; } + +.fa-warehouse::before { + content: "\f494"; } + +.fa-water::before { + content: "\f773"; } + +.fa-water-ladder::before { + content: "\f5c5"; } + +.fa-ladder-water::before { + content: "\f5c5"; } + +.fa-swimming-pool::before { + content: "\f5c5"; } + +.fa-wave-square::before { + content: "\f83e"; } + +.fa-weight-hanging::before { + content: "\f5cd"; } + +.fa-weight-scale::before { + content: "\f496"; } + +.fa-weight::before { + content: "\f496"; } + +.fa-wheat-awn::before { + content: "\e2cd"; } + +.fa-wheat-alt::before { + 
content: "\e2cd"; } + +.fa-wheat-awn-circle-exclamation::before { + content: "\e598"; } + +.fa-wheelchair::before { + content: "\f193"; } + +.fa-wheelchair-move::before { + content: "\e2ce"; } + +.fa-wheelchair-alt::before { + content: "\e2ce"; } + +.fa-whiskey-glass::before { + content: "\f7a0"; } + +.fa-glass-whiskey::before { + content: "\f7a0"; } + +.fa-wifi::before { + content: "\f1eb"; } + +.fa-wifi-3::before { + content: "\f1eb"; } + +.fa-wifi-strong::before { + content: "\f1eb"; } + +.fa-wind::before { + content: "\f72e"; } + +.fa-window-maximize::before { + content: "\f2d0"; } + +.fa-window-minimize::before { + content: "\f2d1"; } + +.fa-window-restore::before { + content: "\f2d2"; } + +.fa-wine-bottle::before { + content: "\f72f"; } + +.fa-wine-glass::before { + content: "\f4e3"; } + +.fa-wine-glass-empty::before { + content: "\f5ce"; } + +.fa-wine-glass-alt::before { + content: "\f5ce"; } + +.fa-won-sign::before { + content: "\f159"; } + +.fa-krw::before { + content: "\f159"; } + +.fa-won::before { + content: "\f159"; } + +.fa-worm::before { + content: "\e599"; } + +.fa-wrench::before { + content: "\f0ad"; } + +.fa-x::before { + content: "\58"; } + +.fa-x-ray::before { + content: "\f497"; } + +.fa-xmark::before { + content: "\f00d"; } + +.fa-close::before { + content: "\f00d"; } + +.fa-multiply::before { + content: "\f00d"; } + +.fa-remove::before { + content: "\f00d"; } + +.fa-times::before { + content: "\f00d"; } + +.fa-xmarks-lines::before { + content: "\e59a"; } + +.fa-y::before { + content: "\59"; } + +.fa-yen-sign::before { + content: "\f157"; } + +.fa-cny::before { + content: "\f157"; } + +.fa-jpy::before { + content: "\f157"; } + +.fa-rmb::before { + content: "\f157"; } + +.fa-yen::before { + content: "\f157"; } + +.fa-yin-yang::before { + content: "\f6ad"; } + +.fa-z::before { + content: "\5a"; } + +.sr-only, +.fa-sr-only { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 
0); + white-space: nowrap; + border-width: 0; } + +.sr-only-focusable:not(:focus), +.fa-sr-only-focusable:not(:focus) { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border-width: 0; } +:root, :host { + --fa-font-brands: normal 400 1em/1 "Font Awesome 6 Brands"; } + +@font-face { + font-family: 'Font Awesome 6 Brands'; + font-style: normal; + font-weight: 400; + font-display: block; + src: url("../webfonts/fa-brands-400.woff2") format("woff2"), url("../webfonts/fa-brands-400.ttf") format("truetype"); } + +.fab, +.fa-brands { + font-family: 'Font Awesome 6 Brands'; + font-weight: 400; } + +.fa-42-group:before { + content: "\e080"; } + +.fa-innosoft:before { + content: "\e080"; } + +.fa-500px:before { + content: "\f26e"; } + +.fa-accessible-icon:before { + content: "\f368"; } + +.fa-accusoft:before { + content: "\f369"; } + +.fa-adn:before { + content: "\f170"; } + +.fa-adversal:before { + content: "\f36a"; } + +.fa-affiliatetheme:before { + content: "\f36b"; } + +.fa-airbnb:before { + content: "\f834"; } + +.fa-algolia:before { + content: "\f36c"; } + +.fa-alipay:before { + content: "\f642"; } + +.fa-amazon:before { + content: "\f270"; } + +.fa-amazon-pay:before { + content: "\f42c"; } + +.fa-amilia:before { + content: "\f36d"; } + +.fa-android:before { + content: "\f17b"; } + +.fa-angellist:before { + content: "\f209"; } + +.fa-angrycreative:before { + content: "\f36e"; } + +.fa-angular:before { + content: "\f420"; } + +.fa-app-store:before { + content: "\f36f"; } + +.fa-app-store-ios:before { + content: "\f370"; } + +.fa-apper:before { + content: "\f371"; } + +.fa-apple:before { + content: "\f179"; } + +.fa-apple-pay:before { + content: "\f415"; } + +.fa-artstation:before { + content: "\f77a"; } + +.fa-asymmetrik:before { + content: "\f372"; } + +.fa-atlassian:before { + content: "\f77b"; } + +.fa-audible:before { + content: "\f373"; } + 
+.fa-autoprefixer:before { + content: "\f41c"; } + +.fa-avianex:before { + content: "\f374"; } + +.fa-aviato:before { + content: "\f421"; } + +.fa-aws:before { + content: "\f375"; } + +.fa-bandcamp:before { + content: "\f2d5"; } + +.fa-battle-net:before { + content: "\f835"; } + +.fa-behance:before { + content: "\f1b4"; } + +.fa-behance-square:before { + content: "\f1b5"; } + +.fa-bilibili:before { + content: "\e3d9"; } + +.fa-bimobject:before { + content: "\f378"; } + +.fa-bitbucket:before { + content: "\f171"; } + +.fa-bitcoin:before { + content: "\f379"; } + +.fa-bity:before { + content: "\f37a"; } + +.fa-black-tie:before { + content: "\f27e"; } + +.fa-blackberry:before { + content: "\f37b"; } + +.fa-blogger:before { + content: "\f37c"; } + +.fa-blogger-b:before { + content: "\f37d"; } + +.fa-bluetooth:before { + content: "\f293"; } + +.fa-bluetooth-b:before { + content: "\f294"; } + +.fa-bootstrap:before { + content: "\f836"; } + +.fa-bots:before { + content: "\e340"; } + +.fa-btc:before { + content: "\f15a"; } + +.fa-buffer:before { + content: "\f837"; } + +.fa-buromobelexperte:before { + content: "\f37f"; } + +.fa-buy-n-large:before { + content: "\f8a6"; } + +.fa-buysellads:before { + content: "\f20d"; } + +.fa-canadian-maple-leaf:before { + content: "\f785"; } + +.fa-cc-amazon-pay:before { + content: "\f42d"; } + +.fa-cc-amex:before { + content: "\f1f3"; } + +.fa-cc-apple-pay:before { + content: "\f416"; } + +.fa-cc-diners-club:before { + content: "\f24c"; } + +.fa-cc-discover:before { + content: "\f1f2"; } + +.fa-cc-jcb:before { + content: "\f24b"; } + +.fa-cc-mastercard:before { + content: "\f1f1"; } + +.fa-cc-paypal:before { + content: "\f1f4"; } + +.fa-cc-stripe:before { + content: "\f1f5"; } + +.fa-cc-visa:before { + content: "\f1f0"; } + +.fa-centercode:before { + content: "\f380"; } + +.fa-centos:before { + content: "\f789"; } + +.fa-chrome:before { + content: "\f268"; } + +.fa-chromecast:before { + content: "\f838"; } + +.fa-cloudflare:before { + 
content: "\e07d"; } + +.fa-cloudscale:before { + content: "\f383"; } + +.fa-cloudsmith:before { + content: "\f384"; } + +.fa-cloudversify:before { + content: "\f385"; } + +.fa-cmplid:before { + content: "\e360"; } + +.fa-codepen:before { + content: "\f1cb"; } + +.fa-codiepie:before { + content: "\f284"; } + +.fa-confluence:before { + content: "\f78d"; } + +.fa-connectdevelop:before { + content: "\f20e"; } + +.fa-contao:before { + content: "\f26d"; } + +.fa-cotton-bureau:before { + content: "\f89e"; } + +.fa-cpanel:before { + content: "\f388"; } + +.fa-creative-commons:before { + content: "\f25e"; } + +.fa-creative-commons-by:before { + content: "\f4e7"; } + +.fa-creative-commons-nc:before { + content: "\f4e8"; } + +.fa-creative-commons-nc-eu:before { + content: "\f4e9"; } + +.fa-creative-commons-nc-jp:before { + content: "\f4ea"; } + +.fa-creative-commons-nd:before { + content: "\f4eb"; } + +.fa-creative-commons-pd:before { + content: "\f4ec"; } + +.fa-creative-commons-pd-alt:before { + content: "\f4ed"; } + +.fa-creative-commons-remix:before { + content: "\f4ee"; } + +.fa-creative-commons-sa:before { + content: "\f4ef"; } + +.fa-creative-commons-sampling:before { + content: "\f4f0"; } + +.fa-creative-commons-sampling-plus:before { + content: "\f4f1"; } + +.fa-creative-commons-share:before { + content: "\f4f2"; } + +.fa-creative-commons-zero:before { + content: "\f4f3"; } + +.fa-critical-role:before { + content: "\f6c9"; } + +.fa-css3:before { + content: "\f13c"; } + +.fa-css3-alt:before { + content: "\f38b"; } + +.fa-cuttlefish:before { + content: "\f38c"; } + +.fa-d-and-d:before { + content: "\f38d"; } + +.fa-d-and-d-beyond:before { + content: "\f6ca"; } + +.fa-dailymotion:before { + content: "\e052"; } + +.fa-dashcube:before { + content: "\f210"; } + +.fa-deezer:before { + content: "\e077"; } + +.fa-delicious:before { + content: "\f1a5"; } + +.fa-deploydog:before { + content: "\f38e"; } + +.fa-deskpro:before { + content: "\f38f"; } + +.fa-dev:before { + content: 
"\f6cc"; } + +.fa-deviantart:before { + content: "\f1bd"; } + +.fa-dhl:before { + content: "\f790"; } + +.fa-diaspora:before { + content: "\f791"; } + +.fa-digg:before { + content: "\f1a6"; } + +.fa-digital-ocean:before { + content: "\f391"; } + +.fa-discord:before { + content: "\f392"; } + +.fa-discourse:before { + content: "\f393"; } + +.fa-dochub:before { + content: "\f394"; } + +.fa-docker:before { + content: "\f395"; } + +.fa-draft2digital:before { + content: "\f396"; } + +.fa-dribbble:before { + content: "\f17d"; } + +.fa-dribbble-square:before { + content: "\f397"; } + +.fa-dropbox:before { + content: "\f16b"; } + +.fa-drupal:before { + content: "\f1a9"; } + +.fa-dyalog:before { + content: "\f399"; } + +.fa-earlybirds:before { + content: "\f39a"; } + +.fa-ebay:before { + content: "\f4f4"; } + +.fa-edge:before { + content: "\f282"; } + +.fa-edge-legacy:before { + content: "\e078"; } + +.fa-elementor:before { + content: "\f430"; } + +.fa-ello:before { + content: "\f5f1"; } + +.fa-ember:before { + content: "\f423"; } + +.fa-empire:before { + content: "\f1d1"; } + +.fa-envira:before { + content: "\f299"; } + +.fa-erlang:before { + content: "\f39d"; } + +.fa-ethereum:before { + content: "\f42e"; } + +.fa-etsy:before { + content: "\f2d7"; } + +.fa-evernote:before { + content: "\f839"; } + +.fa-expeditedssl:before { + content: "\f23e"; } + +.fa-facebook:before { + content: "\f09a"; } + +.fa-facebook-f:before { + content: "\f39e"; } + +.fa-facebook-messenger:before { + content: "\f39f"; } + +.fa-facebook-square:before { + content: "\f082"; } + +.fa-fantasy-flight-games:before { + content: "\f6dc"; } + +.fa-fedex:before { + content: "\f797"; } + +.fa-fedora:before { + content: "\f798"; } + +.fa-figma:before { + content: "\f799"; } + +.fa-firefox:before { + content: "\f269"; } + +.fa-firefox-browser:before { + content: "\e007"; } + +.fa-first-order:before { + content: "\f2b0"; } + +.fa-first-order-alt:before { + content: "\f50a"; } + +.fa-firstdraft:before { + 
content: "\f3a1"; } + +.fa-flickr:before { + content: "\f16e"; } + +.fa-flipboard:before { + content: "\f44d"; } + +.fa-fly:before { + content: "\f417"; } + +.fa-font-awesome:before { + content: "\f2b4"; } + +.fa-font-awesome-flag:before { + content: "\f2b4"; } + +.fa-font-awesome-logo-full:before { + content: "\f2b4"; } + +.fa-fonticons:before { + content: "\f280"; } + +.fa-fonticons-fi:before { + content: "\f3a2"; } + +.fa-fort-awesome:before { + content: "\f286"; } + +.fa-fort-awesome-alt:before { + content: "\f3a3"; } + +.fa-forumbee:before { + content: "\f211"; } + +.fa-foursquare:before { + content: "\f180"; } + +.fa-free-code-camp:before { + content: "\f2c5"; } + +.fa-freebsd:before { + content: "\f3a4"; } + +.fa-fulcrum:before { + content: "\f50b"; } + +.fa-galactic-republic:before { + content: "\f50c"; } + +.fa-galactic-senate:before { + content: "\f50d"; } + +.fa-get-pocket:before { + content: "\f265"; } + +.fa-gg:before { + content: "\f260"; } + +.fa-gg-circle:before { + content: "\f261"; } + +.fa-git:before { + content: "\f1d3"; } + +.fa-git-alt:before { + content: "\f841"; } + +.fa-git-square:before { + content: "\f1d2"; } + +.fa-github:before { + content: "\f09b"; } + +.fa-github-alt:before { + content: "\f113"; } + +.fa-github-square:before { + content: "\f092"; } + +.fa-gitkraken:before { + content: "\f3a6"; } + +.fa-gitlab:before { + content: "\f296"; } + +.fa-gitter:before { + content: "\f426"; } + +.fa-glide:before { + content: "\f2a5"; } + +.fa-glide-g:before { + content: "\f2a6"; } + +.fa-gofore:before { + content: "\f3a7"; } + +.fa-golang:before { + content: "\e40f"; } + +.fa-goodreads:before { + content: "\f3a8"; } + +.fa-goodreads-g:before { + content: "\f3a9"; } + +.fa-google:before { + content: "\f1a0"; } + +.fa-google-drive:before { + content: "\f3aa"; } + +.fa-google-pay:before { + content: "\e079"; } + +.fa-google-play:before { + content: "\f3ab"; } + +.fa-google-plus:before { + content: "\f2b3"; } + +.fa-google-plus-g:before { + 
content: "\f0d5"; } + +.fa-google-plus-square:before { + content: "\f0d4"; } + +.fa-google-wallet:before { + content: "\f1ee"; } + +.fa-gratipay:before { + content: "\f184"; } + +.fa-grav:before { + content: "\f2d6"; } + +.fa-gripfire:before { + content: "\f3ac"; } + +.fa-grunt:before { + content: "\f3ad"; } + +.fa-guilded:before { + content: "\e07e"; } + +.fa-gulp:before { + content: "\f3ae"; } + +.fa-hacker-news:before { + content: "\f1d4"; } + +.fa-hacker-news-square:before { + content: "\f3af"; } + +.fa-hackerrank:before { + content: "\f5f7"; } + +.fa-hashnode:before { + content: "\e499"; } + +.fa-hips:before { + content: "\f452"; } + +.fa-hire-a-helper:before { + content: "\f3b0"; } + +.fa-hive:before { + content: "\e07f"; } + +.fa-hooli:before { + content: "\f427"; } + +.fa-hornbill:before { + content: "\f592"; } + +.fa-hotjar:before { + content: "\f3b1"; } + +.fa-houzz:before { + content: "\f27c"; } + +.fa-html5:before { + content: "\f13b"; } + +.fa-hubspot:before { + content: "\f3b2"; } + +.fa-ideal:before { + content: "\e013"; } + +.fa-imdb:before { + content: "\f2d8"; } + +.fa-instagram:before { + content: "\f16d"; } + +.fa-instagram-square:before { + content: "\e055"; } + +.fa-instalod:before { + content: "\e081"; } + +.fa-intercom:before { + content: "\f7af"; } + +.fa-internet-explorer:before { + content: "\f26b"; } + +.fa-invision:before { + content: "\f7b0"; } + +.fa-ioxhost:before { + content: "\f208"; } + +.fa-itch-io:before { + content: "\f83a"; } + +.fa-itunes:before { + content: "\f3b4"; } + +.fa-itunes-note:before { + content: "\f3b5"; } + +.fa-java:before { + content: "\f4e4"; } + +.fa-jedi-order:before { + content: "\f50e"; } + +.fa-jenkins:before { + content: "\f3b6"; } + +.fa-jira:before { + content: "\f7b1"; } + +.fa-joget:before { + content: "\f3b7"; } + +.fa-joomla:before { + content: "\f1aa"; } + +.fa-js:before { + content: "\f3b8"; } + +.fa-js-square:before { + content: "\f3b9"; } + +.fa-jsfiddle:before { + content: "\f1cc"; } + 
+.fa-kaggle:before { + content: "\f5fa"; } + +.fa-keybase:before { + content: "\f4f5"; } + +.fa-keycdn:before { + content: "\f3ba"; } + +.fa-kickstarter:before { + content: "\f3bb"; } + +.fa-kickstarter-k:before { + content: "\f3bc"; } + +.fa-korvue:before { + content: "\f42f"; } + +.fa-laravel:before { + content: "\f3bd"; } + +.fa-lastfm:before { + content: "\f202"; } + +.fa-lastfm-square:before { + content: "\f203"; } + +.fa-leanpub:before { + content: "\f212"; } + +.fa-less:before { + content: "\f41d"; } + +.fa-line:before { + content: "\f3c0"; } + +.fa-linkedin:before { + content: "\f08c"; } + +.fa-linkedin-in:before { + content: "\f0e1"; } + +.fa-linode:before { + content: "\f2b8"; } + +.fa-linux:before { + content: "\f17c"; } + +.fa-lyft:before { + content: "\f3c3"; } + +.fa-magento:before { + content: "\f3c4"; } + +.fa-mailchimp:before { + content: "\f59e"; } + +.fa-mandalorian:before { + content: "\f50f"; } + +.fa-markdown:before { + content: "\f60f"; } + +.fa-mastodon:before { + content: "\f4f6"; } + +.fa-maxcdn:before { + content: "\f136"; } + +.fa-mdb:before { + content: "\f8ca"; } + +.fa-medapps:before { + content: "\f3c6"; } + +.fa-medium:before { + content: "\f23a"; } + +.fa-medium-m:before { + content: "\f23a"; } + +.fa-medrt:before { + content: "\f3c8"; } + +.fa-meetup:before { + content: "\f2e0"; } + +.fa-megaport:before { + content: "\f5a3"; } + +.fa-mendeley:before { + content: "\f7b3"; } + +.fa-microblog:before { + content: "\e01a"; } + +.fa-microsoft:before { + content: "\f3ca"; } + +.fa-mix:before { + content: "\f3cb"; } + +.fa-mixcloud:before { + content: "\f289"; } + +.fa-mixer:before { + content: "\e056"; } + +.fa-mizuni:before { + content: "\f3cc"; } + +.fa-modx:before { + content: "\f285"; } + +.fa-monero:before { + content: "\f3d0"; } + +.fa-napster:before { + content: "\f3d2"; } + +.fa-neos:before { + content: "\f612"; } + +.fa-nfc-directional:before { + content: "\e530"; } + +.fa-nfc-symbol:before { + content: "\e531"; } + 
+.fa-nimblr:before { + content: "\f5a8"; } + +.fa-node:before { + content: "\f419"; } + +.fa-node-js:before { + content: "\f3d3"; } + +.fa-npm:before { + content: "\f3d4"; } + +.fa-ns8:before { + content: "\f3d5"; } + +.fa-nutritionix:before { + content: "\f3d6"; } + +.fa-octopus-deploy:before { + content: "\e082"; } + +.fa-odnoklassniki:before { + content: "\f263"; } + +.fa-odnoklassniki-square:before { + content: "\f264"; } + +.fa-old-republic:before { + content: "\f510"; } + +.fa-opencart:before { + content: "\f23d"; } + +.fa-openid:before { + content: "\f19b"; } + +.fa-opera:before { + content: "\f26a"; } + +.fa-optin-monster:before { + content: "\f23c"; } + +.fa-orcid:before { + content: "\f8d2"; } + +.fa-osi:before { + content: "\f41a"; } + +.fa-padlet:before { + content: "\e4a0"; } + +.fa-page4:before { + content: "\f3d7"; } + +.fa-pagelines:before { + content: "\f18c"; } + +.fa-palfed:before { + content: "\f3d8"; } + +.fa-patreon:before { + content: "\f3d9"; } + +.fa-paypal:before { + content: "\f1ed"; } + +.fa-perbyte:before { + content: "\e083"; } + +.fa-periscope:before { + content: "\f3da"; } + +.fa-phabricator:before { + content: "\f3db"; } + +.fa-phoenix-framework:before { + content: "\f3dc"; } + +.fa-phoenix-squadron:before { + content: "\f511"; } + +.fa-php:before { + content: "\f457"; } + +.fa-pied-piper:before { + content: "\f2ae"; } + +.fa-pied-piper-alt:before { + content: "\f1a8"; } + +.fa-pied-piper-hat:before { + content: "\f4e5"; } + +.fa-pied-piper-pp:before { + content: "\f1a7"; } + +.fa-pied-piper-square:before { + content: "\e01e"; } + +.fa-pinterest:before { + content: "\f0d2"; } + +.fa-pinterest-p:before { + content: "\f231"; } + +.fa-pinterest-square:before { + content: "\f0d3"; } + +.fa-pix:before { + content: "\e43a"; } + +.fa-playstation:before { + content: "\f3df"; } + +.fa-product-hunt:before { + content: "\f288"; } + +.fa-pushed:before { + content: "\f3e1"; } + +.fa-python:before { + content: "\f3e2"; } + +.fa-qq:before { + 
content: "\f1d6"; } + +.fa-quinscape:before { + content: "\f459"; } + +.fa-quora:before { + content: "\f2c4"; } + +.fa-r-project:before { + content: "\f4f7"; } + +.fa-raspberry-pi:before { + content: "\f7bb"; } + +.fa-ravelry:before { + content: "\f2d9"; } + +.fa-react:before { + content: "\f41b"; } + +.fa-reacteurope:before { + content: "\f75d"; } + +.fa-readme:before { + content: "\f4d5"; } + +.fa-rebel:before { + content: "\f1d0"; } + +.fa-red-river:before { + content: "\f3e3"; } + +.fa-reddit:before { + content: "\f1a1"; } + +.fa-reddit-alien:before { + content: "\f281"; } + +.fa-reddit-square:before { + content: "\f1a2"; } + +.fa-redhat:before { + content: "\f7bc"; } + +.fa-renren:before { + content: "\f18b"; } + +.fa-replyd:before { + content: "\f3e6"; } + +.fa-researchgate:before { + content: "\f4f8"; } + +.fa-resolving:before { + content: "\f3e7"; } + +.fa-rev:before { + content: "\f5b2"; } + +.fa-rocketchat:before { + content: "\f3e8"; } + +.fa-rockrms:before { + content: "\f3e9"; } + +.fa-rust:before { + content: "\e07a"; } + +.fa-safari:before { + content: "\f267"; } + +.fa-salesforce:before { + content: "\f83b"; } + +.fa-sass:before { + content: "\f41e"; } + +.fa-schlix:before { + content: "\f3ea"; } + +.fa-screenpal:before { + content: "\e570"; } + +.fa-scribd:before { + content: "\f28a"; } + +.fa-searchengin:before { + content: "\f3eb"; } + +.fa-sellcast:before { + content: "\f2da"; } + +.fa-sellsy:before { + content: "\f213"; } + +.fa-servicestack:before { + content: "\f3ec"; } + +.fa-shirtsinbulk:before { + content: "\f214"; } + +.fa-shopify:before { + content: "\e057"; } + +.fa-shopware:before { + content: "\f5b5"; } + +.fa-simplybuilt:before { + content: "\f215"; } + +.fa-sistrix:before { + content: "\f3ee"; } + +.fa-sith:before { + content: "\f512"; } + +.fa-sitrox:before { + content: "\e44a"; } + +.fa-sketch:before { + content: "\f7c6"; } + +.fa-skyatlas:before { + content: "\f216"; } + +.fa-skype:before { + content: "\f17e"; } + 
+.fa-slack:before { + content: "\f198"; } + +.fa-slack-hash:before { + content: "\f198"; } + +.fa-slideshare:before { + content: "\f1e7"; } + +.fa-snapchat:before { + content: "\f2ab"; } + +.fa-snapchat-ghost:before { + content: "\f2ab"; } + +.fa-snapchat-square:before { + content: "\f2ad"; } + +.fa-soundcloud:before { + content: "\f1be"; } + +.fa-sourcetree:before { + content: "\f7d3"; } + +.fa-speakap:before { + content: "\f3f3"; } + +.fa-speaker-deck:before { + content: "\f83c"; } + +.fa-spotify:before { + content: "\f1bc"; } + +.fa-square-font-awesome:before { + content: "\f425"; } + +.fa-square-font-awesome-stroke:before { + content: "\f35c"; } + +.fa-font-awesome-alt:before { + content: "\f35c"; } + +.fa-squarespace:before { + content: "\f5be"; } + +.fa-stack-exchange:before { + content: "\f18d"; } + +.fa-stack-overflow:before { + content: "\f16c"; } + +.fa-stackpath:before { + content: "\f842"; } + +.fa-staylinked:before { + content: "\f3f5"; } + +.fa-steam:before { + content: "\f1b6"; } + +.fa-steam-square:before { + content: "\f1b7"; } + +.fa-steam-symbol:before { + content: "\f3f6"; } + +.fa-sticker-mule:before { + content: "\f3f7"; } + +.fa-strava:before { + content: "\f428"; } + +.fa-stripe:before { + content: "\f429"; } + +.fa-stripe-s:before { + content: "\f42a"; } + +.fa-studiovinari:before { + content: "\f3f8"; } + +.fa-stumbleupon:before { + content: "\f1a4"; } + +.fa-stumbleupon-circle:before { + content: "\f1a3"; } + +.fa-superpowers:before { + content: "\f2dd"; } + +.fa-supple:before { + content: "\f3f9"; } + +.fa-suse:before { + content: "\f7d6"; } + +.fa-swift:before { + content: "\f8e1"; } + +.fa-symfony:before { + content: "\f83d"; } + +.fa-teamspeak:before { + content: "\f4f9"; } + +.fa-telegram:before { + content: "\f2c6"; } + +.fa-telegram-plane:before { + content: "\f2c6"; } + +.fa-tencent-weibo:before { + content: "\f1d5"; } + +.fa-the-red-yeti:before { + content: "\f69d"; } + +.fa-themeco:before { + content: "\f5c6"; } + 
+.fa-themeisle:before { + content: "\f2b2"; } + +.fa-think-peaks:before { + content: "\f731"; } + +.fa-tiktok:before { + content: "\e07b"; } + +.fa-trade-federation:before { + content: "\f513"; } + +.fa-trello:before { + content: "\f181"; } + +.fa-tumblr:before { + content: "\f173"; } + +.fa-tumblr-square:before { + content: "\f174"; } + +.fa-twitch:before { + content: "\f1e8"; } + +.fa-twitter:before { + content: "\f099"; } + +.fa-twitter-square:before { + content: "\f081"; } + +.fa-typo3:before { + content: "\f42b"; } + +.fa-uber:before { + content: "\f402"; } + +.fa-ubuntu:before { + content: "\f7df"; } + +.fa-uikit:before { + content: "\f403"; } + +.fa-umbraco:before { + content: "\f8e8"; } + +.fa-uncharted:before { + content: "\e084"; } + +.fa-uniregistry:before { + content: "\f404"; } + +.fa-unity:before { + content: "\e049"; } + +.fa-unsplash:before { + content: "\e07c"; } + +.fa-untappd:before { + content: "\f405"; } + +.fa-ups:before { + content: "\f7e0"; } + +.fa-usb:before { + content: "\f287"; } + +.fa-usps:before { + content: "\f7e1"; } + +.fa-ussunnah:before { + content: "\f407"; } + +.fa-vaadin:before { + content: "\f408"; } + +.fa-viacoin:before { + content: "\f237"; } + +.fa-viadeo:before { + content: "\f2a9"; } + +.fa-viadeo-square:before { + content: "\f2aa"; } + +.fa-viber:before { + content: "\f409"; } + +.fa-vimeo:before { + content: "\f40a"; } + +.fa-vimeo-square:before { + content: "\f194"; } + +.fa-vimeo-v:before { + content: "\f27d"; } + +.fa-vine:before { + content: "\f1ca"; } + +.fa-vk:before { + content: "\f189"; } + +.fa-vnv:before { + content: "\f40b"; } + +.fa-vuejs:before { + content: "\f41f"; } + +.fa-watchman-monitoring:before { + content: "\e087"; } + +.fa-waze:before { + content: "\f83f"; } + +.fa-weebly:before { + content: "\f5cc"; } + +.fa-weibo:before { + content: "\f18a"; } + +.fa-weixin:before { + content: "\f1d7"; } + +.fa-whatsapp:before { + content: "\f232"; } + +.fa-whatsapp-square:before { + content: "\f40c"; } + 
+.fa-whmcs:before { + content: "\f40d"; } + +.fa-wikipedia-w:before { + content: "\f266"; } + +.fa-windows:before { + content: "\f17a"; } + +.fa-wirsindhandwerk:before { + content: "\e2d0"; } + +.fa-wsh:before { + content: "\e2d0"; } + +.fa-wix:before { + content: "\f5cf"; } + +.fa-wizards-of-the-coast:before { + content: "\f730"; } + +.fa-wodu:before { + content: "\e088"; } + +.fa-wolf-pack-battalion:before { + content: "\f514"; } + +.fa-wordpress:before { + content: "\f19a"; } + +.fa-wordpress-simple:before { + content: "\f411"; } + +.fa-wpbeginner:before { + content: "\f297"; } + +.fa-wpexplorer:before { + content: "\f2de"; } + +.fa-wpforms:before { + content: "\f298"; } + +.fa-wpressr:before { + content: "\f3e4"; } + +.fa-xbox:before { + content: "\f412"; } + +.fa-xing:before { + content: "\f168"; } + +.fa-xing-square:before { + content: "\f169"; } + +.fa-y-combinator:before { + content: "\f23b"; } + +.fa-yahoo:before { + content: "\f19e"; } + +.fa-yammer:before { + content: "\f840"; } + +.fa-yandex:before { + content: "\f413"; } + +.fa-yandex-international:before { + content: "\f414"; } + +.fa-yarn:before { + content: "\f7e3"; } + +.fa-yelp:before { + content: "\f1e9"; } + +.fa-yoast:before { + content: "\f2b1"; } + +.fa-youtube:before { + content: "\f167"; } + +.fa-youtube-square:before { + content: "\f431"; } + +.fa-zhihu:before { + content: "\f63f"; } +:root, :host { + --fa-font-regular: normal 400 1em/1 "Font Awesome 6 Free"; } + +@font-face { + font-family: 'Font Awesome 6 Free'; + font-style: normal; + font-weight: 400; + font-display: block; + src: url("../webfonts/fa-regular-400.woff2") format("woff2"), url("../webfonts/fa-regular-400.ttf") format("truetype"); } + +.far, +.fa-regular { + font-family: 'Font Awesome 6 Free'; + font-weight: 400; } +:root, :host { + --fa-font-solid: normal 900 1em/1 "Font Awesome 6 Free"; } + +@font-face { + font-family: 'Font Awesome 6 Free'; + font-style: normal; + font-weight: 900; + font-display: block; + src: 
url("../webfonts/fa-solid-900.woff2") format("woff2"), url("../webfonts/fa-solid-900.ttf") format("truetype"); } + +.fas, +.fa-solid { + font-family: 'Font Awesome 6 Free'; + font-weight: 900; } +@font-face { + font-family: "Font Awesome 5 Brands"; + font-display: block; + font-weight: 400; + src: url("../webfonts/fa-brands-400.woff2") format("woff2"), url("../webfonts/fa-brands-400.ttf") format("truetype"); } + +@font-face { + font-family: "Font Awesome 5 Free"; + font-display: block; + font-weight: 900; + src: url("../webfonts/fa-solid-900.woff2") format("woff2"), url("../webfonts/fa-solid-900.ttf") format("truetype"); } + +@font-face { + font-family: "Font Awesome 5 Free"; + font-display: block; + font-weight: 400; + src: url("../webfonts/fa-regular-400.woff2") format("woff2"), url("../webfonts/fa-regular-400.ttf") format("truetype"); } +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("../webfonts/fa-solid-900.woff2") format("woff2"), url("../webfonts/fa-solid-900.ttf") format("truetype"); } + +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("../webfonts/fa-brands-400.woff2") format("woff2"), url("../webfonts/fa-brands-400.ttf") format("truetype"); } + +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("../webfonts/fa-regular-400.woff2") format("woff2"), url("../webfonts/fa-regular-400.ttf") format("truetype"); + unicode-range: 
U+F003,U+F006,U+F014,U+F016-F017,U+F01A-F01B,U+F01D,U+F022,U+F03E,U+F044,U+F046,U+F05C-F05D,U+F06E,U+F070,U+F087-F088,U+F08A,U+F094,U+F096-F097,U+F09D,U+F0A0,U+F0A2,U+F0A4-F0A7,U+F0C5,U+F0C7,U+F0E5-F0E6,U+F0EB,U+F0F6-F0F8,U+F10C,U+F114-F115,U+F118-F11A,U+F11C-F11D,U+F133,U+F147,U+F14E,U+F150-F152,U+F185-F186,U+F18E,U+F190-F192,U+F196,U+F1C1-F1C9,U+F1D9,U+F1DB,U+F1E3,U+F1EA,U+F1F7,U+F1F9,U+F20A,U+F247-F248,U+F24A,U+F24D,U+F255-F25B,U+F25D,U+F271-F274,U+F278,U+F27B,U+F28C,U+F28E,U+F29C,U+F2B5,U+F2B7,U+F2BA,U+F2BC,U+F2BE,U+F2C0-F2C1,U+F2C3,U+F2D0,U+F2D2,U+F2D4,U+F2DC; } + +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("../webfonts/fa-v4compatibility.woff2") format("woff2"), url("../webfonts/fa-v4compatibility.ttf") format("truetype"); + unicode-range: U+F041,U+F047,U+F065-F066,U+F07D-F07E,U+F080,U+F08B,U+F08E,U+F090,U+F09A,U+F0AC,U+F0AE,U+F0B2,U+F0D0,U+F0D6,U+F0E4,U+F0EC,U+F10A-F10B,U+F123,U+F13E,U+F148-F149,U+F14C,U+F156,U+F15E,U+F160-F161,U+F163,U+F175-F178,U+F195,U+F1F8,U+F219,U+F250,U+F252,U+F27A; } diff --git a/_extensions/quarto-ext/fontawesome/assets/css/latex-fontsize.css b/_extensions/quarto-ext/fontawesome/assets/css/latex-fontsize.css new file mode 100644 index 00000000..45545ecf --- /dev/null +++ b/_extensions/quarto-ext/fontawesome/assets/css/latex-fontsize.css @@ -0,0 +1,30 @@ +.fa-tiny { + font-size: 0.5em; +} +.fa-scriptsize { + font-size: 0.7em; +} +.fa-footnotesize { + font-size: 0.8em; +} +.fa-small { + font-size: 0.9em; +} +.fa-normalsize { + font-size: 1em; +} +.fa-large { + font-size: 1.2em; +} +.fa-Large { + font-size: 1.5em; +} +.fa-LARGE { + font-size: 1.75em; +} +.fa-huge { + font-size: 2em; +} +.fa-Huge { + font-size: 2.5em; +} diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-brands-400.ttf b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-brands-400.ttf new file mode 100644 index 00000000..430a02ed Binary files /dev/null and 
b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-brands-400.ttf differ diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-brands-400.woff2 b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-brands-400.woff2 new file mode 100644 index 00000000..4d904aab Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-brands-400.woff2 differ diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-regular-400.ttf b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-regular-400.ttf new file mode 100644 index 00000000..23e3febe Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-regular-400.ttf differ diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-regular-400.woff2 b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-regular-400.woff2 new file mode 100644 index 00000000..80e3b124 Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-regular-400.woff2 differ diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-solid-900.ttf b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-solid-900.ttf new file mode 100644 index 00000000..da908242 Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-solid-900.ttf differ diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-solid-900.woff2 b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-solid-900.woff2 new file mode 100644 index 00000000..360ba115 Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-solid-900.woff2 differ diff --git a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-v4compatibility.ttf b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-v4compatibility.ttf new file mode 100644 index 00000000..e9545ed5 Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-v4compatibility.ttf differ diff --git 
a/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-v4compatibility.woff2 b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-v4compatibility.woff2 new file mode 100644 index 00000000..db5b0b99 Binary files /dev/null and b/_extensions/quarto-ext/fontawesome/assets/webfonts/fa-v4compatibility.woff2 differ diff --git a/_extensions/quarto-ext/fontawesome/fontawesome.lua b/_extensions/quarto-ext/fontawesome/fontawesome.lua new file mode 100644 index 00000000..ff64dcaa --- /dev/null +++ b/_extensions/quarto-ext/fontawesome/fontawesome.lua @@ -0,0 +1,84 @@ +local function ensureLatexDeps() + quarto.doc.use_latex_package("fontawesome5") +end + +local function ensureHtmlDeps() + quarto.doc.add_html_dependency({ + name = 'fontawesome6', + version = '0.1.0', + stylesheets = {'assets/css/all.css', 'assets/css/latex-fontsize.css'} + }) +end + +local function isEmpty(s) + return s == nil or s == '' +end + +local function isValidSize(size) + local validSizes = { + "tiny", + "scriptsize", + "footnotesize", + "small", + "normalsize", + "large", + "Large", + "LARGE", + "huge", + "Huge" + } + for _, v in ipairs(validSizes) do + if v == size then + return size + end + end + return "" +end + +return { + ["fa"] = function(args, kwargs) + + local group = "solid" + local icon = pandoc.utils.stringify(args[1]) + if #args > 1 then + group = icon + icon = pandoc.utils.stringify(args[2]) + end + + local title = pandoc.utils.stringify(kwargs["title"]) + if not isEmpty(title) then + title = " title=\"" .. title .. "\"" + end + + local label = pandoc.utils.stringify(kwargs["label"]) + if isEmpty(label) then + label = " aria-label=\"" .. icon .. "\"" + else + label = " aria-label=\"" .. label .. "\"" + end + + local size = pandoc.utils.stringify(kwargs["size"]) + + -- detect html (excluding epub which won't handle fa) + if quarto.doc.is_format("html:js") then + ensureHtmlDeps() + if not isEmpty(size) then + size = " fa-" .. 
size + end + return pandoc.RawInline( + 'html', + "" + ) + -- detect pdf / beamer / latex / etc + elseif quarto.doc.is_format("pdf") then + ensureLatexDeps() + if isEmpty(isValidSize(size)) then + return pandoc.RawInline('tex', "\\faIcon{" .. icon .. "}") + else + return pandoc.RawInline('tex', "{\\" .. size .. "\\faIcon{" .. icon .. "}}") + end + else + return pandoc.Null() + end + end +} diff --git a/_freeze/find/all/index/execute-results/html.json b/_freeze/find/all/index/execute-results/html.json new file mode 100644 index 00000000..a584cdf3 --- /dev/null +++ b/_freeze/find/all/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "0b4b66eb1f04be2653f28b5f21037050", + "result": { + "markdown": "---\ntitle: \"Search all of tidymodels\"\ntoc: true\ntoc-depth: 0\ninclude-after-body: ../../resources.html\n---\n\n\n\nHere are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation.\n\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n\n\n\n\n\n\n\n\n\n\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/find/broom/index/execute-results/html.json b/_freeze/find/broom/index/execute-results/html.json new file mode 100644 index 00000000..76059c35 --- /dev/null +++ b/_freeze/find/broom/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "d9ba8ac914f98666a49eb29c39c49db6", + "result": { + "markdown": "---\nsubtitle: Broom\ntitle: Search broom methods\nweight: 3\ndescription: | \n Find `tidy()`, `augment()`, and `glance()` methods for different objects.\ntoc: true\ntoc-depth: 0\ninclude-after-body: ../../resources.html\n---\n\n\nHere are all the broom functions available across CRAN packages. Click on the link in the topic column to find more information.\n\n\n\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n\n\n\n\n\n\n\n\n\n\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/find/parsnip/index/execute-results/html.json b/_freeze/find/parsnip/index/execute-results/html.json new file mode 100644 index 00000000..b456132e --- /dev/null +++ b/_freeze/find/parsnip/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "52d629718a3b103e78f395cf3405d546", + "result": { + "markdown": "---\ntitle: Search parsnip models\nweight: 2\ndescription: | \n Find model types, engines, and arguments to fit and predict in the tidymodels framework.\ntoc: true\ntoc-depth: 0\ninclude-after-body: ../../resources.html\n---\n\n\nTo learn about the parsnip package, see [*Get Started: Build a Model*](/start/models/). Use the tables below to find [model types and engines](#models).\n\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n\n\n\n\n\n\n\n\n\n\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/find/recipes/index/execute-results/html.json b/_freeze/find/recipes/index/execute-results/html.json new file mode 100644 index 00000000..d05fb9de --- /dev/null +++ b/_freeze/find/recipes/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "adaa35caeb3f6c636e0fe482a16cacfa", + "result": { + "markdown": "---\nsubtitle: Recipes\ntitle: Search recipe steps\nweight: 3\ndescription: | \n Find recipe steps in the tidymodels framework to help you prep your data for modeling.\ntoc: true\ntoc-depth: 0\ninclude-after-body: ../../resources.html\n---\n\n\n\nTo learn about the recipes package, see [*Get Started: Preprocess your data with recipes*](/start/recipes/). The table below allows you to search for recipe steps across tidymodels packages.\n\n\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n\n\n\n\n\n\n\n\n\n\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/develop/broom/index/execute-results/html.json b/_freeze/learn/develop/broom/index/execute-results/html.json new file mode 100644 index 00000000..155c360a --- /dev/null +++ b/_freeze/learn/develop/broom/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "e2cda3d9c85af617e409adc2a978068c", + "result": { + "markdown": "---\ntitle: \"Create your own broom tidier methods\"\ncategories:\n - developer tools\ntype: learn-subsection\nweight: 5\ndescription: | \n Write tidy(), glance(), and augment() methods for new model objects.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: generics, tidymodels, tidyverse, and usethis.\n\nThe broom package provides tools to summarize key information about models in tidy `tibble()`s. The package provides three verbs, or \"tidiers,\" to help make model objects easier to work with:\n\n* `tidy()` summarizes information about model components\n* `glance()` reports information about the entire model\n* `augment()` adds information about observations to a dataset\n\nEach of the three verbs above are _generic_, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant _method_ implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. 
the package that supplies the model object) rather than to broom. New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package.\n\nWe'd like to make implementing external tidier methods as painless as possible. The general process for doing so is:\n\n* re-export the tidier generics\n* implement tidying methods\n* document the new methods\n\nIn this article, we'll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible.\n\n## Re-export the tidier generics\n\nThe first step is to re-export the generic functions for `tidy()`, `glance()`, and/or `augment()`. You could do so from `broom` itself, but we've provided an alternative, much lighter dependency called `generics`.\n\nFirst you'll need to add the [generics](https://github.com/r-lib/generics) package to `Imports`. We recommend using the [usethis](https://github.com/r-lib/usethis) package for this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_9890cbc4ec42f61834efb425089f40ef'}\n\n```{.r .cell-code}\nusethis::use_package(\"generics\", \"Imports\")\n```\n:::\n\n\nNext, you'll need to re-export the appropriate tidying methods. If you plan to implement a `glance()` method, for example, you can re-export the `glance()` generic by adding the following somewhere inside the `/R` folder of your package:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-4_38cd34fad95df0770cbd636f223a96e7'}\n\n```{.r .cell-code}\n#' @importFrom generics glance\n#' @export\ngenerics::glance\n```\n:::\n\n\nOftentimes it doesn't make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense.\n\n::: {.callout-warning}\n Please do not define `tidy()`, `glance()`, or `augment()` generics in your package. 
This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods. \n:::\n\n## Implement tidying methods\n\nYou'll now need to implement specific tidying methods for each of the generics you've re-exported in the above step. For each of `tidy()`, `glance()`, and `augment()`, we'll walk through the big picture, an example, and helpful resources.\n\nIn this article, we'll use the base R dataset `trees`, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R `lm()` function. \n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_14c6f51df99d72397978bc741263b776'}\n\n```{.r .cell-code}\n# load in the trees dataset\ndata(trees)\n\n# take a look!\nstr(trees)\n#> 'data.frame':\t31 obs. of 3 variables:\n#> $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...\n#> $ Height: num 70 65 63 72 81 83 66 75 80 75 ...\n#> $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...\n\n# fit the timber volume as a function of girth and height\ntrees_model <- lm(Volume ~ Girth + Height, data = trees)\n```\n:::\n\n\nLet's take a look at the `summary()` of our `trees_model` fit.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-6_4f89f95475d520f5553d536286380800'}\n\n```{.r .cell-code}\nsummary(trees_model)\n#> \n#> Call:\n#> lm(formula = Volume ~ Girth + Height, data = trees)\n#> \n#> Residuals:\n#> Min 1Q Median 3Q Max \n#> -6.4065 -2.6493 -0.2876 2.2003 8.4847 \n#> \n#> Coefficients:\n#> Estimate Std. Error t value Pr(>|t|) \n#> (Intercept) -57.9877 8.6382 -6.713 2.75e-07 ***\n#> Girth 4.7082 0.2643 17.816 < 2e-16 ***\n#> Height 0.3393 0.1302 2.607 0.0145 * \n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1\n#> \n#> Residual standard error: 3.882 on 28 degrees of freedom\n#> Multiple R-squared: 0.948,\tAdjusted R-squared: 0.9442 \n#> F-statistic: 255 on 2 and 28 DF, p-value: < 2.2e-16\n```\n:::\n\n\nThis output gives some summary statistics on the residuals (which would be described more fully in an `augment()` output), model coefficients (which, in this case, make up the `tidy()` output), and some model-level summarizations such as RSE, $R^2$, etc. (which make up the `glance()` output.)\n\n### Implementing the `tidy()` method\n\nThe `tidy(x, ...)` method will return a tibble where each row contains information about a component of the model. The `x` input is a model object, and the dots (`...`) are an optional argument to supply additional information to any calls inside your method. New `tidy()` methods can take additional arguments, but _must_ include the `x` and `...` arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These `tidy()` methods are useful for inspecting model details and creating custom model visualizations.\n\nReturning to the example of our linear model on timber volume, we'd like to extract information on the model components. In this example, the components are the regression coefficients. After taking a look at the model object and its `summary()`, you might notice that you can extract the regression coefficients as follows:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-7_2d06ddc139453420a1f9c3a700076bce'}\n\n```{.r .cell-code}\nsummary(trees_model)$coefficients\n#> Estimate Std. 
Error t value Pr(>|t|)\n#> (Intercept) -57.9876589 8.6382259 -6.712913 2.749507e-07\n#> Girth 4.7081605 0.2642646 17.816084 8.223304e-17\n#> Height 0.3392512 0.1301512 2.606594 1.449097e-02\n```\n:::\n\n\nThis object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. Converting to a tibble where the row names are contained in a column, you might write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-8_1c9d567aab67cfe9d789e4d092820760'}\n\n```{.r .cell-code}\ntrees_model_tidy <- summary(trees_model)$coefficients %>% \n as_tibble(rownames = \"term\")\n\ntrees_model_tidy\n#> # A tibble: 3 × 5\n#> term Estimate `Std. Error` `t value` `Pr(>|t|)`\n#> \n#> 1 (Intercept) -58.0 8.64 -6.71 2.75e- 7\n#> 2 Girth 4.71 0.264 17.8 8.22e-17\n#> 3 Height 0.339 0.130 2.61 1.45e- 2\n```\n:::\n\n\nThe broom package standardizes common column names used to describe coefficients. In this case, the column names are:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_d3b69cc8d7b0204354a5642b201db1e8'}\n\n```{.r .cell-code}\ncolnames(trees_model_tidy) <- c(\"term\", \"estimate\", \"std.error\", \"statistic\", \"p.value\")\n```\n:::\n\n\nA glossary giving the currently acceptable column names outputted by `tidy()` methods can be found [at the end of this article](#glossary). As a rule of thumb, column names resulting from `tidy()` methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions).\n\nFinally, it is common for `tidy()` methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. 
In this example, the `confint()` function can be used to calculate confidence intervals from a model object resulting from `lm()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_21580f34c068d2b20c54019dfd267709'}\n\n```{.r .cell-code}\nconfint(trees_model)\n#> 2.5 % 97.5 %\n#> (Intercept) -75.68226247 -40.2930554\n#> Girth 4.16683899 5.2494820\n#> Height 0.07264863 0.6058538\n```\n:::\n\n\nWith these considerations in mind, a reasonable `tidy()` method for `lm()` might look something like:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-11_f6d9f15ccd6cf42ff842df57e4438e6d'}\n\n```{.r .cell-code}\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n \n result <- summary(x)$coefficients %>%\n tibble::as_tibble(rownames = \"term\") %>%\n dplyr::rename(estimate = Estimate,\n std.error = `Std. Error`,\n statistic = `t value`,\n p.value = `Pr(>|t|)`)\n \n if (conf.int) {\n ci <- confint(x, level = conf.level)\n result <- dplyr::left_join(result, ci, by = \"term\")\n }\n \n result\n}\n```\n:::\n\n\n::: {.callout-note}\n If you're interested, the actual `tidy.lm()` source can be found [here](https://github.com/tidymodels/broom/blob/master/R/stats-lm-tidiers.R)! It's not too different from the version above except for some argument checking and additional columns. \n:::\n\nWith this method exported, then, if a user calls `tidy(fit)`, where `fit` is an output from `lm()`, the `tidy()` generic would \"redirect\" the call to the `tidy.lm()` function above.\n\nSome things to keep in mind while writing your `tidy()` method:\n\n* Sometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn't have the same interpretation, it doesn't make sense to summarize the fixed and random effects in the same table. 
In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-12_ec29c6d574e594c3ef38ee8eaa1897c8'}\n\n```{.r .cell-code}\nmodel <- mixed_model(...)\ntidy(model, effects = \"fixed\")\ntidy(model, effects = \"random\")\n```\n:::\n\n\n* How are missing values encoded in the model object and its `summary()`? Ensure that rows are included even when the associated model component is missing or rank deficient.\n* Are there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to `tidy()` methods include:\n - `conf.int`: A logical indicating whether or not to calculate confidence/credible intervals. This should default to `FALSE`.\n - `conf.level`: The confidence level to use for the interval when `conf.int = TRUE`. Typically defaults to `.95`.\n - `exponentiate`: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression).\n\n### Implementing the `glance()` method\n\n`glance()` returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the `x` input is a model object, and the `...` is an optional argument to supply additional information to any calls inside your method. New `glance()` methods can also take additional arguments and _must_ include the `x` and `...` arguments. 
(For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).)\n\nReturning to the `trees_model` example, we could pull out the $R^2$ value with the following code:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-13_2c1d97c34148db31a7158546bb447925'}\n\n```{.r .cell-code}\nsummary(trees_model)$r.squared\n#> [1] 0.94795\n```\n:::\n\n\nSimilarly, for the adjusted $R^2$:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-14_39b89051fa403289ab9a48d9960cd1ed'}\n\n```{.r .cell-code}\nsummary(trees_model)$adj.r.squared\n#> [1] 0.9442322\n```\n:::\n\n\nUnfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a `tibble()` element-by-element by subsetting the `summary()` object repeatedly. The `with()` function, however, can help make this process a bit less tedious by evaluating expressions inside of the `summary(trees_model)` environment. To grab those those same two model elements from above using `with()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-15_523943d96a851fb13ebcd46a651285c2'}\n\n```{.r .cell-code}\nwith(summary(trees_model),\n tibble::tibble(r.squared = r.squared,\n adj.r.squared = adj.r.squared))\n#> # A tibble: 1 × 2\n#> r.squared adj.r.squared\n#> \n#> 1 0.948 0.944\n```\n:::\n\n\nA reasonable `glance()` method for `lm()`, then, might look something like:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-16_562078fcbb69c81f75881559d1cd7b6a'}\n\n```{.r .cell-code}\nglance.lm <- function(x, ...) 
{\n with(\n summary(x),\n tibble::tibble(\n r.squared = r.squared,\n adj.r.squared = adj.r.squared,\n sigma = sigma,\n statistic = fstatistic[\"value\"],\n p.value = pf(\n fstatistic[\"value\"],\n fstatistic[\"numdf\"],\n fstatistic[\"dendf\"],\n lower.tail = FALSE\n ),\n df = fstatistic[\"numdf\"],\n logLik = as.numeric(stats::logLik(x)),\n AIC = stats::AIC(x),\n BIC = stats::BIC(x),\n deviance = stats::deviance(x),\n df.residual = df.residual(x),\n nobs = stats::nobs(x)\n )\n )\n}\n```\n:::\n\n\n::: {.callout-note}\nThis is the actual definition of `glance.lm()` provided by broom! \n:::\n\nSome things to keep in mind while writing `glance()` methods:\n* Output should not include the name of the modeling function or any arguments given to the modeling function.\n* In some cases, you may wish to provide model-level diagnostics not returned by the original object. For example, the above `glance.lm()` calculates `AIC` and `BIC` from the model fit. If these are easy to compute, feel free to add them. However, tidier methods are generally not an appropriate place to implement complex or time consuming calculations.\n* The `glance` method should always return the same columns in the same order when given an object of a given model class. If a summary metric (such as `AIC`) is not defined in certain circumstances, use `NA`.\n\n### Implementing the `augment()` method\n\n`augment()` methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a `.` prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in [the glossary](#glossary).) The `x` and `...` arguments share their meaning with the two functions described above. `augment` methods also optionally accept a `data` argument that is a `data.frame` (or `tibble`) to add observation-level information to, returning a `tibble` object with the same number of rows as `data`. 
Many `augment()` methods also accept a `newdata` argument, following the same conventions as the `data` argument, except with the underlying assumption that the model has not \"seen\" the data yet. As a result, `newdata` arguments need not contain the response columns in `data`. Only one of `data` or `newdata` should be supplied. A full glossary of acceptable arguments to `augment()` methods can be found at [the end of this article](#glossary).\n\nIf a `data` argument is not specified, `augment()` should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model.\n\nWith this is mind, we can look back to our `trees_model` example. For one, the `model` element inside of the `trees_model` object will allow us to recover the original data:\n\n\n::: {.cell layout-align=\"center\" rows.print='5' hash='cache/unnamed-chunk-17_7c6e8b627700bd9f5e2a735a3d928bc6'}\n\n```{.r .cell-code}\ntrees_model$model\n#> Volume Girth Height\n#> 1 10.3 8.3 70\n#> 2 10.3 8.6 65\n#> 3 10.2 8.8 63\n#> 4 16.4 10.5 72\n#> 5 18.8 10.7 81\n#> 6 19.7 10.8 83\n#> 7 15.6 11.0 66\n#> 8 18.2 11.0 75\n#> 9 22.6 11.1 80\n#> 10 19.9 11.2 75\n#> 11 24.2 11.3 79\n#> 12 21.0 11.4 76\n#> 13 21.4 11.4 76\n#> 14 21.3 11.7 69\n#> 15 19.1 12.0 75\n#> 16 22.2 12.9 74\n#> 17 33.8 12.9 85\n#> 18 27.4 13.3 86\n#> 19 25.7 13.7 71\n#> 20 24.9 13.8 64\n#> 21 34.5 14.0 78\n#> 22 31.7 14.2 80\n#> 23 36.3 14.5 74\n#> 24 38.3 16.0 72\n#> 25 42.6 16.3 77\n#> 26 55.4 17.3 81\n#> 27 55.7 17.5 82\n#> 28 58.3 17.9 80\n#> 29 51.5 18.0 80\n#> 30 51.0 18.0 80\n#> 31 77.0 20.6 87\n```\n:::\n\n\nSimilarly, the fitted values and residuals can be accessed with the following code:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-18_54bcee9723318351916df71231a948e6'}\n\n```{.r .cell-code}\nhead(trees_model$fitted.values)\n#> 1 2 3 4 5 6 \n#> 4.837660 4.553852 4.816981 15.874115 19.869008 
21.018327\nhead(trees_model$residuals)\n#> 1 2 3 4 5 6 \n#> 5.4623403 5.7461484 5.3830187 0.5258848 -1.0690084 -1.3183270\n```\n:::\n\n\nAs with `glance()` methods, it's fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. A common metric associated with linear models, for example, is the standard error of fitted values:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-19_c1e56b6e58b3dfef31c1fd4f075b0be6'}\n\n```{.r .cell-code}\nse.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>%\n unname()\n\nhead(se.fit)\n#> [1] 1.3211285 1.4893775 1.6325024 0.9444212 1.3484251 1.5319772\n```\n:::\n\n\nThus, a reasonable `augment()` method for `lm` might look something like this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-20_1099546007cdf74e838cc6ce7ff9bc2a'}\n\n```{.r .cell-code}\naugment.lm <- function(x, data = x$model, newdata = NULL, ...) {\n if (is.null(newdata)) {\n dplyr::bind_cols(tibble::as_tibble(data),\n tibble::tibble(.fitted = x$fitted.values,\n .se.fit = predict(x, \n newdata = data, \n se.fit = TRUE)$se.fit,\n .resid = x$residuals))\n } else {\n predictions <- predict(x, newdata = newdata, se.fit = TRUE)\n dplyr::bind_cols(tibble::as_tibble(newdata),\n tibble::tibble(.fitted = predictions$fit,\n .se.fit = predictions$se.fit))\n }\n}\n```\n:::\n\n\nSome other things to keep in mind while writing `augment()` methods:\n* The `newdata` argument should default to `NULL`. Users should only ever specify one of `data` or `newdata`. Providing both `data` and `newdata` should result in an error. The `newdata` argument should accept both `data.frame`s and `tibble`s.\n* Data given to the `data` argument must have both the original predictors and the original response. Data given to the `newdata` argument only needs to have the original predictors. 
This is important because there may be important information associated with training data that is not associated with test data. This means that the `original_data` object in `augment(model, data = original_data)` should provide `.fitted` and `.resid` columns (in most cases), whereas `test_data` in `augment(model, data = test_data)` only needs a `.fitted` column, even if the response is present in `test_data`.\n* If the `data` or `newdata` is specified as a `data.frame` with rownames, `augment` should return them in a column called `.rownames`.\n* For observations where no fitted values or summaries are available (where there's missing data, for example), return `NA`.\n* *The `augment()` method should always return as many rows as were in `data` or `newdata`*, depending on which is supplied\n\n::: {.callout-note}\nThe recommended interface and functionality for `augment()` methods may change soon. \n:::\n\n## Document the new methods\n\nThe only remaining step is to integrate the new methods into the parent package! To do so, just drop the methods into a `.R` file inside of the `/R` folder and document them using roxygen2. If you're unfamiliar with the process of documenting objects, you can read more about it [here](http://r-pkgs.had.co.nz/man.html). Here's an example of how our `tidy.lm()` method might be documented:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-21_bed5f753c0ac47c186d0574f61f2942a'}\n\n```{.r .cell-code}\n#' Tidy a(n) lm object\n#'\n#' @param x A `lm` object.\n#' @param conf.int Logical indicating whether or not to include \n#' a confidence interval in the tidied output. Defaults to FALSE.\n#' @param conf.level The confidence level to use for the confidence \n#' interval if conf.int = TRUE. Must be strictly greater than 0 \n#' and less than 1. Defaults to 0.95, which corresponds to a \n#' 95 percent confidence interval.\n#' @param ... 
Unused, included for generic consistency only.\n#' @return A tidy [tibble::tibble()] summarizing component-level\n#' information about the model\n#'\n#' @examples\n#' # load the trees dataset\n#' data(trees)\n#' \n#' # fit a linear model on timber volume\n#' trees_model <- lm(Volume ~ Girth + Height, data = trees)\n#'\n#' # summarize model coefficients in a tidy tibble!\n#' tidy(trees_model)\n#'\n#' @export\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n\n # ... the rest of the function definition goes here!\n```\n:::\n\n\nOnce you've documented each of your new methods and executed `devtools::document()`, you're done! Congrats on implementing your own broom tidier methods for a new model object!\n\n## Glossaries\n\n\n\n\n\n\n### Arguments\n\nTidier methods have a standardized set of acceptable argument and output column names. The currently acceptable argument names by tidier method are:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-23_d64cbb8b19e213d52d5c8c97634481a4'}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n\n### Column Names\n\nThe currently acceptable column names by tidier method are:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-24_53d73b4cb01f15e416ba42c248920e7d'}\n::: {.cell-output-display}\n```{=html}\n
\n\n```\n:::\n:::\n\n\nThe [alexpghayes/modeltests](https://github.com/alexpghayes/modeltests) package provides unit testing infrastructure to check your new tidier methods. Please file an issue there to request new arguments/columns to be added to the glossaries!\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> generics * 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> 
────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n\n\n\n\n\n\n\n\n\n\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/develop/metrics/index/execute-results/html.json b/_freeze/learn/develop/metrics/index/execute-results/html.json new file mode 100644 index 00000000..a1c16cb1 --- /dev/null +++ b/_freeze/learn/develop/metrics/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "a3c651685d7a717af97a8b934c8e982f", + "result": { + "markdown": "---\ntitle: \"Custom performance metrics\"\ncategories:\n - developer tools\ntype: learn-subsection\nweight: 3\ndescription: | \n Create a new performance metric and integrate it with yardstick functions.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: rlang and tidymodels.\n\nThe [yardstick](https://yardstick.tidymodels.org/) package already includes a large number of metrics, but there's obviously a chance that you might have a custom metric that hasn't been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics.\n\nWhy create custom metrics? With the infrastructure yardstick provides, you get:\n\n- Standardization between your metric and other preexisting metrics\n- Automatic error handling for types and lengths\n- Automatic selection of binary / multiclass metric implementations\n- Automatic `NA` handling\n- Support for grouped data frames\n- Support for use alongside other metrics in `metric_set()`\n\nThe implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. 
Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of `roc_auc()` after reading this vignette if you want to work on a class probability metric.\n\n## Numeric example: MSE\n\nMean squared error (sometimes MSE or from here on, `mse()`) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for `mse()` is:\n\n$$ MSE = \\frac{1}{N} \\sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) $$\n\nAll metrics should have a data frame version, and a vector version. The data frame version here will be named `mse()`, and the vector version will be `mse_vec()`.\n\n### Vector implementation\n\nTo start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as `beta` in `f_meas()`). To create the vector function, you need to do two things:\n\n1) Create an internal implementation function, `mse_impl()`.\n2) Pass on that implementation function to `metric_vec_template()`.\n\nBelow, `mse_impl()` contains the actual implementation of the metric, and takes `truth` and `estimate` as arguments along with any metric specific arguments.\n\nThe yardstick function `metric_vec_template()` accepts the implementation function along with the other arguments to `mse_vec()` and actually executes `mse_impl()`. Additionally, it has a `cls` argument to specify the allowed class type of `truth` and `estimate`. If the classes are the same, a single character class can be passed, and if they are different a character vector of length 2 can be supplied.\n\nThe `metric_vec_template()` helper handles the removal of `NA` values in your metric, so your implementation function does not have to worry about them. 
It performs type checking using `cls` and also checks that the `estimator` is valid, the second of which is covered in the classification example. This way, all you have to worry about is the core implementation.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_40aa10f80ee1b7ea2aa7f36a779d70b1'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\n\nmse_vec <- function(truth, estimate, na_rm = TRUE, ...) {\n \n mse_impl <- function(truth, estimate) {\n mean((truth - estimate) ^ 2)\n }\n \n metric_vec_template(\n metric_impl = mse_impl,\n truth = truth, \n estimate = estimate,\n na_rm = na_rm,\n cls = \"numeric\",\n ...\n )\n \n}\n```\n:::\n\n\nAt this point, you've created the vector version of the mean squared error metric.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-4_d550e4bdba84740582e612ff889e8142'}\n\n```{.r .cell-code}\ndata(\"solubility_test\")\n\nmse_vec(\n truth = solubility_test$solubility, \n estimate = solubility_test$prediction\n)\n#> Warning: `metric_vec_template()` was deprecated in yardstick 1.2.0.\n#> ℹ Please use `check_numeric_metric()`, `check_class_metric()`,\n#> `check_class_metric()`, `yardstick_remove_missing()`, and\n#> `yardstick_any_missing()` instead.\n#> [1] 0.5214438\n```\n:::\n\n\nIntelligent error handling is immediately available.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_4bf3c418c6588cb409c4d210eb940ce6'}\n\n```{.r .cell-code}\nmse_vec(truth = \"apple\", estimate = 1)\n#> Error in `validate_class()`:\n#> ! `truth` should be a numeric but a character was supplied.\n\nmse_vec(truth = 1, estimate = factor(\"xyz\"))\n#> Error in `validate_class()`:\n#> ! `estimate` should be a numeric but a factor was supplied.\n```\n:::\n\n\n`NA` values are removed if `na_rm = TRUE` (the default). 
If `na_rm = FALSE` and any `NA` values are detected, then the metric automatically returns `NA`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-6_78a790915bfde0b439d9d0662a01e6ec'}\n\n```{.r .cell-code}\n# NA values removed\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5))\n#> [1] 0.01\n\n# NA returned\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE)\n#> [1] NA\n```\n:::\n\n\n### Data frame implementation\n\nThe data frame version of the metric should be fairly simple. It is a generic function with a `data.frame` method that calls the yardstick helper, `metric_summarizer()`, and passes along the `mse_vec()` function to it along with versions of `truth` and `estimate` that have been wrapped in `rlang::enquo()` and then unquoted with `!!` so that non-standard evaluation can be supported.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-7_c6db20e9f927bb6bb347eb57583aa6a6'}\n\n```{.r .cell-code}\nlibrary(rlang)\n\nmse <- function(data, ...) {\n UseMethod(\"mse\")\n}\n\nmse <- new_numeric_metric(mse, direction = \"minimize\")\n\nmse.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) {\n \n metric_summarizer(\n metric_nm = \"mse\",\n metric_fn = mse_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! enquo(estimate), \n na_rm = na_rm,\n ...\n )\n \n}\n```\n:::\n\n\nAnd that's it. 
The yardstick package handles the rest with an internal call to `summarise()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-8_311f848a765d18c3ed89af83e8c36fbd'}\n\n```{.r .cell-code}\nmse(solubility_test, truth = solubility, estimate = prediction)\n\n# Error handling\nmse(solubility_test, truth = solubility, estimate = factor(\"xyz\"))\n```\n:::\n\n\nLet's test it out on a grouped data frame.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_8826c75b88bc25aea3d6ec123b1ad1ef'}\n\n```{.r .cell-code}\nlibrary(dplyr)\n\nset.seed(1234)\nsize <- 100\ntimes <- 10\n\n# create 10 resamples\nsolubility_resampled <- bind_rows(\n replicate(\n n = times,\n expr = sample_n(solubility_test, size, replace = TRUE),\n simplify = FALSE\n ),\n .id = \"resample\"\n)\n\nsolubility_resampled %>%\n group_by(resample) %>%\n mse(solubility, prediction)\n#> Warning: `metric_summarizer()` was deprecated in yardstick 1.2.0.\n#> ℹ Please use `numeric_metric_summarizer()`,\n#> `class_metric_summarizer()`, `prob_metric_summarizer()`, or\n#> `curve_metric_summarizer()` instead.\n#> # A tibble: 10 × 4\n#> resample .metric .estimator .estimate\n#> \n#> 1 1 mse standard 0.512\n#> 2 10 mse standard 0.454\n#> 3 2 mse standard 0.513\n#> 4 3 mse standard 0.414\n#> 5 4 mse standard 0.543\n#> 6 5 mse standard 0.456\n#> 7 6 mse standard 0.652\n#> 8 7 mse standard 0.642\n#> 9 8 mse standard 0.404\n#> 10 9 mse standard 0.479\n```\n:::\n\n\n## Class example: miss rate\n\nMiss rate is another name for the false negative rate, and is a classification metric in the same family as `sens()` and `spec()`. It follows the formula:\n\n$$ miss\\_rate = \\frac{FN}{FN + TP} $$\n\nThis metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. 
As you will see in the example, you can achieve this with a call to `base::table(estimate, truth)` which correctly puts the \"correct\" result in the columns of the confusion matrix.\n\nClassification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let's start with the binary case.\n\n### Vector implementation\n\nThe vector implementation for classification metrics initially has the same setup as numeric metrics, but has an additional argument, `estimator` that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to `NULL`. Additionally, pass it along to `metric_vec_template()` so that it can check the provided `estimator` against the classes of `truth` and `estimate` to see if they are allowed.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_e2a3d155fd663cc3e0b690df2b91eaea'}\n\n```{.r .cell-code}\n# Logic for `event_level`\nevent_col <- function(xtab, event_level) {\n if (identical(event_level, \"first\")) {\n colnames(xtab)[[1]]\n } else {\n colnames(xtab)[[2]]\n }\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n estimator <- finalize_estimator(truth, estimator)\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n```\n:::\n\n\nAnother change from the numeric metric is that a call to `finalize_estimator()` is made. 
This is the infrastructure that auto-selects the type of estimator to use.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-11_c182ce73ca2334a963cd46213c8ce1ce'}\n\n```{.r .cell-code}\ndata(\"two_class_example\")\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n#> [1] 0.120155\n```\n:::\n\n\nWhat happens if you try and pass in a multiclass result?\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-12_8f6e15521b8c0445c0d66710d116a6c0'}\n\n```{.r .cell-code}\ndata(\"hpc_cv\")\nfold1 <- filter(hpc_cv, Resample == \"Fold01\")\nmiss_rate_vec(fold1$obs, fold1$pred)\n#> F M L \n#> 0.06214689 0.00000000 0.00000000\n```\n:::\n\n\nThis isn't great, as currently multiclass `miss_rate()` isn't supported and it would have been better to throw an error if the `estimator` was not `\"binary\"`. Currently, `finalize_estimator()` uses its default implementation which selected `\"macro\"` as the `estimator` since `truth` was a factor with more than 2 classes. When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this.\n\nTo fix this, a generic counterpart to `finalize_estimator()`, called `finalize_estimator_internal()`, exists that helps you restrict the input types. If you provide a method to `finalize_estimator_internal()` where the method name is the same as your metric name, and then set the `metric_class` argument in `finalize_estimator()` to be the same thing, you can control how the auto-selection of the `estimator` is handled.\n\nDon't worry about the `metric_dispatcher` argument. This is handled for you and just exists as a dummy argument to dispatch off of.\n\nIt is also good practice to call `validate_estimator()` which handles the case where a user passed in the estimator themselves. 
This validates that the supplied `estimator` is one of the allowed types and errors otherwise.
Luckily, a somewhat standard template is used in yardstick and can be used here as well.\n\nLet's first remove the \"binary\" restriction we created earlier.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-14_642ada2cc50c512d049f5ffd21c05624'}\n\n```{.r .cell-code}\nrm(finalize_estimator_internal.miss_rate)\n```\n:::\n\n\nThe main changes below are:\n\n- The binary implementation is moved to `miss_rate_binary()`.\n\n- `miss_rate_estimator_impl()` is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from `miss_rate_impl()` and also accepts the `estimator` argument using R's function scoping rules.\n\n- `miss_rate_multiclass()` provides the implementation for the multiclass case. It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in `miss_rate_estimator_impl()` depending on whether or not it was macro or weighted macro.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-15_1a95d3d6c9ebb361abbe13a1dd7dac70'}\n\n```{.r .cell-code}\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) 
{\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n xtab <- table(estimate, truth)\n # Rather than implement the actual method here, we rely on\n # an *_estimator_impl() function that can handle binary\n # and multiclass cases\n miss_rate_estimator_impl(xtab, estimator, event_level)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n\n# This function switches between binary and multiclass implementations\nmiss_rate_estimator_impl <- function(data, estimator, event_level) {\n if(estimator == \"binary\") {\n miss_rate_binary(data, event_level)\n } else {\n # Encapsulates the macro, macro weighted, and micro cases\n wt <- get_weights(data, estimator)\n res <- miss_rate_multiclass(data, estimator)\n weighted.mean(res, wt)\n }\n}\n\n\nmiss_rate_binary <- function(data, event_level) {\n col <- event_col(data, event_level)\n col2 <- setdiff(colnames(data), col)\n \n tp <- data[col, col]\n fn <- data[col2, col]\n \n fn / (fn + tp)\n}\n\nmiss_rate_multiclass <- function(data, estimator) {\n \n # We need tp and fn for all classes individually\n # we can get this by taking advantage of the fact\n # that tp + fn = colSums(data)\n tp <- diag(data)\n tpfn <- colSums(data)\n fn <- tpfn - tp\n \n # If using a micro estimator, we sum the individual\n # pieces before performing the miss rate calculation\n if (estimator == \"micro\") {\n tp <- sum(tp)\n fn <- sum(fn)\n }\n \n # return the vector \n tp / (tp + fn)\n}\n```\n:::\n\n\nFor the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. 
For example, `f_meas()` is a combination of `recall()` and `precision()`. When calculating a macro averaged `f_meas()`, the weighting must be applied 1 time, at the very end of the calculation. `recall_multiclass()` and `precision_multiclass()` are defined similarly to how `miss_rate_multiclass()` is defined and returns the unweighted vector of calculations. This means we can directly use this in `f_meas()`, and then weight everything once at the end of that calculation.\n\nLet's try it out now:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-16_7e8336cb6ff907f90fb914111e7bf279'}\n\n```{.r .cell-code}\n# two class\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n#> [1] 0.120155\n\n# multiclass\nmiss_rate_vec(fold1$obs, fold1$pred)\n#> [1] 0.5483506\n```\n:::\n\n\n#### Data frame implementation\n\nLuckily, the data frame implementation is as simple as the numeric case, we just need to add an extra `estimator` argument and pass that through.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-17_741d329e41fb40b8d8a02cab3e2cc4dc'}\n\n```{.r .cell-code}\nmiss_rate <- function(data, ...) {\n UseMethod(\"miss_rate\")\n}\n\nmiss_rate <- new_class_metric(miss_rate, direction = \"minimize\")\n\nmiss_rate.data.frame <- function(data, \n truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n metric_summarizer(\n metric_nm = \"miss_rate\",\n metric_fn = miss_rate_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! 
enquo(estimate), \n estimator = estimator,\n na_rm = na_rm,\n event_level = event_level,\n ...\n )\n}\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-18_37dddc8d1f30e4e126853d7430c0f8c3'}\n\n```{.r .cell-code}\n# Macro weighted automatically selected\nfold1 %>%\n miss_rate(obs, pred)\n\n# Switch to micro\nfold1 %>%\n miss_rate(obs, pred, estimator = \"micro\")\n\n# Macro weighted by resample\nhpc_cv %>%\n group_by(Resample) %>%\n miss_rate(obs, pred, estimator = \"macro_weighted\")\n\n# Error handling\nmiss_rate(hpc_cv, obs, VF)\n```\n:::\n\n\n## Using custom metrics\n\nThe `metric_set()` function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, `metric_set()` fails. By using `new_numeric_metric()` and `new_class_metric()` in the above custom metrics, they work out of the box without any additional adjustments.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-19_8332e2c7eb70c5408f4edb0449a94e4d'}\n\n```{.r .cell-code}\nnumeric_mets <- metric_set(mse, rmse)\n\nnumeric_mets(solubility_test, solubility, prediction)\n#> # A tibble: 2 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 mse standard 0.521\n#> 2 rmse standard 0.722\n```\n:::\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_b0adcc0cbf400d613cf82d9561b6fba7'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] 
CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang * 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/develop/models/index/execute-results/html.json b/_freeze/learn/develop/models/index/execute-results/html.json new file mode 100644 index 00000000..895bc259 --- /dev/null +++ b/_freeze/learn/develop/models/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "332a3e44e6f187f8c13419c9768f7383", + "result": { + "markdown": "---\ntitle: \"How to build a parsnip model\"\ncategories:\n - developer tools\ntype: learn-subsection\nweight: 2\ndescription: | \n Create a parsnip model function from an existing model implementation.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: mda, modeldata, and tidymodels.\n\nThe parsnip package constructs models and predictions by representing those actions in 
expressions. There are a few reasons for this:\n\n * It eliminates a lot of duplicate code.\n * Since the expressions are not evaluated until fitting, it eliminates many package dependencies.\n\nA parsnip model function is itself very general. For example, the `logistic_reg()` function itself doesn't have any model code within it. Instead, each model function is associated with one or more computational _engines_. These might be different R packages or some function in another language (that can be evaluated by R). \n\nThis article describes the process of creating a new model function. Before proceeding, take a minute and read our [guidelines on creating modeling packages](https://tidymodels.github.io/model-implementation-principles/) to understand the general themes and conventions that we use. \n\n## An example model\n\nAs an example, we'll create a function for _mixture discriminant analysis_. There are [a few packages](http://search.r-project.org/cgi-bin/namazu.cgi?query=%22mixture+discriminant%22&max=100&result=normal&sort=score&idxname=functions) that implement this but we'll focus on `mda::mda`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/mda-str_2507ce72a9bea74f5a66c14f1b9a6f0c'}\n\n```{.r .cell-code}\nstr(mda::mda)\n#> function (formula = formula(data), data = sys.frame(sys.parent()), subclasses = 3, \n#> sub.df = NULL, tot.df = NULL, dimension = sum(subclasses) - 1, eps = 100 * \n#> .Machine$double.eps, iter = 5, weights = mda.start(x, g, subclasses, \n#> trace, ...), method = polyreg, keep.fitted = (n * dimension < 5000), \n#> trace = FALSE, ...)\n```\n:::\n\n\nThe main hyperparameter is the number of subclasses. We'll name our function `discrim_mixture`. \n\n## Aspects of models\n\nBefore proceeding, it helps to to review how parsnip categorizes models:\n\n* The model _type_ is related to the structural aspect of the model. For example, the model type `linear_reg` represents linear models (slopes and intercepts) that model a numeric outcome. 
Other model types in the package are `nearest_neighbor`, `decision_tree`, and so on. \n\n* Within a model type is the _mode_, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both models (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression). \n\n* The computation _engine_ is a combination of the estimation method and the implementation. For example, for linear regression, one engine is `\"lm\"` which uses ordinary least squares analysis via the `lm()` function. Another engine is `\"stan\"` which uses the Stan infrastructure to estimate parameters using Bayes rule. \n\nWhen adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model. \n\n## The general process\n\nThe parsnip package stores information about the models in an internal environment object. The environment can be accessed via the function `get_model_env()`. The package includes a variety of functions that can get or set the different aspects of the models. \n\nIf you are adding a new model from your own package, you can use these functions to add new entries into the model environment. \n\n### Step 1. Register the model, modes, and arguments\n\nWe will add the MDA model using the model type `discrim_mixture`. Since this is a classification method, we only have to register a single mode:\n\n\n::: {.cell layout-align=\"center\" hash='cache/mda-reg_813c25564c4902aaf7134053970a9517'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nset_new_model(\"discrim_mixture\")\nset_model_mode(model = \"discrim_mixture\", mode = \"classification\")\nset_model_engine(\n \"discrim_mixture\", \n mode = \"classification\", \n eng = \"mda\"\n)\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mda\")\n```\n:::\n\n\nThese functions should silently finish. 
There is also a function that can be used to show what aspects of the model have been added to parsnip: \n\n\n::: {.cell layout-align=\"center\" hash='cache/mda-show-1_9396d1b849a4beb0cdfd6d51aa181c18'}\n\n```{.r .cell-code}\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mdaNA\n#> \n#> ¹The model can use case weights.\n#> \n#> no registered arguments.\n#> \n#> no registered fit modules.\n#> \n#> no registered prediction modules.\n```\n:::\n\n\nThe next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in:\n\n * The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. \"penalty\" instead of \"lambda\" for regularized regression). We recommend consulting [the model argument table available here](/find/parsnip/) to see if an existing argument name can be used before creating a new one. \n \n * The argument name that is used by the underlying modeling function. \n \n * A function reference for a _constructor_ that will be used to generate tuning parameter values. This should be a character vector with a named element called `fun` that is the constructor function. There is an optional element `pkg` that can be used to call the function using its namespace. If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for `trans` and `range` while qualitative parameters can pass `values` via this list. \n \n * A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. 
In general this is not the case so one would use `has_submodel = FALSE`.
These should be defaulted to `NULL`.\n\nA basic version of the function is:\n\n\n::: {.cell layout-align=\"center\" hash='cache/model-fun_8731a20c3f2b9dfc4d820df0de753fde'}\n\n```{.r .cell-code}\ndiscrim_mixture <-\n function(mode = \"classification\", sub_classes = NULL) {\n # Check for correct mode\n if (mode != \"classification\") {\n rlang::abort(\"`mode` should be 'classification'\")\n }\n \n # Capture the arguments in quosures\n args <- list(sub_classes = rlang::enquo(sub_classes))\n \n # Save some empty slots for future parts of the specification\n new_model_spec(\n \"discrim_mixture\",\n args = args,\n eng_args = NULL,\n mode = mode,\n method = NULL,\n engine = NULL\n )\n }\n```\n:::\n\n\nThis is pretty simple since the data are not exposed to this function. \n\n::: {.callout-warning}\n We strongly suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former return better traceback results and have safer defaults for handling call objects. \n:::\n\n### Step 3. Add a fit module\n\nNow that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are:\n\n * `interface` is a single character value that could be \"formula\", \"data.frame\", or \"matrix\". This defines the type of interface used by the underlying fit function (`mda::mda`, in this case). This helps the translation of the data to be in an appropriate format for the that function. \n \n * `protect` is an optional list of function arguments that **should not be changeable** by the user. In this case, we probably don't want users to pass data values to these arguments (until the `fit()` function is called).\n \n * `func` is the package and name of the function that will be called. If you are using a locally defined function, only `fun` is required. 
\n \n * `defaults` is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. This isn't needed in this case, but is described later in this document.\n\nFor the first engine:\n\n\n::: {.cell layout-align=\"center\" hash='cache/fit-mod_cf52907288c4d4390ed78ff6c73ce98b'}\n\n```{.r .cell-code}\nset_fit(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\"),\n func = c(pkg = \"mda\", fun = \"mda\"),\n defaults = list()\n )\n)\n\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mda\n#> \n#> ¹The model can use case weights.\n#> \n#> arguments: \n#> mda: \n#> sub_classes --> subclasses\n#> \n#> fit modules:\n#> engine mode\n#> mda classification\n#> \n#> no registered prediction modules.\n```\n:::\n\n\nWe also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly.\n\n * `predictor_indicators` describes whether and how to create indicator/dummy variables from factor predictors. There are three options: `\"none\"` (do not expand factor predictors), `\"traditional\"` (apply the standard `model.matrix()` encodings), and `\"one_hot\"` (create the complete set including the baseline level for all factors). \n \n * `compute_intercept` controls whether `model.matrix()` should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, `model.matrix()` computes dummy variables for all but one factor level. 
Without an intercept, `model.matrix()` computes a full set of indicators for the first factor variable, but an incomplete set for the remainder.\n \n * `remove_intercept` removes the intercept column *after* `model.matrix()` is finished. This can be useful if the model function (e.g. `lm()`) automatically generates an intercept.\n\n* `allow_sparse_x` specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_976d33fa6c8f55ff45f773c90c747280'}\n\n```{.r .cell-code}\nset_encoding(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n```\n:::\n\n\n\n### Step 4. Add modules for prediction\n\nSimilar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the `class` object contains the details. The elements of the list are:\n\n * `pre` and `post` are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won't be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a `pre` function with the `args` below. \n * `func` is the prediction function (in the same format as above). In many cases, packages have a predict method for their model's class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to `predict()` with no associated package. \n * `args` is a list of arguments to pass to the prediction function. These will most likely be wrapped in `rlang::expr()` so that they are not evaluated when defining the method. 
For mda, the code would be `predict(object, newdata, type = \"class\")`. What is actually given to the function is the parsnip model fit object, which includes a sub-object called `fit()` that houses the mda model object. If the data need to be a matrix or data frame, you could also use `newdata = quote(as.data.frame(newdata))` or similar. \n\nThe parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data. \n\nTo add this method to the model environment, a similar `set()` function is used:\n\n\n::: {.cell layout-align=\"center\" hash='cache/mds-class_5c5ee27330cfbd92a4c3a1ec35650c25'}\n\n```{.r .cell-code}\nclass_info <- \n list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n # These lists should be of the form:\n # {predict.mda argument name} = {values provided from parsnip objects}\n list(\n # We don't want the first two arguments evaluated right now\n # since they don't exist yet. `type` is a simple object that\n # doesn't need to have its evaluation deferred. \n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"class\"\n )\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"class\",\n value = class_info\n)\n```\n:::\n\n\nA similar call can be used to define the class probability module (if they can be computed). The format is identical to the `class` module but the output is expected to be a tibble with columns for each factor level. \n\nAs an example of the `post` function, the data frame created by `mda:::predict.mda()` will be converted to a tibble. The arguments are `x` (the raw results coming from the predict method) and `object` (the parsnip model fit object). The latter has a sub-object called `lvl` which is a character string of the outcome's factor levels (if any). \n\nWe register the probability module. 
There is a template function that makes this slightly easier to format the objects:\n\n\n::: {.cell layout-align=\"center\" hash='cache/mda-prob_3a0051a27dbbcd0cd670ead843f2b834'}\n\n```{.r .cell-code}\nprob_info <-\n pred_value_template(\n post = function(x, object) {\n tibble::as_tibble(x)\n },\n func = c(fun = \"predict\"),\n # Now everything else is put into the `args` slot\n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"posterior\"\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"prob\",\n value = prob_info\n)\n\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mda\n#> \n#> ¹The model can use case weights.\n#> \n#> arguments: \n#> mda: \n#> sub_classes --> subclasses\n#> \n#> fit modules:\n#> engine mode\n#> mda classification\n#> \n#> prediction modules:\n#> mode engine methods\n#> classification mda class, prob\n```\n:::\n\n\nIf this model could be used for regression situations, we could also add a \"numeric\" module. For `pred`, the model requires an unnamed numeric vector output (usually). \n\nExamples are [here](https://github.com/tidymodels/parsnip/blob/master/R/linear_reg_data.R) and [here](https://github.com/tidymodels/parsnip/blob/master/R/rand_forest_data.R). \n\n\n### Does it work? \n\nAs a developer, one thing that may come in handy is the `translate()` function. This will tell you what the model's eventual syntax will be. 
\n\nFor example:\n\n\n::: {.cell layout-align=\"center\" hash='cache/mda-code_1554fa6e9a05b793b88344760c6dd217'}\n\n```{.r .cell-code}\ndiscrim_mixture(sub_classes = 2) %>%\n translate(engine = \"mda\")\n#> discrim mixture Model Specification (classification)\n#> \n#> Main Arguments:\n#> sub_classes = 2\n#> \n#> Computational engine: mda \n#> \n#> Model fit template:\n#> mda::mda(formula = missing_arg(), data = missing_arg(), subclasses = 2)\n```\n:::\n\n\nLet's try it on a data set from the modeldata package:\n\n\n::: {.cell layout-align=\"center\" hash='cache/mda-data_3c82cce5ac335857cac771447abc0954'}\n\n```{.r .cell-code}\ndata(\"two_class_dat\", package = \"modeldata\")\nset.seed(4622)\nexample_split <- initial_split(two_class_dat, prop = 0.99)\nexample_train <- training(example_split)\nexample_test <- testing(example_split)\n\nmda_spec <- discrim_mixture(sub_classes = 2) %>% \n set_engine(\"mda\")\n\nmda_fit <- mda_spec %>%\n fit(Class ~ ., data = example_train, engine = \"mda\")\nmda_fit\n#> parsnip model object\n#> \n#> Call:\n#> mda::mda(formula = Class ~ ., data = data, subclasses = ~2)\n#> \n#> Dimension: 2 \n#> \n#> Percent Between-Group Variance Explained:\n#> v1 v2 \n#> 82.63 100.00 \n#> \n#> Degrees of Freedom (per dimension): 3 \n#> \n#> Training Misclassification Error: 0.17241 ( N = 783 )\n#> \n#> Deviance: 671.391\n\npredict(mda_fit, new_data = example_test, type = \"prob\") %>%\n bind_cols(example_test %>% select(Class))\n#> # A tibble: 8 × 3\n#> .pred_Class1 .pred_Class2 Class \n#> \n#> 1 0.679 0.321 Class1\n#> 2 0.690 0.310 Class1\n#> 3 0.384 0.616 Class2\n#> 4 0.300 0.700 Class1\n#> 5 0.0262 0.974 Class2\n#> 6 0.405 0.595 Class2\n#> 7 0.793 0.207 Class1\n#> 8 0.0949 0.905 Class2\n\npredict(mda_fit, new_data = example_test) %>% \n bind_cols(example_test %>% select(Class))\n#> # A tibble: 8 × 2\n#> .pred_class Class \n#> \n#> 1 Class1 Class1\n#> 2 Class1 Class1\n#> 3 Class2 Class2\n#> 4 Class2 Class1\n#> 5 Class2 Class2\n#> 6 Class2 Class2\n#> 
7 Class1 Class1\n#> 8 Class2 Class2\n```\n:::\n\n\n\n## Add an engine\n\nThe process for adding an engine to an existing model is _almost_ the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. The code for the `rlm()` function in MASS is pretty similar to `lm()`, so we can copy that code and change the package/function names:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rlm_1efd4fceb00e524a782846ca6bfcd5c0'}\n\n```{.r .cell-code}\nset_model_engine(\"linear_reg\", \"regression\", eng = \"rlm\")\nset_dependency(\"linear_reg\", eng = \"rlm\", pkg = \"MASS\")\n\nset_fit(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\", \"weights\"),\n func = c(pkg = \"MASS\", fun = \"rlm\"),\n defaults = list()\n )\n)\n\nset_encoding(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n\nset_pred(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n type = \"numeric\",\n value = list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n list(\n object = expr(object$fit),\n newdata = expr(new_data),\n type = \"response\"\n )\n )\n)\n\n# testing:\nlinear_reg() %>% \n set_engine(\"rlm\") %>% \n fit(mpg ~ ., data = mtcars)\n#> parsnip model object\n#> \n#> Call:\n#> rlm(formula = mpg ~ ., data = data)\n#> Converged in 8 iterations\n#> \n#> Coefficients:\n#> (Intercept) cyl disp hp drat wt \n#> 17.82250038 -0.27878615 0.01593890 -0.02536343 0.46391132 -4.14355431 \n#> qsec vs am gear carb \n#> 0.65307203 0.24975463 1.43412689 0.85943158 -0.01078897 \n#> \n#> Degrees of freedom: 32 total; 21 residual\n#> Scale 
estimate: 2.15\n```\n:::\n\n\n## Add parsnip models to another package\n\nThe process here is almost the same. All of the previous functions are still required but their execution is a little different. \n\nFor parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the \"Depends\" category. \n\nThe first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. For our example here, this might look like: \n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-15_c86c7bad2cde1ced1e94b19098904ee1'}\n\n```{.r .cell-code}\nmake_discrim_mixture_mda <- function() {\n parsnip::set_new_model(\"discrim_mixture\")\n\n parsnip::set_model_mode(\"discrim_mixture\", \"classification\")\n\n # and so one...\n}\n```\n:::\n\n\nThis function is then executed when your package is loaded: \n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-16_12a6d5fdc03df118fde7c97baba4a5ed'}\n\n```{.r .cell-code}\n.onLoad <- function(libname, pkgname) {\n # This defines discrim_mixture in the model database\n make_discrim_mixture_mda()\n}\n```\n:::\n\n\nFor an example package that uses parsnip definitions, take a look at the [discrim](https://github.com/tidymodels/discrim) package.\n\n::: {.callout-warning}\n To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. `set_new_model()` and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing. \n:::\n\nIt is also important for parallel processing support to **list the home package as a dependency**. 
If the `discrim_mixture()` function lived in a package called `mixedup`, include the line:\n\n```r\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mixedup\")\n```\n\nParallel processing requires this explicit dependency setting. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip's internal database (and produce an error). \n\n\n## Your model, tuning parameters, and you\n\nThe tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. `discrim_mixture()` has one main tuning parameter: `sub_classes`. To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. \n\n`tunable()` takes the model specification as its argument and returns a tibble with columns: \n\n* `name`: The name of the argument. \n\n* `call_info`: A list that describes how to call a function that returns a dials parameter object. \n\n* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `\"model_spec\"`. \n\n* `component`: A character string with more information about the source. For models, this is just the name of the function (e.g. `\"discrim_mixture\"`). \n\n* `component_id`: A character string to indicate where a unique identifier is for the object. For a model, this is indicates the type of model argument (e.g. \"main\"). \n\nThe main piece of information that requires some detail is `call_info`. 
This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. \n\nFor example, for a nearest-neighbors `neighbors` parameter, this value is just: \n\n\n::: {.cell layout-align=\"center\" hash='cache/mtry_3c03e505855845f8e5bb7a077fd5b825'}\n\n```{.r .cell-code}\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n#> # Nearest Neighbors (quantitative)\n#> Range: [1, 10]\n```\n:::\n\n\nFor `discrim_mixture()`, a dials object is needed that returns an integer that is the number of sub-classes that should be create. We can create a dials parameter function for this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/sub-classes_f1d2aa270057a937d0fb1b22437c4b6b'}\n\n```{.r .cell-code}\nsub_classes <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(sub_classes = \"# Sub-Classes\"),\n finalize = NULL\n )\n}\n```\n:::\n\n\nIf this were in the dials package, we could use: \n\n\n::: {.cell layout-align=\"center\" hash='cache/tunable_6f690f2f8f8fd9613d4fa6d96183784b'}\n\n```{.r .cell-code}\ntunable.discrim_mixture <- function(x, ...) 
{\n tibble::tibble(\n name = c(\"sub_classes\"),\n call_info = list(list(pkg = NULL, fun = \"sub_classes\")),\n source = \"model_spec\",\n component = \"discrim_mixture\",\n component_id = \"main\"\n )\n}\n```\n:::\n\n\nOnce this method is in place, the tuning functions can be used: \n\n\n::: {.cell layout-align=\"center\" hash='cache/tune-mda_0dcd9b8898400f7cba95d8d28f58a8ec'}\n\n```{.r .cell-code}\nmda_spec <- \n discrim_mixture(sub_classes = tune()) %>% \n set_engine(\"mda\")\n\nset.seed(452)\ncv <- vfold_cv(example_train)\nmda_tune_res <- mda_spec %>%\n tune_grid(Class ~ ., cv, grid = 4)\nshow_best(mda_tune_res, metric = \"roc_auc\")\n#> # A tibble: 4 × 7\n#> sub_classes .metric .estimator mean n std_err .config \n#> \n#> 1 2 roc_auc binary 0.890 10 0.0143 Preprocessor1_Model3\n#> 2 3 roc_auc binary 0.889 10 0.0142 Preprocessor1_Model4\n#> 3 6 roc_auc binary 0.884 10 0.0147 Preprocessor1_Model2\n#> 4 8 roc_auc binary 0.881 10 0.0146 Preprocessor1_Model1\n```\n:::\n\n\n\n\n## Pro-tips, what-ifs, exceptions, FAQ, and minutiae\n\nThere are various things that came to mind while developing this resource.\n\n**Do I have to return a simple vector for `predict` and `predict_class`?**\n\nPreviously, when discussing the `pred` information:\n\n> For `pred`, the model requires an unnamed numeric vector output **(usually)**.\n\nThere are some models (e.g. `glmnet`, `plsr`, `Cubist`, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called `values` and have extra columns for any parameters that define the different sub-models. 
\n\nFor example, if I fit a linear regression model via `glmnet` and get four values of the regularization parameter (`lambda`):\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet_be2808b89c377b91701228133ccb2349'}\n\n```{.r .cell-code}\nlinear_reg() %>%\n set_engine(\"glmnet\", nlambda = 4) %>% \n fit(mpg ~ ., data = mtcars) %>%\n multi_predict(new_data = mtcars[1:3, -1])\n```\n:::\n\n\n_However_, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (`lambda`, in this case). \n\n**What do I do about how my model handles factors or categorical data?**\n\nSome modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using `model.matrix()`), and some do not. Some examples of models that do _not_ create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a `model.matrix()`-like default encoding for categorical data used in a model formula, but you can change this encoding using `set_encoding()`. 
For example, you can set predictor encodings that say, \"leave my data alone,\" and keep factors as is:\n\n\n::: {.cell layout-align=\"center\" hash='cache/encodinginfo_3e86c6fbe0bbb2a3fbc9efd669daa223'}\n\n```{.r .cell-code}\nset_encoding(\n model = \"decision_tree\",\n eng = \"rpart\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"none\",\n compute_intercept = FALSE,\n remove_intercept = FALSE\n )\n)\n```\n:::\n\n\n::: {.callout-note}\nThere are three options for `predictor_indicators`: \n- \"none\" (do not expand factor predictors)\n- \"traditional\" (apply the standard `model.matrix()` encoding)\n- \"one_hot\" (create the complete set including the baseline level for all factors) \n:::\n\nTo learn more about encoding categorical predictors, check out [this blog post](https://www.tidyverse.org/blog/2020/07/parsnip-0-1-2/#predictor-encoding-consistency).\n\n**What is the `defaults` slot and why do I need it?**\n\nYou might want to set defaults that can be overridden by the user. For example, for logistic regression with `glm`, it make sense to default `family = binomial`. However, if someone wants to use a different link function, they should be able to do that. For that model/engine definition, it has:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glm-alt_457c8ae71c6e3b2c6afe4b41c18ddd3f'}\n\n```{.r .cell-code}\ndefaults = list(family = expr(binomial))\n```\n:::\n\n\nSo that is the default:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glm-alt-show_0871388099af74cb137d4ac5b98cadc6'}\n\n```{.r .cell-code}\nlogistic_reg() %>% translate(engine = \"glm\")\n\n# but you can change it:\n\nlogistic_reg() %>%\n set_engine(\"glm\", family = expr(binomial(link = \"probit\"))) %>% \n translate()\n```\n:::\n\n\nThat's what `defaults` are for. \n\nNote that we wrapped `binomial` inside of `expr()`. If we didn't, it would substitute the results of executing `binomial()` inside of the expression (and that's a mess). 
\n\n**What if I want more complex defaults?**\n\nThe `translate` function can be used to check values or set defaults once the model's mode is known. To do this, you can create a model-specific S3 method that first calls the general method (`translate.model_spec()`) and then makes modifications or conducts error traps. \n\nFor example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-trans_2befe6ee20fcf17345860f655481c147'}\n\n```{.r .cell-code}\n# Simplified version\ntranslate.rand_forest <- function (x, engine, ...){\n # Run the general method to get the real arguments in place\n x <- translate.default(x, engine, ...)\n \n # Check and see if they make sense for the engine and/or mode:\n if (x$engine == \"ranger\") {\n if (any(names(x$method$fit$args) == \"importance\")) \n if (is.logical(x$method$fit$args$importance)) \n rlang::abort(\"`importance` should be a character value. See ?ranger::ranger.\")\n }\n x\n}\n```\n:::\n\n\nAs another example, `nnet::nnet()` has an option for the final layer to be linear (called `linout`). If `mode = \"regression\"`, that should probably be set to `TRUE`. You couldn't do this with the `args` (described above) since you need the function translated first. \n\n\n**My model fit requires more than one function call. So....?**\n\nThe best course of action is to write wrapper so that it can be one call. This was the case with xgboost and keras. \n\n**Why would I preprocess my data?**\n\nThere might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.)\n\nThis would **not** include making dummy variables and `model.matrix` stuff. The parsnip infrastructure already does that for you. 
\n\n\n**Why would I post-process my predictions?**\n\nWhat comes back from some R functions may be somewhat... arcane or problematic. As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (*narrator: they don't*). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set. \n\nAnother example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure. \n\nThese are the types of problems that the post-processor will solve. \n\n**Are there other modes?**\n\nNot yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don't make direct predictions of the outcome. \n\nIf you have a suggestion, please add a [GitHub issue](https://github.com/tidymodels/parsnip/issues) to discuss it. 
\n\n \n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> mda * 0.5-3 2022-05-05 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git 
a/_freeze/learn/develop/parameters/index/execute-results/html.json b/_freeze/learn/develop/parameters/index/execute-results/html.json new file mode 100644 index 00000000..433b392b --- /dev/null +++ b/_freeze/learn/develop/parameters/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "e5bfdc078ca2199dc3e07b6825658dda", + "result": { + "markdown": "---\ntitle: \"How to create a tuning parameter function\"\ncategories:\n - developer tools\ntype: learn-subsection\nweight: 4\ndescription: | \n Build functions to use in tuning both quantitative and qualitative parameters.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: dials and scales.\n\nSome models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using `new_quant_param()` or `new_qual_param()`, respectively. This article is a guide to creating new parameters.\n\n## Quantitative parameters\n\nAs an example, let's consider the multivariate adaptive regression spline ([MARS](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline)) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method.\n\nMARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by `earth::earth()` or the number of final terms can be set by the user. The parsnip function `mars()` has a parameter called `num_terms` that defines this.\n\nWhat if we want to create a parameter for the number of *initial terms* included in the model. There is no argument in `parsnip::mars()` for this but we will make one now. The argument name in `earth::earth()` is `nk`, which is not very descriptive. 
Our parameter will be called `num_initial_terms`.\n\nWe use the `new_quant_param()` function since this is a numeric parameter. The main two arguments to a numeric parameter function are `range` and `trans`.\n\nThe `range` specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is\n\n\n::: {.cell layout-align=\"center\" hash='cache/eart_0aaa451856e86c8fdc7e0c3f099c8de4'}\n\n```{.r .cell-code}\nmin(200, max(20, 2 * ncol(x))) + 1\n```\n:::\n\n\nwhere `x` is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let's specify an upper limit of 10 but this will be discussed more in the next section.\n\nThe other argument is `trans`, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a `penalty` parameter that tends to range between zero and some upper bound (let's say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units.\n\nThe `trans` parameter accepts a transformation object from the scales package. For example:\n\n\n::: {.cell layout-align=\"center\" hash='cache/scales_b25ae3bb346dde06d2a7e463ba1f4c4d'}\n\n```{.r .cell-code}\nlibrary(scales)\nlsf.str(\"package:scales\", pattern = \"_trans$\")\n#> asn_trans : function () \n#> atanh_trans : function () \n#> boxcox_trans : function (p, offset = 0) \n#> compose_trans : function (...) 
\n#> date_trans : function () \n#> exp_trans : function (base = exp(1)) \n#> hms_trans : function () \n#> identity_trans : function () \n#> log_trans : function (base = exp(1)) \n#> log10_trans : function () \n#> log1p_trans : function () \n#> log2_trans : function () \n#> logit_trans : function () \n#> modulus_trans : function (p, offset = 1) \n#> probability_trans : function (distribution, ...) \n#> probit_trans : function () \n#> pseudo_log_trans : function (sigma = 1, base = exp(1)) \n#> reciprocal_trans : function () \n#> reverse_trans : function () \n#> sqrt_trans : function () \n#> time_trans : function (tz = NULL) \n#> yj_trans : function (p)\nscales::log10_trans()\n#> Transformer: log-10 [1e-100, Inf]\n```\n:::\n\n\nA value of `NULL` means that no transformation should be used.\n\nA quantitative parameter function should have these two arguments and, in the function body, a call `new_quant_param()`. There are a few arguments to this function:\n\n\n::: {.cell layout-align=\"center\" hash='cache/new_quant_param_dc898a8030c6fa2847b68be9c2db5701'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nargs(new_quant_param)\n#> function (type = c(\"double\", \"integer\"), range = NULL, inclusive = NULL, \n#> default = deprecated(), trans = NULL, values = NULL, label = NULL, \n#> finalize = NULL, ..., call = caller_env()) \n#> NULL\n```\n:::\n\n\n- Possible types are double precision and integers. The value of `type` should agree with the values of `range` in the function definition.\n\n- It's OK for our tuning to include the minimum or maximum, so we'll use `c(TRUE, TRUE)` for `inclusive`. If the value cannot include one end of the range, set one or both of these values to `FALSE`.\n\n- The `label` should be a named character string where the name is the parameter name and the value represents what will be printed automatically.\n\n- `finalize` is an argument that can set parts of the range. 
This is discussed more below.\n\nHere's an example of a basic quantitative parameter object:\n\n\n::: {.cell layout-align=\"center\" hash='cache/num-initial-terms_d602318800beb1ef90a7bde3e6959438'}\n\n```{.r .cell-code}\nnum_initial_terms <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\n\nnum_initial_terms()\n#> # Initial MARS Terms (quantitative)\n#> Range: [1, 10]\n\n# Sample from the parameter:\nset.seed(4832856)\nnum_initial_terms() %>% value_sample(5)\n#> [1] 6 4 9 10 4\n```\n:::\n\n\n### Finalizing parameters\n\nIt might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of `unknown()` can be added. This will force the user to \"finalize\" the parameter object for their particular data set. Let's redefine our function with an `unknown()` value:\n\n\n::: {.cell layout-align=\"center\" hash='cache/num-initial-terms-unk_9de7d72b673760c5098403e4f395b8d8'}\n\n```{.r .cell-code}\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\nnum_initial_terms()\n\n# Can we sample? \nnum_initial_terms() %>% value_sample(5)\n```\n:::\n\n\nThe `finalize` argument of `num_initial_terms()` can take a function that uses data to set the range. 
For example, the package already includes a few functions for finalization:\n\n\n::: {.cell layout-align=\"center\" hash='cache/dials-final-funcs_13c8f3f4f0f277ecb3c76d762cb7a32c'}\n\n```{.r .cell-code}\nlsf.str(\"package:dials\", pattern = \"^get_\")\n#> get_batch_sizes : function (object, x, frac = c(1/10, 1/3), ...) \n#> get_log_p : function (object, x, ...) \n#> get_n : function (object, x, log_vals = FALSE, ...) \n#> get_n_frac : function (object, x, log_vals = FALSE, frac = 1/3, ...) \n#> get_n_frac_range : function (object, x, log_vals = FALSE, frac = c(1/10, 5/10), ...) \n#> get_p : function (object, x, log_vals = FALSE, ...) \n#> get_rbf_range : function (object, x, seed = sample.int(10^5, 1), ...)\n```\n:::\n\n\nThese functions generally take a data frame of predictors (in an argument called `x`) and add the range of the parameter object. Using the formula in the earth package, we might use:\n\n\n::: {.cell layout-align=\"center\" hash='cache/earth-range_e1c9bf6b8f535f761d22d3b738ea8bb2'}\n\n```{.r .cell-code}\nget_initial_mars_terms <- function(object, x) {\n upper_bound <- min(200, max(20, 2 * ncol(x))) + 1\n upper_bound <- as.integer(upper_bound)\n bounds <- range_get(object)\n bounds$upper <- upper_bound\n range_set(object, bounds)\n}\n\n# Use the mtcars are the finalize the upper bound: \nnum_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1])\n#> # Initial MARS Terms (quantitative)\n#> Range: [1, 21]\n```\n:::\n\n\nOnce we add this function to the object, the general `finalize()` method can be used:\n\n\n::: {.cell layout-align=\"center\" hash='cache/final-obj_9b8361428190d870490441c3eecf012e'}\n\n```{.r .cell-code}\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = get_initial_mars_terms\n )\n}\n\nnum_initial_terms() %>% finalize(x = mtcars[, 
-1])\n#> # Initial MARS Terms (quantitative)\n#> Range: [1, 21]\n```\n:::\n\n\n## Qualitative parameters\n\nNow let's look at an example of a qualitative parameter. If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be `min`, `max`, `mean`, `sum`, or to not aggregate at all (\"none\"). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We'll take \"character\" input (not \"logical\"), and we must specify the allowed values. By default we won't aggregate.\n\n\n::: {.cell layout-align=\"center\" hash='cache/aggregation_39f71033809bc19015c698dbeca6311d'}\n\n```{.r .cell-code}\naggregation <- function(values = c(\"none\", \"min\", \"max\", \"mean\", \"sum\")) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n```\n:::\n\n\nWithin the dials package, the convention is to have the values contained in a separate vector whose name starts with `values_`. For example:\n\n\n::: {.cell layout-align=\"center\" hash='cache/aggregation-vec_7dcc9f145f63cccd19b24d9fa23f4d10'}\n\n```{.r .cell-code}\nvalues_aggregation <- c(\"none\", \"min\", \"max\", \"mean\", \"sum\")\naggregation <- function(values = values_aggregation) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. 
We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n```\n:::\n\n\nThis step may not make sense if you are using the function in a script and not keeping it within a package.\n\nWe can use our `aggregation` parameters with dials functions.\n\n\n::: {.cell layout-align=\"center\" hash='cache/aggregation-use_9ac92377d564310ae312bf63617fedad'}\n\n```{.r .cell-code}\naggregation()\n#> Warning: The `default` argument of `new_qual_param()` is deprecated as of\n#> dials 1.1.0.\n#> Aggregation Method (qualitative)\n#> 5 possible values include:\n#> 'none', 'min', 'max', 'mean' and 'sum'\naggregation() %>% value_sample(3)\n#> [1] \"min\" \"sum\" \"mean\"\n```\n:::\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> 
tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/develop/recipes/index/execute-results/html.json b/_freeze/learn/develop/recipes/index/execute-results/html.json new file mode 100644 index 00000000..9c930fb9 --- /dev/null +++ b/_freeze/learn/develop/recipes/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "61076caefdda4927deedaf16b02443e4", + "result": { + "markdown": "---\ntitle: \"Create your own recipe step function\"\ncategories:\n - developer tools\ntype: learn-subsection\nweight: 1\ndescription: | \n Write a new recipe step for data preprocessing.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: modeldata and tidymodels.\n\nThere are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages [can be found here](/find/recipes/). However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for [centering](https://github.com/tidymodels/recipes/blob/master/R/center.R) or [PCA](https://github.com/tidymodels/recipes/blob/master/R/pca.R) to start. \n\nFor check operations (e.g. `check_class()`), the process is very similar. 
Notes on this are available at the end of this article. \n\nThe general process to follow is to:\n\n1. Define a step constructor function.\n\n2. Create the minimal S3 methods for `prep()`, `bake()`, and `print()`. \n\n3. Optionally add some extra methods to work with other tidymodels packages, such as `tunable()` and `tidy()`. \n\nAs an example, we will create a step for converting data into percentiles. \n\n## A new step definition\n\nLet's create a step that replaces the value of a variable with its percentile from the training set. The example data we'll use is from the modeldata package:\n\n\n::: {.cell layout-align=\"center\" hash='cache/initial_e9b15d7263a13cf19c62529abfd10bdb'}\n\n```{.r .cell-code}\nlibrary(modeldata)\ndata(biomass)\nstr(biomass)\n#> 'data.frame':\t536 obs. of 8 variables:\n#> $ sample : chr \"Akhrot Shell\" \"Alabama Oak Wood Waste\" \"Alder\" \"Alfalfa\" ...\n#> $ dataset : chr \"Training\" \"Training\" \"Training\" \"Training\" ...\n#> $ carbon : num 49.8 49.5 47.8 45.1 46.8 ...\n#> $ hydrogen: num 5.64 5.7 5.8 4.97 5.4 5.75 5.99 5.7 5.5 5.9 ...\n#> $ oxygen : num 42.9 41.3 46.2 35.6 40.7 ...\n#> $ nitrogen: num 0.41 0.2 0.11 3.3 1 2.04 2.68 1.7 0.8 1.2 ...\n#> $ sulfur : num 0 0 0.02 0.16 0.02 0.1 0.2 0.2 0 0.1 ...\n#> $ HHV : num 20 19.2 18.3 18.2 18.4 ...\n\nbiomass_tr <- biomass[biomass$dataset == \"Training\",]\nbiomass_te <- biomass[biomass$dataset == \"Testing\",]\n```\n:::\n\n\nTo illustrate the transformation with the `carbon` variable, note the training set distribution of this variable with a vertical line below for the first value of the test set. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/carbon_dist_1ea4da99e7a470221927e2615897ebcd'}\n\n```{.r .cell-code}\nlibrary(ggplot2)\ntheme_set(theme_bw())\nggplot(biomass_tr, aes(x = carbon)) + \n geom_histogram(binwidth = 5, col = \"blue\", fill = \"blue\", alpha = .5) + \n geom_vline(xintercept = biomass_te$carbon[1], lty = 2)\n```\n\n::: {.cell-output-display}\n![](figs/carbon_dist-1.svg){fig-align='center' width=100%}\n:::\n:::\n\n\nBased on the training set, 42.1% of the data are less than a value of 46.35. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values. \n\nOur new step will do this computation for any numeric variables of interest. We will call this new recipe step `step_percentile()`. The code below is designed for illustration and not speed or best practices. We've left out a lot of error trapping that we would want in a real implementation. \n\n## Create the function\n\nTo start, there is a _user-facing_ function. Let's call that `step_percentile()`. This is just a simple wrapper around a _constructor function_, which defines the rules for any step object that defines a percentile transformation. We'll call this constructor `step_percentile_new()`. \n\nThe function `step_percentile()` takes the same arguments as your function and simply adds it to a new recipe. The `...` signifies the variable selectors that can be used.\n\n\n::: {.cell layout-align=\"center\" hash='cache/initial_def_9e13ec18326669f1593d727bab539fa6'}\n\n```{.r .cell-code}\nstep_percentile <- function(\n recipe, \n ..., \n role = NA, \n trained = FALSE, \n ref_dist = NULL,\n options = list(probs = (0:100)/100, names = TRUE),\n skip = FALSE,\n id = rand_id(\"percentile\")\n ) {\n\n ## The variable selectors are not immediately evaluated by using\n ## the `quos()` function in `rlang`. `ellipse_check()` captures \n ## the values and also checks to make sure that they are not empty. 
\n terms <- ellipse_check(...) \n\n add_step(\n recipe, \n step_percentile_new(\n terms = terms, \n trained = trained,\n role = role, \n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n )\n}\n```\n:::\n\n\nYou should always keep the first four arguments (`recipe` though `trained`) the same as listed above. Some notes:\n\n * the `role` argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using `role = NA` will leave the existing role intact. \n * `trained` is set by the package when the estimation step has been run. You should default your function definition's argument to `FALSE`. \n * `skip` is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to `bake()` is used. For example, if a step is applied to the variables with roles of \"outcomes\", these data would not be available for new samples. \n * `id` is a character string that can be used to identify steps in package code. `rand_id()` will create an ID that has the prefix and a random character sequence. \n\nWe can estimate the percentiles of new data points based on the percentiles from the training set with `approx()`. Our `step_percentile` contains a `ref_dist` object to store these percentiles (pre-computed from the training set in `prep()`) for later use in `bake()`.\n\nWe will use `stats::quantile()` to compute the grid. However, we might also want to have control over the granularity of this grid, so the `options` argument will be used to define how that calculation is done. We could use the ellipses (aka `...`) so that any options passed to `step_percentile()` that are not one of its arguments will then be passed to `stats::quantile()`. 
However, we recommend making a separate list object with the options and use these inside the function because `...` is already used to define the variable selection. \n\nIt is also important to consider if there are any _main arguments_ to the step. For example, for spline-related steps such as `step_ns()`, users typically want to adjust the argument for the degrees of freedom in the spline (e.g. `splines::ns(x, df)`). Rather than letting users add `df` to the `options` argument: \n\n* Allow the important arguments to be main arguments to the step function. \n\n* Follow the tidymodels [conventions for naming arguments](https://tidymodels.github.io/model-implementation-principles/standardized-argument-names.html). Whenever possible, avoid jargon and keep common argument names. \n\nThere are benefits to following these principles (as shown below). \n\n## Initialize a new object\n\nNow, the constructor function can be created.\n\nThe function cascade is: \n\n```\nstep_percentile() calls recipes::add_step()\n└──> recipes::add_step() calls step_percentile_new()\n └──> step_percentile_new() calls recipes::step()\n```\n\n`step()` is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using `subclass = \"percentile\"` will set the class of new objects to `\"step_percentile\"`. \n\n\n::: {.cell layout-align=\"center\" hash='cache/initialize_565d7c1989cea598cb438eb330637305'}\n\n```{.r .cell-code}\nstep_percentile_new <- \n function(terms, role, trained, ref_dist, options, skip, id) {\n step(\n subclass = \"percentile\", \n terms = terms,\n role = role,\n trained = trained,\n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n }\n```\n:::\n\n\nThis constructor function should have no default argument values. Defaults should be set in the user-facing step object. \n\n## Create the `prep` method\n\nYou will need to create a new `prep()` method for your step's class. 
To do this, three arguments that the method should have are:\n\n```r\nfunction(x, training, info = NULL)\n```\n\nwhere\n\n * `x` will be the `step_percentile` object,\n * `training` will be a _tibble_ that has the training set data, and\n * `info` will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific `prep()` method so it may not have the variables from the original data. The columns in this tibble are `variable` (the variable name), `type` (currently either \"numeric\" or \"nominal\"), `role` (defining the variable's role), and `source` (either \"original\" or \"derived\" depending on where it originated).\n\nYou can define other arguments as well. \n\nThe first thing that you might want to do in the `prep()` function is to translate the specification listed in the `terms` argument to column names in the current data. There is a function called `recipes_eval_select()` that can be used to obtain this. \n\n::: {.callout-warning}\n The `recipes_eval_select()` function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps. \n:::\n\n\n::: {.cell layout-align=\"center\" hash='cache/prep_1_e9cc5b4aa66319e4eca05dbda8b4cc2c'}\n\n```{.r .cell-code}\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info) \n # TODO finish the rest of the function\n}\n```\n:::\n\n\nAfter this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See `recipes::check_type()` to do this for basic types. \n\nOnce we have this, we can save the approximation grid. 
For the grid, we will use a helper function that enables us to run `rlang::exec()` to splice in any extra arguments contained in the `options` list to the call to `quantile()`: \n\n\n::: {.cell layout-align=\"center\" hash='cache/splice_d7f87c1fe0ee11958f763f1b2ab1ee72'}\n\n```{.r .cell-code}\nget_train_pctl <- function(x, args = NULL) {\n res <- rlang::exec(\"quantile\", x = x, !!!args)\n # Remove duplicate percentile values\n res[!duplicated(res)]\n}\n\n# For example:\nget_train_pctl(biomass_tr$carbon, list(probs = 0:1))\n#> 0% 100% \n#> 14.61 97.18\nget_train_pctl(biomass_tr$carbon)\n#> 0% 25% 50% 75% 100% \n#> 14.610 44.715 47.100 49.725 97.180\n```\n:::\n\n\nNow, the `prep()` method can be created: \n\n\n::: {.cell layout-align=\"center\" hash='cache/prep-2_7c79c53e94b392f35b346414e8b3f731'}\n\n```{.r .cell-code}\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info)\n ## You can add error trapping for non-numeric data here and so on. \n \n ## We'll use the names later so make sure they are available\n if (x$options$names == FALSE) {\n rlang::abort(\"`names` should be set to TRUE\")\n }\n \n if (!any(names(x$options) == \"probs\")) {\n x$options$probs <- (0:100)/100\n } else {\n x$options$probs <- sort(unique(x$options$probs))\n }\n \n # Compute percentile grid\n ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options)\n\n ## Use the constructor function to return the updated object. \n ## Note that `trained` is now set to TRUE\n \n step_percentile_new(\n terms = x$terms, \n trained = TRUE,\n role = x$role, \n ref_dist = ref_dist,\n options = x$options,\n skip = x$skip,\n id = x$id\n )\n}\n```\n:::\n\n\nWe suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. 
The former can be used for better traceback results.\n\n\n## Create the `bake` method\n\nRemember that the `prep()` function does not _apply_ the step to the data; it only estimates any required values such as `ref_dist`. We will need to create a new method for our `step_percentile()` class. The minimum arguments for this are\n\n```r\nfunction(object, new_data, ...)\n```\n\nwhere `object` is the updated step function that has been through the corresponding `prep()` code and `new_data` is a tibble of data to be processed. \n\nHere is the code to convert the new data to percentiles. The input data (`x` below) comes in as a numeric vector and the output is a vector of approximate percentiles: \n\n\n::: {.cell layout-align=\"center\" hash='cache/bake-helpers_26b81d4ec4ac1dca48a0bd67178379d4'}\n\n```{.r .cell-code}\npctl_by_approx <- function(x, ref) {\n # In case duplicates were removed, get the percentiles from\n # the names of the reference object\n grid <- as.numeric(gsub(\"%$\", \"\", names(ref))) \n approx(x = ref, y = grid, xout = x)$y/100\n}\n```\n:::\n\n\nThese computations are done column-wise using `purrr::map2_dfc()` to modify the new data in-place:\n\n\n::: {.cell layout-align=\"center\" hash='cache/bake-method_c5dab3d658e8739f1bff0630466624e5'}\n\n```{.r .cell-code}\nbake.step_percentile <- function(object, new_data, ...) {\n ## For illustration (and not speed), we will loop through the affected variables\n ## and do the computations\n vars <- names(object$ref_dist)\n \n new_data[, vars] <-\n purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx)\n \n ## Always convert to tibbles on the way out\n tibble::as_tibble(new_data)\n}\n```\n:::\n\n\n::: {.callout-note}\nYou need to import `recipes::prep()` and `recipes::bake()` to create your own step function in a package. 
\n:::\n\n## Run the example\n\nLet's use the example data to make sure that it works: \n\n\n::: {.cell layout-align=\"center\" hash='cache/example_d3f9751b8b4c8df7fa734a80bdffd799'}\n\n```{.r .cell-code}\nrec_obj <- \n recipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\")) %>%\n prep(training = biomass_tr)\n\nbiomass_te %>% select(ends_with(\"gen\")) %>% slice(1:2)\nbake(rec_obj, biomass_te %>% slice(1:2), ends_with(\"gen\"))\n\n# Checking to get approximate result: \nmean(biomass_tr$hydrogen <= biomass_te$hydrogen[1])\nmean(biomass_tr$oxygen <= biomass_te$oxygen[1])\n```\n:::\n\n\nThe plot below shows how the original hydrogen percentiles line up with the estimated values:\n\n\n::: {.cell layout-align=\"center\" hash='cache/cdf_plot_7fa4a8e3206391cb29364184e76efaf8'}\n\n```{.r .cell-code}\nhydrogen_values <- \n bake(rec_obj, biomass_te, hydrogen) %>% \n bind_cols(biomass_te %>% select(original = hydrogen))\n\nggplot(biomass_tr, aes(x = hydrogen)) + \n # Plot the empirical distribution function of the \n # hydrogen training set values as a black line\n stat_ecdf() + \n # Overlay the estimated percentiles for the new data: \n geom_point(data = hydrogen_values, \n aes(x = original, y = hydrogen), \n col = \"red\", alpha = .5, cex = 2) + \n labs(x = \"New Hydrogen Values\", y = \"Percentile Based on Training Set\")\n```\n:::\n\n\nThese line up very nicely! \n\n## Custom check operations \n\nThe process here is exactly the same as steps; the internal functions have a similar naming convention: \n\n * `add_check()` instead of `add_step()`\n * `check()` instead of `step()`, and so on. \n \nIt is strongly recommended that:\n \n 1. The operations start with `check_` (i.e. `check_range()` and `check_range_new()`)\n 1. The check uses `rlang::abort(paste0(...))` when the conditions are not met\n 1. The original data are returned (unaltered) by the check when the conditions are satisfied. 
\n\n## Other step methods\n\nThere are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set. \n\n### A print method\n\nIf you don't add a print method for `step_percentile`, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called `printer()` that should be useful in most cases. We are using it here for the custom print method for `step_percentile`. It requires the original terms specification and the column names this specification is evaluated to by `prep()`. For the former, our step object is structured so that the list object `ref_dist` has the names of the selected variables: \n\n\n::: {.cell layout-align=\"center\" hash='cache/print-method_d4ebd3848eef1bd46e28cfb4c415ba1d'}\n\n```{.r .cell-code}\nprint.step_percentile <-\n function(x, width = max(20, options()$width - 35), ...) {\n cat(\"Percentile transformation on \", sep = \"\")\n printer(\n # Names before prep (could be selectors)\n untr_obj = x$terms,\n # Names after prep:\n tr_obj = names(x$ref_dist),\n # Has it been prepped? \n trained = x$trained,\n # An estimate of how many characters to print on a line: \n width = width\n )\n invisible(x)\n }\n\n# Results before `prep()`:\nrecipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\"))\n\n# Results after `prep()`: \nrec_obj\n```\n:::\n\n \n### Methods for declaring required packages\n\nSome recipe steps use functions from other packages. When this is the case, the `step_*()` function should check to see if the package is installed. The function `recipes::recipes_pkg_check()` will do this. For example: \n\n```\n> recipes::recipes_pkg_check(\"some_package\")\n1 package is needed for this step and is not installed. (some_package). 
Start \na clean R session then run: install.packages(\"some_package\")\n```\n\nThere is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the `hypothetical` package, this might look like: \n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-15_ef766f12d6be3f28d8352aa2616aa143'}\n\n```{.r .cell-code}\nrequired_pkgs.step_hypothetical <- function(x, ...) {\n c(\"hypothetical\", \"myrecipespkg\")\n}\n```\n:::\n\n\nIn this example, `myrecipespkg` is the package where the step resides (if it is in a package).\n\nThe reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the `prep()` methods cannot be found and an error occurs. \n\nIf this S3 method is used for your step, you can rely on this for checking the installation: \n \n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-16_bb0b7690de51c7e735663ee46cceddfd'}\n\n```{.r .cell-code}\nrecipes::recipes_pkg_check(required_pkgs.step_hypothetical())\n```\n:::\n\n\nIf you'd like an example of this in a package, please take a look at the [embed](https://github.com/tidymodels/embed/) or [themis](https://github.com/tidymodels/themis/) package.\n\n### A tidy method\n\nThe `broom::tidy()` method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values. \n\nWhen the recipe has been prepped, those data are in the list `ref_dist`. A small function can be used to reformat that data into a tibble. 
It is customary to return the main values as `value`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/tidy-calcs_729f85bdabca51988e5c753558835e58'}\n\n```{.r .cell-code}\nformat_pctl <- function(x) {\n tibble::tibble(\n value = unname(x),\n percentile = as.numeric(gsub(\"%$\", \"\", names(x))) \n )\n}\n\n# For example: \npctl_step_object <- rec_obj$steps[[1]]\npctl_step_object\nformat_pctl(pctl_step_object$ref_dist[[\"hydrogen\"]])\n```\n:::\n\n\nThe tidy method could return these values for each selected column. Before `prep()`, missing values can be used as placeholders. \n\n\n::: {.cell layout-align=\"center\" hash='cache/tidy_6651875bdb0348cb7beed42d9960198c'}\n\n```{.r .cell-code}\ntidy.step_percentile <- function(x, ...) {\n if (is_trained(x)) {\n res <- map_dfr(x$ref_dist, format_pctl, .id = \"term\")\n }\n else {\n term_names <- sel2char(x$terms)\n res <-\n tibble(\n terms = term_names,\n value = rlang::na_dbl,\n percentile = rlang::na_dbl\n )\n }\n # Always return the step id: \n res$id <- x$id\n res\n}\n\ntidy(rec_obj, number = 1)\n```\n:::\n\n\n### Methods for tuning parameters\n\nThe tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn't really have any tunable parameters, so we will demonstrate using `step_poly()`, which returns a polynomial expansion of selected columns. Its function definition has the arguments: \n\n\n::: {.cell layout-align=\"center\" hash='cache/poly-args_7c1ac91efd9c85d357f850499f13a78c'}\n\n```{.r .cell-code}\nargs(step_poly)\n```\n:::\n\n\nThe argument `degree` is tunable.\n\nTo work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. \n\n`tunable()` takes the step object as its argument and returns a tibble with columns: \n\n* `name`: The name of the argument. 
\n\n* `call_info`: A list that describes how to call a function that returns a dials parameter object. \n\n* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `\"recipe\"`. \n\n* `component`: A character string with more information about the source. For recipes, this is just the name of the step (e.g. `\"step_poly\"`). \n\n* `component_id`: A character string to indicate where a unique identifier is for the object. For recipes, this is just the `id` value of the step object. \n\nThe main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. \n\nFor example, for a nearest-neighbors `neighbors` parameter, this value is just: \n\n\n::: {.cell layout-align=\"center\" hash='cache/mtry_29d760ef7533e5f40bc9232a20172a80'}\n\n```{.r .cell-code}\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n```\n:::\n\n\nFor `step_poly()`, a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree: \n\n```r\n> lsf.str(\"package:dials\", pattern = \"degree\")\ndegree : function (range = c(1, 3), trans = NULL) \ndegree_int : function (range = c(1L, 3L), trans = NULL) \nprod_degree : function (range = c(1L, 2L), trans = NULL) \nspline_degree : function (range = c(3L, 10L), trans = NULL) \n```\n\nLooking at the `range` values, some return doubles and others return integers. For our problem, `degree_int()` would be a good choice. 
\n\nFor `step_poly()` the `tunable()` S3 method could be: \n\n\n::: {.cell layout-align=\"center\" hash='cache/tunable_16f4fa39f3ddb8145e6a67412664dadb'}\n\n```{.r .cell-code}\ntunable.step_poly <- function (x, ...) {\n tibble::tibble(\n name = c(\"degree\"),\n call_info = list(list(pkg = \"dials\", fun = \"degree_int\")),\n source = \"recipe\",\n component = \"step_poly\",\n component_id = x$id\n )\n}\n```\n:::\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] 
/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/models/coefficients/index/execute-results/html.json b/_freeze/learn/models/coefficients/index/execute-results/html.json new file mode 100644 index 00000000..828ed41f --- /dev/null +++ b/_freeze/learn/models/coefficients/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "a490f645e90769355dc4559aa199c331", + "result": { + "markdown": "---\ntitle: \"Working with model coefficients\"\ncategories:\n - model fitting\n - tidying results\n - linear regression\n - model tuning\ntype: learn-subsection\nweight: 5\ndescription: | \n Create models that use coefficients, extract them from fitted models, and visualize them.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n## Introduction \n\nThere are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients. \n\nThis article describes how to retrieve the estimated coefficients from models fit using tidymodels. To use code in this article, you will need to install the following packages: glmnet and tidymodels.\n\n## Linear regression\n\nLet's start with a linear regression model: \n\n$$\\hat{y} = \\hat{\\beta}_0 + \\hat{\\beta}_1x_1 + \\ldots + \\hat{\\beta}_px_p$$ \n\nThe $\\beta$ values are the coefficients and the $x_j$ are model predictors, or features. 
\n\nLet's use the [Chicago train data](https://bookdown.org/max/FES/chicago-intro.html) where we predict the ridership at the Clark and Lake station (column name: `ridership`) with the previous ridership data 14 days prior at three of the stations. \n\nThe data are in the modeldata package: \n\n\n::: {.cell layout-align=\"center\" hash='cache/setup-tm_f86571269e0aaa992d4e4c6d8e5b4abb'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\ntidymodels_prefer()\ntheme_set(theme_bw())\n\ndata(Chicago)\n\nChicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem)\n```\n:::\n\n\n### A single model\n\nLet's start by fitting only a single parsnip model object. We'll create a model specification using `linear_reg()`. \n\n::: {.callout-note}\nThe default engine is `\"lm\"` so no call to `set_engine()` is required. \n:::\n\nThe `fit()` function estimates the model coefficients, given a formula and data set. \n\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-single_01a332c8b03f35833c62f041f177e350'}\n\n```{.r .cell-code}\nlm_spec <- linear_reg()\nlm_fit <- fit(lm_spec, ridership ~ ., data = Chicago)\nlm_fit\n#> parsnip model object\n#> \n#> \n#> Call:\n#> stats::lm(formula = ridership ~ ., data = data)\n#> \n#> Coefficients:\n#> (Intercept) Clark_Lake Austin Harlem \n#> 1.6778 0.9035 0.6123 -0.5550\n```\n:::\n\n\nThe best way to retrieve the fitted parameters is to use the `tidy()` method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-tidy_42d5030dba13291edb12d26473a41b42'}\n\n```{.r .cell-code}\ntidy(lm_fit)\n#> # A tibble: 4 × 5\n#> term estimate std.error statistic p.value\n#> \n#> 1 (Intercept) 1.68 0.156 10.7 1.11e- 26\n#> 2 Clark_Lake 0.904 0.0280 32.3 5.14e-210\n#> 3 Austin 0.612 0.320 1.91 5.59e- 2\n#> 4 Harlem -0.555 0.165 -3.36 7.85e- 4\n```\n:::\n\n\nWe'll use this function in subsequent sections. 
\n\n### Resampled or tuned models\n\nThe tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the [bootstrap](https://www.tmwr.org/resampling.html#bootstrap) to resample the data. This is a standard resampling approach when evaluating the uncertainty in statistical estimates. \n\nWe'll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates).\n\n\n::: {.cell layout-align=\"center\" hash='cache/bootstraps_d10f30a0e2c72506abfc4dbed0505479'}\n\n```{.r .cell-code}\nset.seed(123)\nbt <- bootstraps(Chicago, times = 5)\n```\n:::\n\n\nWith resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function [`fit_resamples()`](https://www.tmwr.org/resampling.html#resampling-performance) is the recommended approach for doing so. \n\n::: {.callout-warning}\n The `fit_resamples()` function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to `fit_resamples()` that _can_ save the model object or any other aspect of the fit. \n:::\n\nThis function takes a single argument that represents the fitted [workflow object](https://www.tmwr.org/workflows.html) (even if you don't give `fit_resamples()` a workflow).\n\nFrom this, we can extract the model fit. There are two \"levels\" of model objects that are available: \n\n* The parsnip model object, which wraps the underlying model object. We retrieve this using the `extract_fit_parsnip()` function. \n\n* The underlying model object (a.k.a. the engine fit) via the `extract_fit_engine()`. \n\nWe'll use the latter option and then tidy this model object as we did in the previous section. 
Let's add this to the control function so that we can re-use it. \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-ctrl_46a72a356a6c211536207a8a329dabfe'}\n\n```{.r .cell-code}\nget_lm_coefs <- function(x) {\n x %>% \n # get the lm model object\n extract_fit_engine() %>% \n # transform its format\n tidy()\n}\ntidy_ctrl <- control_grid(extract = get_lm_coefs)\n```\n:::\n\n\nThis argument is then passed to `fit_resamples()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-resampled_b16c936385c25cc64817fecdd21c2f07'}\n\n```{.r .cell-code}\nlm_res <- \n lm_spec %>% \n fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl)\nlm_res\n#> # Resampling results\n#> # Bootstrap sampling \n#> # A tibble: 5 × 5\n#> splits id .metrics .notes .extracts\n#> \n#> 1 Bootstrap1 \n#> 2 Bootstrap2 \n#> 3 Bootstrap3 \n#> 4 Bootstrap4 \n#> 5 Bootstrap5 \n```\n:::\n\n\nNote that there is a `.extracts` column in our resampling results. This object contains the output of our `get_lm_coefs()` function for each resample. The structure of the elements of this column is a little complex. 
Let's start by looking at the first element (which corresponds to the first resample): \n\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-extract-ex_3c8ff2e6eeff483ad88d6cbf258ca1ed'}\n\n```{.r .cell-code}\nlm_res$.extracts[[1]]\n#> # A tibble: 1 × 2\n#> .extracts .config \n#> \n#> 1 Preprocessor1_Model1\n```\n:::\n\n\nThere is _another_ column in this element called `.extracts` that has the results of the `tidy()` function call: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-extract-again_6cfd6b1b2a7860f481fd57fe06485401'}\n\n```{.r .cell-code}\nlm_res$.extracts[[1]]$.extracts[[1]]\n#> # A tibble: 4 × 5\n#> term estimate std.error statistic p.value\n#> \n#> 1 (Intercept) 1.40 0.157 8.90 7.23e- 19\n#> 2 Clark_Lake 0.842 0.0280 30.1 2.39e-184\n#> 3 Austin 1.46 0.320 4.54 5.70e- 6\n#> 4 Harlem -0.637 0.163 -3.92 9.01e- 5\n```\n:::\n\n\nThese nested columns can be flattened via the purrr `unnest()` function: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-extract-almost_1821d1a99b106428f7194b6ad34364d5'}\n\n```{.r .cell-code}\nlm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) \n#> # A tibble: 5 × 3\n#> id .extracts .config \n#> \n#> 1 Bootstrap1 Preprocessor1_Model1\n#> 2 Bootstrap2 Preprocessor1_Model1\n#> 3 Bootstrap3 Preprocessor1_Model1\n#> 4 Bootstrap4 Preprocessor1_Model1\n#> 5 Bootstrap5 Preprocessor1_Model1\n```\n:::\n\n\nWe still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-extract-final_06a6ce3f205fce04bda269c0e6666579'}\n\n```{.r .cell-code}\nlm_coefs <- \n lm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n unnest(.extracts)\n\nlm_coefs %>% select(id, term, estimate, p.value)\n#> # A tibble: 20 × 4\n#> id term estimate p.value\n#> \n#> 1 Bootstrap1 (Intercept) 1.40 7.23e- 19\n#> 2 Bootstrap1 Clark_Lake 0.842 2.39e-184\n#> 3 Bootstrap1 Austin 1.46 5.70e- 6\n#> 4 Bootstrap1 Harlem 
-0.637 9.01e- 5\n#> 5 Bootstrap2 (Intercept) 1.69 2.87e- 28\n#> 6 Bootstrap2 Clark_Lake 0.911 1.06e-219\n#> 7 Bootstrap2 Austin 0.595 5.93e- 2\n#> 8 Bootstrap2 Harlem -0.580 3.88e- 4\n#> 9 Bootstrap3 (Intercept) 1.27 3.43e- 16\n#> 10 Bootstrap3 Clark_Lake 0.859 5.03e-194\n#> 11 Bootstrap3 Austin 1.09 6.77e- 4\n#> 12 Bootstrap3 Harlem -0.470 4.34e- 3\n#> 13 Bootstrap4 (Intercept) 1.95 2.91e- 34\n#> 14 Bootstrap4 Clark_Lake 0.974 1.47e-233\n#> 15 Bootstrap4 Austin -0.116 7.21e- 1\n#> 16 Bootstrap4 Harlem -0.620 2.11e- 4\n#> 17 Bootstrap5 (Intercept) 1.87 1.98e- 33\n#> 18 Bootstrap5 Clark_Lake 0.901 1.16e-210\n#> 19 Bootstrap5 Austin 0.494 1.15e- 1\n#> 20 Bootstrap5 Harlem -0.512 1.73e- 3\n```\n:::\n\n\nThat's better! Now, let's plot the model coefficients for each resample: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-plot_2c303e3adc2f6dd3046a5360142aa7a6'}\n\n```{.r .cell-code}\nlm_coefs %>%\n filter(term != \"(Intercept)\") %>% \n ggplot(aes(x = term, y = estimate, group = id, col = id)) + \n geom_hline(yintercept = 0, lty = 3) + \n geom_line(alpha = 0.3, lwd = 1.2) + \n labs(y = \"Coefficient\", x = NULL) +\n theme(legend.position = \"top\")\n```\n\n::: {.cell-output-display}\n![](figs/lm-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThere seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two. \n\nLooking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases. \n\n## More complex: a glmnet model\n\nThe glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. 
Both of these cases are true for our Chicago train data set. \n\nThere are two types of penalization that this model uses: \n\n* Lasso (a.k.a. $L_1$) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model). \n\n* Weight decay (a.k.a ridge regression or $L_2$) uses a different type of penalty that is most useful for highly correlated predictors. \n\nThe glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-spec_9bc30453f801ce14ae951adc06b15382'}\n\n```{.r .cell-code}\nglmnet_spec <- \n linear_reg(penalty = 0.1, mixture = 0.95) %>% \n set_engine(\"glmnet\")\n```\n:::\n\n\nhas a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high). \n\n::: {.callout-note}\nModels with regularization require that predictors are all on the same scale. The ridership at our three stations are very different, but glmnet [automatically centers and scales the data](https://parsnip.tidymodels.org/reference/details_linear_reg_glmnet.html). You can use recipes to [center and scale your data yourself](https://recipes.tidymodels.org/reference/step_normalize.html). 
\n:::\n\nLet's combine the model specification with a formula in a model `workflow()` and then fit the model to the data:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-wflow_28ae9506b04e31712317af13c32fc492'}\n\n```{.r .cell-code}\nglmnet_wflow <- \n workflow() %>% \n add_model(glmnet_spec) %>% \n add_formula(ridership ~ .)\n\nglmnet_fit <- fit(glmnet_wflow, Chicago)\nglmnet_fit\n#> ══ Workflow [trained] ════════════════════════════════════════════════\n#> Preprocessor: Formula\n#> Model: linear_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> ridership ~ .\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> \n#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = \"gaussian\", alpha = ~0.95) \n#> \n#> Df %Dev Lambda\n#> 1 0 0.00 6.1040\n#> 2 1 12.75 5.5620\n#> 3 1 23.45 5.0680\n#> 4 1 32.43 4.6180\n#> 5 1 39.95 4.2070\n#> 6 1 46.25 3.8340\n#> 7 1 51.53 3.4930\n#> 8 1 55.94 3.1830\n#> 9 1 59.62 2.9000\n#> 10 1 62.70 2.6420\n#> 11 2 65.28 2.4080\n#> 12 2 67.44 2.1940\n#> 13 2 69.23 1.9990\n#> 14 2 70.72 1.8210\n#> 15 2 71.96 1.6600\n#> 16 2 73.00 1.5120\n#> 17 2 73.86 1.3780\n#> 18 2 74.57 1.2550\n#> 19 2 75.17 1.1440\n#> 20 2 75.66 1.0420\n#> 21 2 76.07 0.9496\n#> 22 2 76.42 0.8653\n#> 23 2 76.70 0.7884\n#> 24 2 76.94 0.7184\n#> 25 2 77.13 0.6545\n#> 26 2 77.30 0.5964\n#> 27 2 77.43 0.5434\n#> 28 2 77.55 0.4951\n#> 29 2 77.64 0.4512\n#> 30 2 77.72 0.4111\n#> 31 2 77.78 0.3746\n#> 32 2 77.84 0.3413\n#> 33 2 77.88 0.3110\n#> 34 2 77.92 0.2833\n#> 35 2 77.95 0.2582\n#> 36 2 77.98 0.2352\n#> 37 2 78.00 0.2143\n#> 38 2 78.01 0.1953\n#> 39 2 78.03 0.1779\n#> 40 2 78.04 0.1621\n#> 41 2 78.05 0.1477\n#> 42 2 78.06 0.1346\n#> 43 2 78.07 0.1226\n#> 44 2 78.07 0.1118\n#> 45 2 78.08 0.1018\n#> 46 2 78.08 0.0928\n#> \n#> ...\n#> and 9 more lines.\n```\n:::\n\n\nIn this output, the term `lambda` is used to represent the penalty. 
\n\nNote that the output shows many values of the penalty despite our specification of `penalty = 0.1`. It turns out that this model fits a \"path\" of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object. \n\nLet's look at two different approaches to obtaining the coefficients. Both will use the `tidy()` method. One will tidy a glmnet object and the other will tidy a tidymodels object. \n\n### Using glmnet penalty values\n\nThis glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are 55 penalties available. To get the set of penalties produced for this data set, we can extract the engine fit and tidy: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-tidy_05bd3dbb5c7d515388ba9419ca8efc13'}\n\n```{.r .cell-code}\nglmnet_fit %>% \n extract_fit_engine() %>% \n tidy() %>% \n rename(penalty = lambda) %>% # <- for consistent naming\n filter(term != \"(Intercept)\")\n#> # A tibble: 99 × 5\n#> term step estimate penalty dev.ratio\n#> \n#> 1 Clark_Lake 2 0.0753 5.56 0.127\n#> 2 Clark_Lake 3 0.145 5.07 0.234\n#> 3 Clark_Lake 4 0.208 4.62 0.324\n#> 4 Clark_Lake 5 0.266 4.21 0.400\n#> 5 Clark_Lake 6 0.319 3.83 0.463\n#> 6 Clark_Lake 7 0.368 3.49 0.515\n#> 7 Clark_Lake 8 0.413 3.18 0.559\n#> 8 Clark_Lake 9 0.454 2.90 0.596\n#> 9 Clark_Lake 10 0.491 2.64 0.627\n#> 10 Clark_Lake 11 0.526 2.41 0.653\n#> # ℹ 89 more rows\n```\n:::\n\n\nThis works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! The underlying package has functions that use interpolation to produce coefficients for this specific value, but the `tidy()` method for glmnet objects does not use it. 
\n\n### Using specific penalty values\n\nIf we run the `tidy()` method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-tidy-parsnip_d049f1afcd038ce9abf1afbf8f90957b'}\n\n```{.r .cell-code}\ntidy(glmnet_fit)\n#> # A tibble: 4 × 3\n#> term estimate penalty\n#> \n#> 1 (Intercept) 1.69 0.1\n#> 2 Clark_Lake 0.846 0.1\n#> 3 Austin 0.271 0.1\n#> 4 Harlem 0 0.1\n```\n:::\n\n\nFor any another (single) penalty, we can use an additional argument:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-tidy-parsnip-alt_9d44dd6fb9c558ff47086ce935f6ab75'}\n\n```{.r .cell-code}\ntidy(glmnet_fit, penalty = 5.5620) # A value from above\n#> # A tibble: 4 × 3\n#> term estimate penalty\n#> \n#> 1 (Intercept) 12.6 5.56\n#> 2 Clark_Lake 0.0753 5.56\n#> 3 Austin 0 5.56\n#> 4 Harlem 0 5.56\n```\n:::\n\n\nThe reason for having two `tidy()` methods is that, with tidymodels, the focus is on using a specific penalty value. \n\n\n### Tuning a glmnet model\n\nIf we know a priori acceptable values for penalty and mixture, we can use the `fit_resamples()` function as we did before with linear regression. Otherwise, we can tune those parameters with the tidymodels `tune_*()` functions. \n\nLet's tune our glmnet model over both parameters with this grid: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-grid_82eded1ed9042d072754a1e6b1f63e07'}\n\n```{.r .cell-code}\npen_vals <- 10^seq(-3, 0, length.out = 10)\ngrid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0))\n```\n:::\n\n\nHere is where more glmnet-related complexity comes in: we know that each resample and each value of `mixture` will probably produce a different set of penalty values contained in the model object. 
_How can we look at the coefficients at the specific penalty values that we are using to tune?_\n\nThe approach that we suggest is to use the special `path_values` option for glmnet. Details are described in the [technical documentation about glmnet and tidymodels](https://parsnip.tidymodels.org/reference/glmnet-details.html#arguments) but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture). \n\nWe can pass these as an engine argument and then update our previous workflow object:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-tune_c3d440d17e9fbbe4a453c338f995621a'}\n\n```{.r .cell-code}\nglmnet_tune_spec <- \n linear_reg(penalty = tune(), mixture = tune()) %>% \n set_engine(\"glmnet\", path_values = pen_vals)\n\nglmnet_wflow <- \n glmnet_wflow %>% \n update_model(glmnet_tune_spec)\n```\n:::\n\n\nNow we will use an extraction function similar to when we used ordinary least squares. We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-tuning_7df34aa8bf325e4213ccd9670bfec87e'}\n\n```{.r .cell-code}\nget_glmnet_coefs <- function(x) {\n x %>% \n extract_fit_engine() %>% \n tidy(return_zeros = TRUE) %>% \n rename(penalty = lambda)\n}\nparsnip_ctrl <- control_grid(extract = get_glmnet_coefs)\n\nglmnet_res <- \n glmnet_wflow %>% \n tune_grid(\n resamples = bt,\n grid = grid,\n control = parsnip_ctrl\n )\nglmnet_res\n#> # Tuning results\n#> # Bootstrap sampling \n#> # A tibble: 5 × 5\n#> splits id .metrics .notes .extracts\n#> \n#> 1 Bootstrap1 \n#> 2 Bootstrap2 \n#> 3 Bootstrap3 \n#> 4 Bootstrap4 \n#> 5 Bootstrap5 \n```\n:::\n\n\nAs noted before, the elements of the main `.extracts` column have an embedded list column with the results of `get_glmnet_coefs()`: \n\n\n::: {.cell layout-align=\"center\" 
hash='cache/glmnet-extract-single_2c5149b5c7a6bde62e52e3556127134f'}\n\n```{.r .cell-code}\nglmnet_res$.extracts[[1]] %>% head()\n#> # A tibble: 6 × 4\n#> penalty mixture .extracts .config \n#> \n#> 1 1 0.1 Preprocessor1_Model01\n#> 2 1 0.1 Preprocessor1_Model02\n#> 3 1 0.1 Preprocessor1_Model03\n#> 4 1 0.1 Preprocessor1_Model04\n#> 5 1 0.1 Preprocessor1_Model05\n#> 6 1 0.1 Preprocessor1_Model06\n\nglmnet_res$.extracts[[1]]$.extracts[[1]] %>% head()\n#> # A tibble: 6 × 5\n#> term step estimate penalty dev.ratio\n#> \n#> 1 (Intercept) 1 0.568 1 0.769\n#> 2 (Intercept) 2 0.432 0.464 0.775\n#> 3 (Intercept) 3 0.607 0.215 0.779\n#> 4 (Intercept) 4 0.846 0.1 0.781\n#> 5 (Intercept) 5 1.06 0.0464 0.782\n#> 6 (Intercept) 6 1.22 0.0215 0.783\n```\n:::\n\n\nAs before, we'll have to use a double `unnest()`. Since the penalty value is in both the top-level and lower-level `.extracts`, we'll use `select()` to get rid of the first version (but keep `mixture`):\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-extract-1_ef92df5ddd31141dbe968ce22e65f4df'}\n\n```{.r .cell-code}\nglmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% # <- removes the first penalty column\n unnest(.extracts)\n```\n:::\n\n\nBut wait! We know that each glmnet fit contains all of the coefficients. This means, for a specific resample and value of `mixture`, the results are the same: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-extract-dups_f08a739e7c0ca19ceffa7c9209dd09b4'}\n\n```{.r .cell-code}\nall.equal(\n # First bootstrap, first `mixture`, first `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[1]],\n # First bootstrap, first `mixture`, second `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[2]]\n)\n#> [1] TRUE\n```\n:::\n\n\nFor this reason, we'll add a `slice(1)` when grouping by `id` and `mixture`. This will get rid of the replicated results. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-extract-final_3d4b5e7e5454083a1ab18477970bfdd1'}\n\n```{.r .cell-code}\nglmnet_coefs <- \n glmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% \n group_by(id, mixture) %>% # ┐\n slice(1) %>% # │ Remove the redundant results\n ungroup() %>% # ┘\n unnest(.extracts)\n\nglmnet_coefs %>% \n select(id, penalty, mixture, term, estimate) %>% \n filter(term != \"(Intercept)\")\n#> # A tibble: 300 × 5\n#> id penalty mixture term estimate\n#> \n#> 1 Bootstrap1 1 0.1 Clark_Lake 0.391\n#> 2 Bootstrap1 0.464 0.1 Clark_Lake 0.485\n#> 3 Bootstrap1 0.215 0.1 Clark_Lake 0.590\n#> 4 Bootstrap1 0.1 0.1 Clark_Lake 0.680\n#> 5 Bootstrap1 0.0464 0.1 Clark_Lake 0.746\n#> 6 Bootstrap1 0.0215 0.1 Clark_Lake 0.793\n#> 7 Bootstrap1 0.01 0.1 Clark_Lake 0.817\n#> 8 Bootstrap1 0.00464 0.1 Clark_Lake 0.828\n#> 9 Bootstrap1 0.00215 0.1 Clark_Lake 0.834\n#> 10 Bootstrap1 0.001 0.1 Clark_Lake 0.837\n#> # ℹ 290 more rows\n```\n:::\n\n\nNow we have the coefficients. Let's look at how they behave as more regularization is used: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glmnet-plot_447f6477762ae31ebd27ef7db9ebff5e'}\n\n```{.r .cell-code}\nglmnet_coefs %>% \n filter(term != \"(Intercept)\") %>% \n mutate(mixture = format(mixture)) %>% \n ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + \n geom_hline(yintercept = 0, lty = 3) +\n geom_line(alpha = 0.5, lwd = 1.2) + \n facet_wrap(~ term) + \n scale_x_log10() +\n scale_color_brewer(palette = \"Accent\") +\n labs(y = \"coefficient\") +\n theme(legend.position = \"top\")\n```\n\n::: {.cell-output-display}\n![](figs/glmnet-plot-1.svg){fig-align='center' width=816}\n:::\n:::\n\n\nNotice a couple of things: \n\n* With a pure lasso model (i.e., `mixture = 1`), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. 
Also, as the penalty increases, the uncertainty in this coefficient decreases. \n\n* The Harlem predictor is either quickly selected out of the model or goes from negative to positive. \n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> glmnet * 4.1-7 2023-03-23 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + 
"preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/models/parsnip-nnet/index/execute-results/html.json b/_freeze/learn/models/parsnip-nnet/index/execute-results/html.json new file mode 100644 index 00000000..b00d5ebf --- /dev/null +++ b/_freeze/learn/models/parsnip-nnet/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "13b13e7b7da8957a9990bc6a65a0d470", + "result": { + "markdown": "---\ntitle: \"Classification models using a neural network\"\ncategories:\n - model fitting\n - torch\n - neural networks\ntype: learn-subsection\nweight: 2\ndescription: | \n Train a classification model and evaluate its performance.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: AppliedPredictiveModeling, brulee, and tidymodels. You will also need the python torch library installed (see `?torch::install_torch()`).\n\nWe can create classification models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict categorical quantities or class labels. Here, let's fit a single classification model using a neural network and evaluate using a validation set. While the [tune](https://tune.tidymodels.org/) package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage. \n\n## Fitting a neural network\n\n\nLet's fit a model to a small, two predictor classification data set. The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered. 
\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/biv--split_a52be8694e1630c82c8a997715818d2f'}\n\n```{.r .cell-code}\nlibrary(AppliedPredictiveModeling)\n\nset.seed(321)\ncls_train <- quadBoundaryFunc(2000) %>% select(A = X1, B = X2, class)\ncls_val <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\ncls_test <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\n```\n:::\n\n\nA plot of the data shows two right-skewed predictors: \n\n\n::: {.cell layout-align=\"center\" hash='cache/biv-plot_01f6128bd0ca79ee6725e5aa44a43ca8'}\n\n```{.r .cell-code}\nggplot(cls_train, aes(x = A, y = B, col = class)) + \n geom_point(alpha = 1 / 4, cex = 3) + \n coord_fixed()\n```\n\n::: {.cell-output-display}\n![](figs/biv-plot-1.svg){fig-align='center' width=576}\n:::\n:::\n\n\nLet's use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be more symmetric (via the `step_BoxCox()` function) and on a common scale (using `step_normalize()`). We can use [recipes](https://recipes.tidymodels.org/) to do so:\n\n\n::: {.cell layout-align=\"center\" hash='cache/biv--proc_ffa29913458a770549c19a00ec0100e9'}\n\n```{.r .cell-code}\nbiv_rec <- \n recipe(class ~ ., data = cls_train) %>%\n step_normalize(all_predictors())\n```\n:::\n\n\nThis recipe is not directly executed; the steps will be estimated when the model is fit. 
\n\nWe can use the brulee package to fit a model with 5 hidden units and a 10% dropout rate, to regularize the model:\n\n\n::: {.cell layout-align=\"center\" hash='cache/biv-nnet_bb4c7ebfa6b8b8a735ac07f1e5db3d0c'}\n\n```{.r .cell-code}\nnnet_spec <- \n mlp(epochs = 1000, hidden_units = 10, penalty = 0.01, learn_rate = 0.1) %>% \n set_engine(\"brulee\", validation = 0) %>% \n set_mode(\"classification\")\n\nnnet_wflow <- \n biv_rec %>% \n workflow(nnet_spec)\n\nset.seed(987)\nnnet_fit <- fit(nnet_wflow, cls_train)\nnnet_fit %>% extract_fit_engine()\n#> Multilayer perceptron\n#> \n#> relu activation\n#> 10 hidden units, 52 model parameters\n#> 2,000 samples, 2 features, 2 classes \n#> class weights Class1=1, Class2=1 \n#> weight decay: 0.01 \n#> dropout proportion: 0 \n#> batch size: 2000 \n#> learn rate: 0.1 \n#> training set loss after 1000 epochs: 0.375\n```\n:::\n\n\n## Model performance\n\nIn parsnip, the `predict()` function can be used to characterize performance on the validation set. 
Since parsnip always produces tibble outputs, these can just be column bound to the original data: \n\n\n::: {.cell layout-align=\"center\" hash='cache/biv--perf_790f24a48a151216e707b8de11042b11'}\n\n```{.r .cell-code}\nval_results <- \n cls_val %>%\n bind_cols(\n predict(nnet_fit, new_data = cls_val),\n predict(nnet_fit, new_data = cls_val, type = \"prob\")\n )\nval_results %>% slice(1:5)\n#> A B class .pred_class .pred_Class1 .pred_Class2\n#> 1 0.7632082 -0.04012164 Class2 Class2 0.06255509 0.93744493\n#> 2 0.9823745 -0.16911637 Class2 Class2 0.05721300 0.94278705\n#> 3 1.0558147 0.52817699 Class2 Class2 0.10368267 0.89631736\n#> 4 1.2424507 1.10902951 Class2 Class2 0.34966809 0.65033191\n#> 5 1.5889815 2.71047720 Class1 Class1 0.97951710 0.02048291\n\nval_results %>% roc_auc(truth = class, .pred_Class1)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 roc_auc binary 0.957\n\nval_results %>% accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 accuracy binary 0.91\n\nval_results %>% conf_mat(truth = class, .pred_class)\n#> Truth\n#> Prediction Class1 Class2\n#> Class1 175 18\n#> Class2 27 280\n```\n:::\n\n\nLet's also create a grid to get a visual sense of the class boundary for the test set.\n\n\n::: {.cell layout-align=\"center\" hash='cache/biv-boundary_649fab72ec1b36da93d0add4a6fbd0b8'}\n\n```{.r .cell-code}\na_rng <- range(cls_train$A)\nb_rng <- range(cls_train$B)\nx_grid <-\n expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100),\n B = seq(b_rng[1], b_rng[2], length.out = 100))\n\n\n# Make predictions using the transformed predictors but \n# attach them to the predictors in the original units: \nx_grid <- \n x_grid %>% \n bind_cols(predict(nnet_fit, x_grid, type = \"prob\"))\n\nggplot(x_grid, aes(x = A, y = B)) + \n geom_point(data = cls_test, aes(col = class), alpha = 1 / 2, cex = 3) +\n geom_contour(aes(z = .pred_Class1), breaks = .5, col = \"black\", linewidth = 1) + \n 
coord_fixed()\n```\n\n::: {.cell-output-display}\n![](figs/biv-boundary-1.svg){fig-align='center' width=576}\n:::\n:::\n\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> AppliedPredictiveModeling * 1.1-7 2018-05-22 [1] CRAN (R 4.3.0)\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> brulee 0.2.0 2022-09-19 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + 
"preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/models/parsnip-ranger-glmnet/index/execute-results/html.json b/_freeze/learn/models/parsnip-ranger-glmnet/index/execute-results/html.json new file mode 100644 index 00000000..aacbb61c --- /dev/null +++ b/_freeze/learn/models/parsnip-ranger-glmnet/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "f8ffb2111c7d543c8cd791224515ad25", + "result": { + "markdown": "---\ntitle: \"Regression models two ways\"\ncategories:\n - model fitting\n - random forests\n - linear regression\ntype: learn-subsection\nweight: 1\ndescription: | \n Create and train different kinds of regression models with different computational engines.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: glmnet, randomForest, ranger, and tidymodels.\n\nWe can create regression models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict continuous or numeric quantities. Here, let's first fit a random forest model, which does _not_ require all numeric input (see discussion [here](https://bookdown.org/max/FES/categorical-trees.html)) and discuss how to use `fit()` and `fit_xy()`, as well as _data descriptors_. \n\nSecond, let's fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip. \n\n## The Ames housing data\n\nWe'll use the Ames housing data set to demonstrate how to create regression models using parsnip. 
First, set up the data set and create a simple training/test set split:\n\n\n::: {.cell layout-align=\"center\" hash='cache/ames-split_18b5bf0134171b332b56ced5fc3b1911'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\n\ndata(ames)\n\nset.seed(4595)\ndata_split <- initial_split(ames, strata = \"Sale_Price\", prop = 0.75)\n\names_train <- training(data_split)\names_test <- testing(data_split)\n```\n:::\n\n\nThe use of the test set here is _only for illustration_; normally in a data analysis these data would be saved to the very end after many models have been evaluated. \n\n## Random forest\n\nWe'll start by fitting a random forest model to a small set of parameters. Let's create a model with the predictors `Longitude`, `Latitude`, `Lot_Area`, `Neighborhood`, and `Year_Sold`. A simple random forest model can be specified via:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-basic_02332292464935db3e80ae3984b88bfe'}\n\n```{.r .cell-code}\nrf_defaults <- rand_forest(mode = \"regression\")\nrf_defaults\n#> Random Forest Model Specification (regression)\n#> \n#> Computational engine: ranger\n```\n:::\n\n\nThe model will be fit with the ranger package by default. Since we didn't add any extra arguments to `fit`, _many_ of the arguments will be set to their defaults from the function `ranger::ranger()`. The help pages for the model function describe the default parameters and you can also use the `translate()` function to check out such details. 
\n\nThe parsnip package provides two different interfaces to fit a model: \n\n- the formula interface (`fit()`), and\n- the non-formula interface (`fit_xy()`).\n\nLet's start with the non-formula interface:\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-basic-xy_b14276d12ae7cf9ce85ecf44ee6a9bb4'}\n\n```{.r .cell-code}\npreds <- c(\"Longitude\", \"Latitude\", \"Lot_Area\", \"Neighborhood\", \"Year_Sold\")\n\nrf_xy_fit <- \n rf_defaults %>%\n set_engine(\"ranger\") %>%\n fit_xy(\n x = ames_train[, preds],\n y = log10(ames_train$Sale_Price)\n )\n\nrf_xy_fit\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) \n#> \n#> Type: Regression \n#> Number of trees: 500 \n#> Sample size: 2197 \n#> Number of independent variables: 5 \n#> Mtry: 2 \n#> Target node size: 5 \n#> Variable importance mode: none \n#> Splitrule: variance \n#> OOB prediction error (MSE): 0.008500188 \n#> R squared (OOB): 0.7239116\n```\n:::\n\n\nThe non-formula interface doesn't do anything to the predictors before passing them to the underlying model function. This particular model does _not_ require indicator variables (sometimes called \"dummy variables\") to be created prior to fitting the model. 
Note that the output shows \"Number of independent variables: 5\".\n\nFor regression models, we can use the basic `predict()` method, which returns a tibble with a column named `.pred`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-basic-xy-pred_6e4354a6451e46bc8faa718908d9a912'}\n\n```{.r .cell-code}\ntest_results <- \n ames_test %>%\n select(Sale_Price) %>%\n mutate(Sale_Price = log10(Sale_Price)) %>%\n bind_cols(\n predict(rf_xy_fit, new_data = ames_test[, preds])\n )\ntest_results %>% slice(1:5)\n#> # A tibble: 5 × 2\n#> Sale_Price .pred\n#> \n#> 1 5.39 5.25\n#> 2 5.28 5.29\n#> 3 5.23 5.26\n#> 4 5.21 5.30\n#> 5 5.60 5.51\n\n# summarize performance\ntest_results %>% metrics(truth = Sale_Price, estimate = .pred) \n#> # A tibble: 3 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 rmse standard 0.0945\n#> 2 rsq standard 0.733 \n#> 3 mae standard 0.0629\n```\n:::\n\n\nNote that: \n\n * If the model required indicator variables, we would have to create them manually prior to using `fit()` (perhaps using the recipes package).\n * We had to manually log the outcome prior to modeling. 
\n\nNow, for illustration, let's use the formula method using some new parameter values:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-basic-form_1e1aadae8c7bde9360f11e8062781218'}\n\n```{.r .cell-code}\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~3, x), num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) \n#> \n#> Type: Regression \n#> Number of trees: 1000 \n#> Sample size: 2197 \n#> Number of independent variables: 5 \n#> Mtry: 3 \n#> Target node size: 5 \n#> Variable importance mode: none \n#> Splitrule: variance \n#> OOB prediction error (MSE): 0.008402569 \n#> R squared (OOB): 0.7270823\n```\n:::\n\n \nSuppose that we would like to use the randomForest package instead of ranger. To do so, the only part of the syntax that needs to change is the `set_engine()` argument:\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-rf_7f5cc80f129451dce218438d3e2b5856'}\n\n```{.r .cell-code}\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"randomForest\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n#> parsnip model object\n#> \n#> \n#> Call:\n#> randomForest(x = maybe_data_frame(x), y = y, ntree = ~1000, mtry = min_cols(~3, x)) \n#> Type of random forest: regression\n#> Number of trees: 1000\n#> No. of variables tried at each split: 3\n#> \n#> Mean of squared residuals: 0.008472074\n#> % Var explained: 72.47\n```\n:::\n\n\nLook at the formula code that was printed out; one function uses the argument name `ntree` and the other uses `num.trees`. The parsnip models don't require you to know the specific names of the main arguments. 
\n\nNow suppose that we want to modify the value of `mtry` based on the number of predictors in the data. Usually, a good default value is `floor(sqrt(num_predictors))` but a pure bagging model requires an `mtry` value equal to the total number of parameters. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code. \n\nWhen the model it being fit by parsnip, [_data descriptors_](https://parsnip.tidymodels.org/reference/descriptors.html) are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using `rand_forest()`), the values of the arguments that you give it are _immediately evaluated_ unless you delay them. To delay the evaluation of any argument, you can used `rlang::expr()` to make an expression. \n\nTwo relevant data descriptors for our example model are:\n\n * `.preds()`: the number of predictor _variables_ in the data set that are associated with the predictors **prior to dummy variable creation**.\n * `.cols()`: the number of predictor _columns_ after dummy variables (or other encodings) are created.\n\nSince ranger won't create indicator values, `.preds()` would be appropriate for `mtry` for a bagging model. 
\n\nFor example, let's use an expression with the `.preds()` descriptor to fit a bagging model: \n\n\n::: {.cell layout-align=\"center\" hash='cache/bagged_2b76f70b641acbdb2616b84443585217'}\n\n```{.r .cell-code}\nrand_forest(mode = \"regression\", mtry = .preds(), trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~.preds(), x), num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) \n#> \n#> Type: Regression \n#> Number of trees: 1000 \n#> Sample size: 2197 \n#> Number of independent variables: 5 \n#> Mtry: 5 \n#> Target node size: 5 \n#> Variable importance mode: none \n#> Splitrule: variance \n#> OOB prediction error (MSE): 0.00867085 \n#> R squared (OOB): 0.7183685\n```\n:::\n\n\n\n## Regularized regression\n\nA linear model might work for this data set as well. We can use the `linear_reg()` parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let's use the former here. The glmnet package only implements a non-formula method, but parsnip will allow either one to be used. \n\nWhen regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won't do that automatically so we will need to do this ourselves. We'll use the [recipes](https://recipes.tidymodels.org/) package for these steps. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmn-form_a0ca81e5cfdf6601081373c7b271e499'}\n\n```{.r .cell-code}\nnorm_recipe <- \n recipe(\n Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, \n data = ames_train\n ) %>%\n step_other(Neighborhood) %>% \n step_dummy(all_nominal()) %>%\n step_center(all_predictors()) %>%\n step_scale(all_predictors()) %>%\n step_log(Sale_Price, base = 10) %>% \n # estimate the means and standard deviations\n prep(training = ames_train, retain = TRUE)\n\n# Now let's fit the model using the processed version of the data\n\nglmn_fit <- \n linear_reg(penalty = 0.001, mixture = 0.5) %>% \n set_engine(\"glmnet\") %>%\n fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL))\nglmn_fit\n#> parsnip model object\n#> \n#> \n#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = \"gaussian\", alpha = ~0.5) \n#> \n#> Df %Dev Lambda\n#> 1 0 0.00 0.138300\n#> 2 1 1.96 0.126000\n#> 3 1 3.72 0.114800\n#> 4 1 5.28 0.104600\n#> 5 2 7.07 0.095320\n#> 6 3 9.64 0.086850\n#> 7 4 12.58 0.079140\n#> 8 5 15.45 0.072110\n#> 9 5 17.93 0.065700\n#> 10 7 20.81 0.059860\n#> 11 7 23.51 0.054550\n#> 12 7 25.82 0.049700\n#> 13 8 28.20 0.045290\n#> 14 8 30.31 0.041260\n#> 15 8 32.12 0.037600\n#> 16 8 33.66 0.034260\n#> 17 8 34.97 0.031210\n#> 18 8 36.08 0.028440\n#> 19 8 37.02 0.025910\n#> 20 9 37.90 0.023610\n#> 21 9 38.65 0.021510\n#> 22 9 39.29 0.019600\n#> 23 9 39.83 0.017860\n#> 24 9 40.28 0.016270\n#> 25 10 40.68 0.014830\n#> 26 11 41.06 0.013510\n#> 27 11 41.38 0.012310\n#> 28 11 41.65 0.011220\n#> 29 11 41.88 0.010220\n#> 30 12 42.09 0.009313\n#> 31 12 42.27 0.008486\n#> 32 12 42.43 0.007732\n#> 33 12 42.56 0.007045\n#> 34 12 42.66 0.006419\n#> 35 12 42.75 0.005849\n#> 36 12 42.83 0.005329\n#> 37 12 42.90 0.004856\n#> 38 12 42.95 0.004424\n#> 39 12 42.99 0.004031\n#> 40 12 43.03 0.003673\n#> 41 12 43.06 0.003347\n#> 42 12 43.09 0.003050\n#> 43 12 43.11 0.002779\n#> 44 12 43.13 0.002532\n#> 45 12 43.15 0.002307\n#> 46 
12 43.16 0.002102\n#> 47 12 43.17 0.001915\n#> 48 12 43.18 0.001745\n#> 49 12 43.19 0.001590\n#> 50 12 43.19 0.001449\n#> 51 12 43.20 0.001320\n#> 52 12 43.20 0.001203\n#> 53 12 43.21 0.001096\n#> 54 12 43.21 0.000999\n#> 55 12 43.21 0.000910\n#> 56 12 43.21 0.000829\n#> 57 12 43.22 0.000755\n#> 58 12 43.22 0.000688\n#> 59 12 43.22 0.000627\n#> 60 12 43.22 0.000571\n#> 61 12 43.22 0.000521\n#> 62 12 43.22 0.000474\n#> 63 12 43.22 0.000432\n#> 64 12 43.22 0.000394\n#> 65 12 43.22 0.000359\n```\n:::\n\n\nIf `penalty` were not specified, all of the `lambda` values would be computed. \n\nTo get the predictions for this specific value of `lambda` (aka `penalty`):\n\n\n::: {.cell layout-align=\"center\" hash='cache/glmn-pred_673611c19e448251aeb977fec5788162'}\n\n```{.r .cell-code}\n# First, get the processed version of the test set predictors:\ntest_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors())\n\ntest_results <- \n test_results %>%\n rename(`random forest` = .pred) %>%\n bind_cols(\n predict(glmn_fit, new_data = test_normalized) %>%\n rename(glmnet = .pred)\n )\ntest_results\n#> # A tibble: 733 × 3\n#> Sale_Price `random forest` glmnet\n#> \n#> 1 5.39 5.25 5.16\n#> 2 5.28 5.29 5.27\n#> 3 5.23 5.26 5.24\n#> 4 5.21 5.30 5.24\n#> 5 5.60 5.51 5.24\n#> 6 5.32 5.29 5.26\n#> 7 5.17 5.14 5.18\n#> 8 5.06 5.13 5.17\n#> 9 4.98 5.01 5.18\n#> 10 5.11 5.14 5.19\n#> # ℹ 723 more rows\n\ntest_results %>% metrics(truth = Sale_Price, estimate = glmnet) \n#> # A tibble: 3 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 rmse standard 0.142 \n#> 2 rsq standard 0.391 \n#> 3 mae standard 0.0979\n\ntest_results %>% \n gather(model, prediction, -Sale_Price) %>% \n ggplot(aes(x = prediction, y = Sale_Price)) + \n geom_abline(col = \"green\", lty = 2) + \n geom_point(alpha = .4) + \n facet_wrap(~model) + \n coord_fixed()\n```\n\n::: {.cell-output-display}\n![](figs/glmn-pred-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThis final plot compares the performance of 
the random forest and regularized regression models.\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> glmnet * 4.1-7 2023-03-23 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> randomForest * 4.7-1.1 2022-05-23 [1] CRAN (R 4.3.0)\n#> ranger * 0.15.1 2023-04-03 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + 
"postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/models/pls/index/execute-results/html.json b/_freeze/learn/models/pls/index/execute-results/html.json new file mode 100644 index 00000000..2ec53e0c --- /dev/null +++ b/_freeze/learn/models/pls/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "ec7e110249609823915e0c48aba0fdfe", + "result": { + "markdown": "---\ntitle: \"Multivariate analysis using partial least squares\"\ncategories:\n - pre-processing\n - multivariate analysis\n - partial least squares\ntype: learn-subsection\nweight: 6\ndescription: | \n Build and fit a predictive model with more than one outcome.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: modeldata, pls, and tidymodels.\n\n\"Multivariate analysis\" usually refers to multiple _outcomes_ being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns `y1` and `y2` representing two outcomes to be predicted. The `lm()` function would look something like:\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm_36d90c1aed59c0679cdfdde16dfcf574'}\n\n```{.r .cell-code}\nlm(cbind(y1, y2) ~ ., data = dat)\n```\n:::\n\n\nThis `cbind()` call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes. \n\nThe data that we'll use has three outcomes. From `?modeldata::meats`:\n\n> \"These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. 
Each sample contains finely chopped pure meat with different moisture, fat and protein contents.\n\n> \"For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is `-log10` of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry.\"\n\nThe goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here. \n\nTo start, let's take the two data matrices (called `endpoints` and `absorp`) and bind them together in a data frame:\n\n\n::: {.cell layout-align=\"center\" hash='cache/data_7962e3e49e5e2bfe6e4626edd3a76067'}\n\n```{.r .cell-code}\nlibrary(modeldata)\ndata(meats)\n```\n:::\n\n\nThe three _outcomes_ have fairly high correlations also. \n\n## Preprocessing the data\n\nIf the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved _latent_ variables that are derived in a manner similar to principal component analysis (PCA). \n\nPLS, unlike PCA, also incorporates the outcome data when creating the PLS components. Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS _chases_ variation of the predictors and outcomes. \n\nSince we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables. \n\nMany base R functions that deal with multivariate outcomes using a formula require the use of `cbind()` on the left-hand side of the formula to work with the traditional formula methods. 
In tidymodels, recipes do not; the outcomes can be symbolically \"added\" together on the left-hand side:\n\n\n::: {.cell layout-align=\"center\" hash='cache/recipe_cadc0ff0922e4cf47f8b5ff54db7f0fa'}\n\n```{.r .cell-code}\nnorm_rec <- \n recipe(water + fat + protein ~ ., data = meats) %>%\n step_normalize(everything()) \n```\n:::\n\n\nBefore we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the _predictors and each of the outcomes_. This allows an informed choice to be made based on the level of evidence that the situation requires. \n\nSince the data set isn't large, let's use resampling to measure these proportions. With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions. \n\nThe folds can be created using the [rsample](https://rsample.tidymodels.org/) package and the recipe can be estimated for each resample using the [`prepper()`](https://rsample.tidymodels.org/reference/prepper.html) function: \n\n\n::: {.cell layout-align=\"center\" hash='cache/cv_92ea8083c6bb6e1a1890f5a0e59a5a0d'}\n\n```{.r .cell-code}\nset.seed(57343)\nfolds <- vfold_cv(meats, repeats = 10)\n\nfolds <- \n folds %>%\n mutate(recipes = map(splits, prepper, recipe = norm_rec))\n```\n:::\n\n\n## Partial least squares\n\nThe complicated parts for moving forward are:\n\n1. Formatting the predictors and outcomes into the format that the pls package requires, and\n2. Estimating the proportions. \n\nFor the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used `retain = TRUE` when prepping the recipes, we can `bake()` with `new_data = NULl` to get the processed data back out. 
To save the data as a matrix, the option `composition = \"matrix\"` will avoid saving the data as tibbles and use the required format. \n\nThe pls package expects a simple formula to specify the model, but each side of the formula should _represent a matrix_. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to \"protect\" the two matrices using `I()` when adding them to the data frame.\n\nThe calculation for the proportion of variance explained is straightforward for the predictors; the function `pls::explvar()` will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below). \n\nThe function `get_var_explained()` shown here will do all these computations and return a data frame with columns `components`, `source` (for the predictors, water, etc), and the `proportion` of variance that is explained by the components. \n\n\n\n::: {.cell layout-align=\"center\" hash='cache/var-explained_2acce73dfd57c3f85ec64c27afb58a06'}\n\n```{.r .cell-code}\nlibrary(pls)\n\nget_var_explained <- function(recipe, ...) {\n \n # Extract the predictors and outcomes into their own matrices\n y_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_outcomes())\n x_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_predictors())\n \n # The pls package prefers the data in a data frame where the outcome\n # and predictors are in _matrices_. To make sure this is formatted\n # properly, use the `I()` function to inhibit `data.frame()` from making\n # all the individual columns. 
`pls_format` should have two columns.\n pls_format <- data.frame(\n endpoints = I(y_mat),\n measurements = I(x_mat)\n )\n # Fit the model\n mod <- plsr(endpoints ~ measurements, data = pls_format)\n \n # Get the proportion of the predictor variance that is explained\n # by the model for different number of components. \n xve <- explvar(mod)/100 \n\n # To do the same for the outcome, it is more complex. This code \n # was extracted from pls:::summary.mvr. \n explained <- \n drop(pls::R2(mod, estimate = \"train\", intercept = FALSE)$val) %>% \n # transpose so that components are in rows\n t() %>% \n as_tibble() %>%\n # Add the predictor proportions\n mutate(predictors = cumsum(xve) %>% as.vector(),\n components = seq_along(xve)) %>%\n # Put into a tidy format that is tall\n pivot_longer(\n cols = c(-components),\n names_to = \"source\",\n values_to = \"proportion\"\n )\n}\n```\n:::\n\n\nWe compute this data frame for each resample and save the results in the different columns. \n\n\n::: {.cell layout-align=\"center\" hash='cache/get-estimates_692f96aaee6ff253cc50aa0c09d2c872'}\n\n```{.r .cell-code}\nfolds <- \n folds %>%\n mutate(var = map(recipes, get_var_explained),\n var = unname(var))\n```\n:::\n\n\nTo extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let's filter the data and compute the _average_ proportion.\n\n\n::: {.cell layout-align=\"center\" hash='cache/collapse-and-average_c90711694d17ab11dd03860f64ac6514'}\n\n```{.r .cell-code}\nvariance_data <- \n bind_rows(folds[[\"var\"]]) %>%\n filter(components <= 15) %>%\n group_by(components, source) %>%\n summarize(proportion = mean(proportion))\n#> `summarise()` has grouped output by 'components'. 
You can override\n#> using the `.groups` argument.\n```\n:::\n\n\nThe plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data. \n\n\n::: {.cell layout-align=\"center\" hash='cache/plot_c869332810008a4caf85c4116e9906ea'}\n\n```{.r .cell-code}\nggplot(variance_data, aes(x = components, y = proportion, col = source)) + \n geom_line(alpha = 0.5, size = 1.2) + \n geom_point() \n#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.\n#> ℹ Please use `linewidth` instead.\n```\n\n::: {.cell-output-display}\n![](figs/plot-1.svg){fig-align='center' width=100%}\n:::\n:::\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> pls * 2.8-1 2022-07-16 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 
4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/models/sub-sampling/index/execute-results/html.json b/_freeze/learn/models/sub-sampling/index/execute-results/html.json new file mode 100644 index 00000000..abd97cf3 --- /dev/null +++ b/_freeze/learn/models/sub-sampling/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "9762993c3e75276ef74c284c258121e5", + "result": { + "markdown": "---\ntitle: \"Subsampling for class imbalances\"\ncategories:\n - model fitting\n - pre-processing\n - class imbalances\n - discriminant analysis\ntype: learn-subsection\nweight: 3\ndescription: | \n Improve model performance in imbalanced data sets through undersampling or oversampling.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: discrim, klaR, readr, ROSE, themis, and tidymodels.\n\nSubsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. 
In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance. \n\nThis article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/measuring-performance.html) for more information on these metrics. \n\n## Simulated data\n\nConsider a two-class problem where the first class has a very low rate of occurrence. The data were simulated and can be imported into R using the code below:\n\n\n::: {.cell layout-align=\"center\" hash='cache/load-data_d5ffcbae3d559e32088adc0f27e4bb37'}\n\n```{.r .cell-code}\nimbal_data <- \n readr::read_csv(\"https://bit.ly/imbal_data\") %>% \n mutate(Class = factor(Class))\ndim(imbal_data)\n#> [1] 1200 16\ntable(imbal_data$Class)\n#> \n#> Class1 Class2 \n#> 60 1140\n```\n:::\n\n\nIf \"Class1\" is the event of interest, it is very likely that a classification model would be able to achieve very good _specificity_ since almost all of the data are of the second class. _Sensitivity_, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class. \n\nOne result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance. \n\n## Subsampling the data\n\nOne way to alleviate this issue is to _subsample_ the data. There are a number of ways to do this but the most simple one is to _sample down_ (undersample) the majority class data until it occurs with the same frequency as the minority class. 
While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are _better calibrated_, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise. \n\nLet's explore subsampling using `themis::step_rose()` in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from [Menardi, G. and Torelli, N. (2014)](https://scholar.google.com/scholar?hl=en&q=%22training+and+assessing+classification+rules+with+imbalanced+data%22). This is an example of an oversampling strategy, rather than undersampling.\n\nIn terms of workflow:\n\n * It is extremely important that subsampling occurs _inside of resampling_. Otherwise, the resampling process can produce [poor estimates of model performance](https://topepo.github.io/caret/subsampling-for-class-imbalances.html#resampling). \n * The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen \"in the wild\" and, for this reason, the `skip` argument to `step_downsample()` and other subsampling recipes steps has a default of `TRUE`. \n\nHere is a simple recipe implementing oversampling: \n\n\n::: {.cell layout-align=\"center\" hash='cache/rec_284148e0900db2f3ccf21596f79fd3ab'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nlibrary(themis)\nimbal_rec <- \n recipe(Class ~ ., data = imbal_data) %>%\n step_rose(Class)\n```\n:::\n\n\nFor a model, let's use a [quadratic discriminant analysis](https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis) (QDA) model. 
From the discrim package, this model can be specified using:\n\n\n::: {.cell layout-align=\"center\" hash='cache/qda_c232c75cb7155b22a49defee9e3fd73b'}\n\n```{.r .cell-code}\nlibrary(discrim)\nqda_mod <- \n discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% \n set_engine(\"klaR\")\n```\n:::\n\n\nTo keep these objects bound together, they can be combined in a [workflow](https://workflows.tidymodels.org/):\n\n\n::: {.cell layout-align=\"center\" hash='cache/wflw_4104f55c2acf370483095be6ffefc736'}\n\n```{.r .cell-code}\nqda_rose_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_recipe(imbal_rec)\nqda_rose_wflw\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: discrim_regularized()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 1 Recipe Step\n#> \n#> • step_rose()\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Regularized Discriminant Model Specification (classification)\n#> \n#> Main Arguments:\n#> frac_common_cov = 0\n#> frac_identity = 0\n#> \n#> Computational engine: klaR\n```\n:::\n\n\n## Model performance\n\nStratified, repeated 10-fold cross-validation is used to resample the model:\n\n\n::: {.cell layout-align=\"center\" hash='cache/cv_257b1666f905152b0854162652710a31'}\n\n```{.r .cell-code}\nset.seed(5732)\ncv_folds <- vfold_cv(imbal_data, strata = \"Class\", repeats = 5)\n```\n:::\n\n\nTo measure model performance, let's use two metrics:\n\n * The area under the [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) is an overall assessment of performance across _all_ cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor. \n * The _J_ index (a.k.a. [Youden's _J_](https://en.wikipedia.org/wiki/Youden%27s_J_statistic) statistic) is `sensitivity + specificity - 1`. Values near one are once again best. 
\n\nIf a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the _J_ index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics. \n\n\n::: {.cell layout-align=\"center\" hash='cache/metrics_e973879401e7878aefe8e78b0970dbc3'}\n\n```{.r .cell-code}\ncls_metrics <- metric_set(roc_auc, j_index)\n```\n:::\n\n\nNow, we train the models and generate the results using `tune::fit_resamples()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/resample-rose_9c88feb770ed61c524ccbe0365481e64'}\n\n```{.r .cell-code}\nset.seed(2180)\nqda_rose_res <- fit_resamples(\n qda_rose_wflw, \n resamples = cv_folds, \n metrics = cls_metrics\n)\n\ncollect_metrics(qda_rose_res)\n#> # A tibble: 2 × 6\n#> .metric .estimator mean n std_err .config \n#> \n#> 1 j_index binary 0.749 50 0.0234 Preprocessor1_Model1\n#> 2 roc_auc binary 0.949 50 0.00510 Preprocessor1_Model1\n```\n:::\n\n\nWhat do the results look like without using ROSE? We can create another workflow and fit the QDA model along the same resamples:\n\n\n::: {.cell layout-align=\"center\" hash='cache/qda-only_434ef41719060a908cfc5cf09f4f36fa'}\n\n```{.r .cell-code}\nqda_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_formula(Class ~ .)\n\nset.seed(2180)\nqda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics)\ncollect_metrics(qda_only_res)\n#> # A tibble: 2 × 6\n#> .metric .estimator mean n std_err .config \n#> \n#> 1 j_index binary 0.250 50 0.0288 Preprocessor1_Model1\n#> 2 roc_auc binary 0.953 50 0.00479 Preprocessor1_Model1\n```\n:::\n\n\nIt looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity. 
\n\nLet's plot the metrics for each resample to see how the individual results changed. \n\n\n::: {.cell layout-align=\"center\" hash='cache/merge-metrics_e2e3e8b29cc2a2d8e7ea774b83e8b5dc'}\n\n```{.r .cell-code}\nno_sampling <- \n qda_only_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"no_sampling\")\n\nwith_sampling <- \n qda_rose_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"rose\")\n\nbind_rows(no_sampling, with_sampling) %>% \n mutate(label = paste(id2, id)) %>% \n ggplot(aes(x = sampling, y = .estimate, group = label)) + \n geom_line(alpha = .4) + \n facet_wrap(~ .metric, scales = \"free_y\")\n```\n\n::: {.cell-output-display}\n![](figs/merge-metrics-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThis visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions. \n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> discrim * 1.0.1 2023-03-08 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> klaR * 1.7-2 2023-03-17 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN 
(R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> ROSE * 0.0-4 2021-06-14 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> themis * 1.0.1 2023-04-14 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/models/time-series/index/execute-results/html.json b/_freeze/learn/models/time-series/index/execute-results/html.json new file mode 100644 index 00000000..ed2e3120 --- /dev/null +++ b/_freeze/learn/models/time-series/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "244ec86d88ed9c90bcc629c917defd9d", + "result": { + "markdown": "---\ntitle: \"Modeling time series with tidy resampling\"\ncategories:\n - model fitting\n - time series\ntype: learn-subsection\nweight: 4\ndescription: | \n Calculate performance estimates for time series forecasts using resampling.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: forecast, sweep, tidymodels, timetk, and zoo.\n\n\"[Demo Week: Tidy Forecasting with sweep](https://www.business-science.io/code-tools/2017/10/25/demo_week_sweep.html)\" is an excellent 
article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using [rolling forecast origin resampling](https://robjhyndman.com/hyndsight/crossvalidation/). \n\n## Example data\n\nThe data for this article are sales of alcoholic beverages originally from [the Federal Reserve Bank of St. Louis website](https://fred.stlouisfed.org/series/S4248SM144NCEN).\n\n\n::: {.cell layout-align=\"center\" hash='cache/read-data_01b26233ff6bfabab491b8c16997ff7d'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nlibrary(modeldata)\ndata(\"drinks\")\nglimpse(drinks)\n#> Rows: 309\n#> Columns: 2\n#> $ date 1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-0…\n#> $ S4248SM144NCEN 3459, 3458, 4002, 4564, 4221, 4529, 4466, 4137, 4126, 4…\n```\n:::\n\n\nEach row represents one month of sales (in millions of US dollars). \n\n## Time series resampling\n\nSuppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. 
To set up this resampling scheme:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rof_a3901abdf6e0b2628ab084f7f5963b30'}\n\n```{.r .cell-code}\nroll_rs <- rolling_origin(\n drinks, \n initial = 12 * 20, \n assess = 12,\n cumulative = FALSE\n )\n\nnrow(roll_rs)\n#> [1] 58\n\nroll_rs\n#> # Rolling origin forecast resampling \n#> # A tibble: 58 × 2\n#> splits id \n#> \n#> 1 Slice01\n#> 2 Slice02\n#> 3 Slice03\n#> 4 Slice04\n#> 5 Slice05\n#> 6 Slice06\n#> 7 Slice07\n#> 8 Slice08\n#> 9 Slice09\n#> 10 Slice10\n#> # ℹ 48 more rows\n```\n:::\n\n\nEach `split` element contains the information about that resample:\n\n\n::: {.cell layout-align=\"center\" hash='cache/split_5fdb9ea2a48981f11ea3492abe5e9a3b'}\n\n```{.r .cell-code}\nroll_rs$splits[[1]]\n#> \n#> <240/12/309>\n```\n:::\n\n\nFor plotting, let's index each split by the first day of the assessment set:\n\n\n::: {.cell layout-align=\"center\" hash='cache/labels_0aca53ecc7434fd84f4e6e048b075dc0'}\n\n```{.r .cell-code}\nget_date <- function(x) {\n min(assessment(x)$date)\n}\n\nstart_date <- map(roll_rs$splits, get_date)\nroll_rs$start_date <- do.call(\"c\", start_date)\nhead(roll_rs$start_date)\n#> [1] \"2012-01-01\" \"2012-02-01\" \"2012-03-01\" \"2012-04-01\" \"2012-05-01\"\n#> [6] \"2012-06-01\"\n```\n:::\n\n\nThis resampling scheme has 58 splits of the data so that there will be 58 ARIMA models that are fit. To create the models, we use the `auto.arima()` function from the forecast package. The rsample functions `analysis()` and `assessment()` return a data frame, so another step converts the data to a `ts` object called `mod_dat` using a function in the timetk package.\n\n\n::: {.cell layout-align=\"center\" hash='cache/model-fun_873a12102712d4dad2abf7d98219cd29'}\n\n```{.r .cell-code}\nlibrary(forecast) # for `auto.arima`\nlibrary(timetk) # for `tk_ts`\nlibrary(zoo) # for `as.yearmon`\n\nfit_model <- function(x, ...) 
{\n # suggested by Matt Dancho:\n x %>%\n analysis() %>%\n # Since the first day changes over resamples, adjust it\n # based on the first date value in the data frame \n tk_ts(start = .$date[[1]] %>% as.yearmon(), \n frequency = 12, \n silent = TRUE) %>%\n auto.arima(...)\n}\n```\n:::\n\n\nSave each model in a new column:\n\n\n::: {.cell layout-align=\"center\" hash='cache/model-fit_fcf002b6825a607bf4e20193cb40ff1d'}\n\n```{.r .cell-code}\nroll_rs$arima <- map(roll_rs$splits, fit_model)\n\n# For example:\nroll_rs$arima[[1]]\n#> Series: . \n#> ARIMA(4,1,1)(0,1,2)[12] \n#> \n#> Coefficients:\n#> ar1 ar2 ar3 ar4 ma1 sma1 sma2\n#> -0.1852 -0.0238 0.3577 -0.1517 -0.8311 -0.193 -0.3244\n#> s.e. 0.1466 0.1656 0.1440 0.0809 0.1377 0.067 0.0640\n#> \n#> sigma^2 = 72198: log likelihood = -1591.15\n#> AIC=3198.3 AICc=3198.97 BIC=3225.7\n```\n:::\n\n\n(There are some warnings produced by these regarding extra columns in the data that can be ignored.)\n\n## Model performance\n\nUsing the model fits, let's measure performance in two ways:\n\n * _Interpolation_ error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used. \n * _Extrapolation_ or _forecast_ error evaluates the performance of the model on the data from the following year (that were not used in the model fit).\n \nIn each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the `Arima` object. To make things easy, let's use the sweep package's `sw_glance()` function:\n\n\n::: {.cell layout-align=\"center\" hash='cache/interp_a12a048c24fd442bf30585d097853601'}\n\n```{.r .cell-code}\nlibrary(sweep)\n\nroll_rs$interpolation <- map_dbl(\n roll_rs$arima,\n function(x) \n sw_glance(x)[[\"MAPE\"]]\n )\n\nsummary(roll_rs$interpolation)\n#> Min. 1st Qu. Median Mean 3rd Qu. Max. 
\n#> 2.841 2.921 2.950 2.947 2.969 3.135\n```\n:::\n\n\nFor the extrapolation error, the model and split objects are required. Using these:\n\n\n::: {.cell layout-align=\"center\" hash='cache/extrap_a2698a074c1c93aafef9deb134c18898'}\n\n```{.r .cell-code}\nget_extrap <- function(split, mod) {\n n <- nrow(assessment(split))\n # Get assessment data\n pred_dat <- assessment(split) %>%\n mutate(\n pred = as.vector(forecast(mod, h = n)$mean),\n pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100\n )\n mean(abs(pred_dat$pct_error))\n}\n\nroll_rs$extrapolation <- \n map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap)\n\nsummary(roll_rs$extrapolation)\n#> Min. 1st Qu. Median Mean 3rd Qu. Max. \n#> 2.371 3.231 3.629 3.654 4.113 5.453\n```\n:::\n\n\nWhat do these error estimates look like over time?\n\n\n::: {.cell layout-align=\"center\" hash='cache/plot_3f95e9f1f47c3c69779b59f205ee7673'}\n\n```{.r .cell-code}\nroll_rs %>%\n select(interpolation, extrapolation, start_date) %>%\n pivot_longer(cols = matches(\"ation\"), names_to = \"error\", values_to = \"MAPE\") %>%\n ggplot(aes(x = start_date, y = MAPE, col = error)) + \n geom_point() + \n geom_line()\n```\n\n::: {.cell-output-display}\n![](figs/plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nIt is likely that the interpolation error is an underestimate to some degree, as mentioned above. \n\nIt is also worth noting that `rolling_origin()` can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days.\n\nThe example below demonstrates this idea by splitting `drinks` into a nested set of 26 years, and rolling over years rather than months. 
Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month.\n\n\n::: {.cell layout-align=\"center\" hash='cache/rof-annual_57bb05c6982e168bb14d462998eb2839'}\n\n```{.r .cell-code}\n# The idea is to nest by the period to roll over,\n# which in this case is the year.\nroll_rs_annual <- drinks %>%\n mutate(year = as.POSIXlt(date)$year + 1900) %>%\n nest(data = c(date, S4248SM144NCEN)) %>%\n rolling_origin(\n initial = 20, \n assess = 1, \n cumulative = FALSE\n )\n\nanalysis(roll_rs_annual$splits[[1]])\n#> # A tibble: 20 × 2\n#> year data \n#> \n#> 1 1992 \n#> 2 1993 \n#> 3 1994 \n#> 4 1995 \n#> 5 1996 \n#> 6 1997 \n#> 7 1998 \n#> 8 1999 \n#> 9 2000 \n#> 10 2001 \n#> 11 2002 \n#> 12 2003 \n#> 13 2004 \n#> 14 2005 \n#> 15 2006 \n#> 16 2007 \n#> 17 2008 \n#> 18 2009 \n#> 19 2010 \n#> 20 2011 \n```\n:::\n\n\nThe workflow to access these calendar slices is to use `bind_rows()` to join\neach analysis set together.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-13_39cadf66f7df3b333da1ff288df20ec3'}\n\n```{.r .cell-code}\nmutate(\n roll_rs_annual,\n extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data))\n)\n#> # Rolling origin forecast resampling \n#> # A tibble: 6 × 3\n#> splits id extracted_slice \n#> \n#> 1 Slice1 \n#> 2 Slice2 \n#> 3 Slice3 \n#> 4 Slice4 \n#> 5 Slice5 \n#> 6 Slice6 \n```\n:::\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via 
rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> forecast * 8.21 2023-02-27 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> sweep * 0.2.4 2023-05-25 [1] Github (business-science/sweep@d0327bc)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> timetk * 2.8.3 2023-03-30 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> zoo * 1.8-12 2023-04-13 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/statistics/bootstrap/index/execute-results/html.json b/_freeze/learn/statistics/bootstrap/index/execute-results/html.json new file mode 100644 index 00000000..d2cf748c --- /dev/null +++ b/_freeze/learn/statistics/bootstrap/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "b58d401429e0dd37eeb2f6f84278d355", + "result": { + "markdown": "---\ntitle: \"Bootstrap resampling and tidy regression models\"\ncategories:\n - statistical analysis\n - 
bootstraping\n - tidying results\n - confidence intervals\ntype: learn-subsection\nweight: 3\ndescription: | \n Apply bootstrap resampling to estimate uncertainty in model parameters.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nThis article only requires the tidymodels package.\n\nCombining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. These approaches have been explored before, for instance by [Andrew MacDonald here](https://rstudio-pubs-static.s3.amazonaws.com/19698_a4c472606e3c43e4b94720506e49bb7b.html), and [Hadley has explored efficient support for bootstrapping](https://github.com/hadley/dplyr/issues/269) as a potential enhancement to dplyr. The tidymodels package [broom](https://broom.tidyverse.org/) fits naturally with [dplyr](https://dplyr.tidyverse.org/) in performing these analyses.\n\nBootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate.\n\nLet's say we want to fit a nonlinear model to the weight/mileage relationship in the `mtcars` data set.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_4d1be3efaa09424aefbeea43776a815a'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\n\nggplot(mtcars, aes(mpg, wt)) + \n geom_point()\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-3-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nWe might use the method of nonlinear least squares (via the `nls()` function) to fit a model.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-4_2bafa86d0e39af7c235277e2037c35a7'}\n\n```{.r .cell-code}\nnlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0))\nsummary(nlsfit)\n#> \n#> Formula: mpg ~ k/wt + b\n#> \n#> Parameters:\n#> Estimate Std. 
Error t value Pr(>|t|) \n#> k 45.829 4.249 10.786 7.64e-12 ***\n#> b 4.386 1.536 2.855 0.00774 ** \n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n#> \n#> Residual standard error: 2.774 on 30 degrees of freedom\n#> \n#> Number of iterations to convergence: 1 \n#> Achieved convergence tolerance: 6.813e-09\n\nggplot(mtcars, aes(wt, mpg)) +\n geom_point() +\n geom_line(aes(y = predict(nlsfit)))\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-4-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nWhile this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data.\n\n## Bootstrapping models\n\nWe can use the `bootstraps()` function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an `rset`, which is a data frame with a column of `rsplit` objects.\n\nAn `rsplit` object has two main components: an analysis data set and an assessment data set, accessible via `analysis(rsplit)` and `assessment(rsplit)` respectively. 
For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_63f97c7e2129364ca39ef1776d4f9840'}\n\n```{.r .cell-code}\nset.seed(27)\nboots <- bootstraps(mtcars, times = 2000, apparent = TRUE)\nboots\n#> # Bootstrap sampling with apparent sample \n#> # A tibble: 2,001 × 2\n#> splits id \n#> \n#> 1 Bootstrap0001\n#> 2 Bootstrap0002\n#> 3 Bootstrap0003\n#> 4 Bootstrap0004\n#> 5 Bootstrap0005\n#> 6 Bootstrap0006\n#> 7 Bootstrap0007\n#> 8 Bootstrap0008\n#> 9 Bootstrap0009\n#> 10 Bootstrap0010\n#> # ℹ 1,991 more rows\n```\n:::\n\n\nLet's create a helper function to fit an `nls()` model on each bootstrap sample, and then use `purrr::map()` to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-6_a74d37c174fe360a0893118e4316c3f5'}\n\n```{.r .cell-code}\nfit_nls_on_bootstrap <- function(split) {\n nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0))\n}\n\nboot_models <-\n boots %>% \n mutate(model = map(splits, fit_nls_on_bootstrap),\n coef_info = map(model, tidy))\n\nboot_coefs <- \n boot_models %>% \n unnest(coef_info)\n```\n:::\n\n\nThe unnested coefficient information contains a summary of each replication combined in a single data frame:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-7_b968cf604ebf00ac63257cbf5b3fac8a'}\n\n```{.r .cell-code}\nboot_coefs\n#> # A tibble: 4,002 × 8\n#> splits id model term estimate std.error statistic p.value\n#> \n#> 1 Bootstrap0… k 42.1 4.05 10.4 1.91e-11\n#> 2 Bootstrap0… b 5.39 1.43 3.78 6.93e- 4\n#> 3 Bootstrap0… k 49.9 5.66 8.82 7.82e-10\n#> 4 Bootstrap0… b 3.73 1.92 1.94 6.13e- 2\n#> 5 Bootstrap0… k 37.8 2.68 14.1 9.01e-15\n#> 6 Bootstrap0… b 6.73 1.17 5.75 2.78e- 6\n#> 7 Bootstrap0… k 45.6 4.45 10.2 
2.70e-11\n#> 8 Bootstrap0… b 4.75 1.62 2.93 6.38e- 3\n#> 9 Bootstrap0… k 43.6 4.63 9.41 1.85e-10\n#> 10 Bootstrap0… b 5.89 1.68 3.51 1.44e- 3\n#> # ℹ 3,992 more rows\n```\n:::\n\n\n## Confidence intervals\n\nWe can then calculate confidence intervals (using what is called the [percentile method](https://www.uvm.edu/~dhowell/StatPages/Randomization%20Tests/ResamplingWithR/BootstMeans/bootstrapping_means.html)):\n\n\n::: {.cell layout-align=\"center\" hash='cache/percentiles_c9af04d41758d5e2d53d422bbfdc4e1b'}\n\n```{.r .cell-code}\npercentile_intervals <- int_pctl(boot_models, coef_info)\npercentile_intervals\n#> # A tibble: 2 × 6\n#> term .lower .estimate .upper .alpha .method \n#> \n#> 1 b 0.0475 4.12 7.31 0.05 percentile\n#> 2 k 37.6 46.7 59.8 0.05 percentile\n```\n:::\n\n\nOr we can use histograms to get a more detailed idea of the uncertainty in each estimate:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_61fefcd26abbc2962014e78c500cb006'}\n\n```{.r .cell-code}\nggplot(boot_coefs, aes(estimate)) +\n geom_histogram(bins = 30) +\n facet_wrap( ~ term, scales = \"free\") +\n geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = \"blue\") +\n geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = \"blue\")\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-9-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe rsample package also has functions for [other types of confidence intervals](https://rsample.tidymodels.org/reference/int_pctl.html). \n\n## Possible model fits\n\nWe can use `augment()` to visualize the uncertainty in the fitted curve. 
Since there are so many bootstrap samples, we'll only show a sample of the model fits in our visualization:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_b3da0f309c5814e5008c4e0f1250ae7a'}\n\n```{.r .cell-code}\nboot_aug <- \n boot_models %>% \n sample_n(200) %>% \n mutate(augmented = map(model, augment)) %>% \n unnest(augmented)\n\nboot_aug\n#> # A tibble: 6,400 × 8\n#> splits id model coef_info mpg wt .fitted .resid\n#> \n#> 1 Bootstrap1644 16.4 4.07 15.6 0.829\n#> 2 Bootstrap1644 19.7 2.77 21.9 -2.21 \n#> 3 Bootstrap1644 19.2 3.84 16.4 2.84 \n#> 4 Bootstrap1644 21.4 2.78 21.8 -0.437\n#> 5 Bootstrap1644 26 2.14 27.8 -1.75 \n#> 6 Bootstrap1644 33.9 1.84 32.0 1.88 \n#> 7 Bootstrap1644 32.4 2.2 27.0 5.35 \n#> 8 Bootstrap1644 30.4 1.62 36.1 -5.70 \n#> 9 Bootstrap1644 21.5 2.46 24.4 -2.86 \n#> 10 Bootstrap1644 26 2.14 27.8 -1.75 \n#> # ℹ 6,390 more rows\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-11_610d1c960519a5123340e494bdac45c4'}\n\n```{.r .cell-code}\nggplot(boot_aug, aes(wt, mpg)) +\n geom_line(aes(y = .fitted, group = id), alpha = .2, col = \"blue\") +\n geom_point()\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-11-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nWith only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the `tidy()` and `augment()` functions works for many statistical outputs. 
As another example, we could use `smooth.spline()`, which fits a cubic smoothing spline to data:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-12_163a2bbcfb15170c317936822fc092e9'}\n\n```{.r .cell-code}\nfit_spline_on_bootstrap <- function(split) {\n data <- analysis(split)\n smooth.spline(data$wt, data$mpg, df = 4)\n}\n\nboot_splines <- \n boots %>% \n sample_n(200) %>% \n mutate(spline = map(splits, fit_spline_on_bootstrap),\n aug_train = map(spline, augment))\n\nsplines_aug <- \n boot_splines %>% \n unnest(aug_train)\n\nggplot(splines_aug, aes(x, y)) +\n geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = \"blue\") +\n geom_point()\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-12-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> 
tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/statistics/infer/index/execute-results/html.json b/_freeze/learn/statistics/infer/index/execute-results/html.json new file mode 100644 index 00000000..3a5071fa --- /dev/null +++ b/_freeze/learn/statistics/infer/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "b8ef3e225606acb5f3e0826435bebfed", + "result": { + "markdown": "---\ntitle: \"Hypothesis testing using resampling and tidy data\"\ncategories:\n - statistical analysis\n - hypothesis testing\n - bootstraping\ntype: learn-subsection\nweight: 4\ndescription: | \n Perform common hypothesis tests for statistical inference using flexible functions.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nThis article only requires the tidymodels package. \n\nThe tidymodels package [infer](https://infer.tidymodels.org/) implements an expressive grammar to perform statistical inference that coheres with the `tidyverse` design framework. 
Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs.\n\nRegardless of which hypothesis test we're using, we're still asking the same kind of question: \n\n>Is the effect or difference in our observed data real, or due to chance? \n\nTo answer this question, we start by assuming that the observed data came from some world where \"nothing is going on\" (i.e. the observed effect was simply due to random chance), and call this assumption our **null hypothesis**. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the **alternate hypothesis**, which supposes that the effect present in the observed data is actually due to the fact that \"something is going on.\") We then calculate a **test statistic** from our data that describes the observed effect. We can use this test statistic to calculate a **p-value**, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined **significance level** $\\alpha$, then we can reject our null hypothesis.\n\nIf you are new to hypothesis testing, take a look at \n\n* [Section 9.2 of _Statistical Inference via Data Science_](https://moderndive.com/9-hypothesis-testing.html#understanding-ht)\n* The American Statistical Association's recent [statement on p-values](https://doi.org/10.1080/00031305.2016.1154108) \n\nThe workflow of this package is designed around these ideas. 
Starting from some data set,\n\n+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in,\n+ `hypothesize()` allows you to declare the null hypothesis,\n+ `generate()` allows you to generate data reflecting the null hypothesis, and\n+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution.\n\nThroughout this vignette, we make use of `gss`, a data set available in infer containing a sample of 500 observations of 11 variables from the *General Social Survey*. \n\n\n::: {.cell layout-align=\"center\" hash='cache/load-gss_2bc44232d1aa786bcdceae545fa6f4f0'}\n\n```{.r .cell-code}\nlibrary(tidymodels) # Includes the infer package\n\n# load in the data set\ndata(gss)\n\n# take a look at its structure\ndplyr::glimpse(gss)\n#> Rows: 500\n#> Columns: 11\n#> $ year 2014, 1994, 1998, 1996, 1994, 1996, 1990, 2016, 2000, 1998, 20…\n#> $ age 36, 34, 24, 42, 31, 32, 48, 36, 30, 33, 21, 30, 38, 49, 25, 56…\n#> $ sex male, female, male, male, male, female, female, female, female…\n#> $ college degree, no degree, degree, no degree, degree, no degree, no de…\n#> $ partyid ind, rep, ind, ind, rep, rep, dem, ind, rep, dem, dem, ind, de…\n#> $ hompop 3, 4, 1, 4, 2, 4, 2, 1, 5, 2, 4, 3, 4, 4, 2, 2, 3, 2, 1, 2, 5,…\n#> $ hours 50, 31, 40, 40, 40, 53, 32, 20, 40, 40, 23, 52, 38, 72, 48, 40…\n#> $ income $25000 or more, $20000 - 24999, $25000 or more, $25000 or more…\n#> $ class middle class, working class, working class, working class, mid…\n#> $ finrela below average, below average, below average, above average, ab…\n#> $ weight 0.8960034, 1.0825000, 0.5501000, 1.0864000, 1.0825000, 1.08640…\n```\n:::\n\n\nEach row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. 
Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this data set is a representative sample of a population we want to learn about: American adults.\n\n## Specify variables\n\nThe `specify()` function can be used to specify which of the variables in the data set you're interested in. If you're only interested in, say, the `age` of the respondents, you might write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/specify-example_f39f9849a4304bfdd4d54eee9dcbaf90'}\n\n```{.r .cell-code}\ngss %>%\n specify(response = age)\n#> Response: age (numeric)\n#> # A tibble: 500 × 1\n#> age\n#> \n#> 1 36\n#> 2 34\n#> 3 24\n#> 4 42\n#> 5 31\n#> 6 32\n#> 7 48\n#> 8 36\n#> 9 30\n#> 10 33\n#> # ℹ 490 more rows\n```\n:::\n\n\nOn the front end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. What do we see if we check the class of this object, though?\n\n\n::: {.cell layout-align=\"center\" hash='cache/specify-one_47f1a10e3b2037b5729aac2b5c5608f0'}\n\n```{.r .cell-code}\ngss %>%\n specify(response = age) %>%\n class()\n#> [1] \"infer\" \"tbl_df\" \"tbl\" \"data.frame\"\n```\n:::\n\n\nWe can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata.\n\nIf you're interested in two variables (`age` and `partyid`, for example) you can `specify()` their relationship in one of two (equivalent) ways:\n\n\n::: {.cell layout-align=\"center\" hash='cache/specify-two_1217a708eb37fe77a83e733029668e4c'}\n\n```{.r .cell-code}\n# as a formula\ngss %>%\n specify(age ~ partyid)\n#> Response: age (numeric)\n#> Explanatory: partyid (factor)\n#> # A tibble: 500 × 2\n#> age partyid\n#> \n#> 1 36 ind \n#> 2 34 rep \n#> 3 24 ind \n#> 4 42 ind \n#> 5 31 rep \n#> 6 32 rep \n#> 7 48 dem \n#> 8 36 ind \n#> 9 30 rep \n#> 10 33 dem \n#> # ℹ 490 
more rows\n\n# with the named arguments\ngss %>%\n specify(response = age, explanatory = partyid)\n#> Response: age (numeric)\n#> Explanatory: partyid (factor)\n#> # A tibble: 500 × 2\n#> age partyid\n#> \n#> 1 36 ind \n#> 2 34 rep \n#> 3 24 ind \n#> 4 42 ind \n#> 5 31 rep \n#> 6 32 rep \n#> 7 48 dem \n#> 8 36 ind \n#> 9 30 rep \n#> 10 33 dem \n#> # ℹ 490 more rows\n```\n:::\n\n\nIf you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code:\n\n\n::: {.cell layout-align=\"center\" hash='cache/specify-success_85bb9f390a98d0b03a92fa67dee02785'}\n\n```{.r .cell-code}\n# specifying for inference on proportions\ngss %>%\n specify(response = college, success = \"degree\")\n#> Response: college (factor)\n#> # A tibble: 500 × 1\n#> college \n#> \n#> 1 degree \n#> 2 no degree\n#> 3 degree \n#> 4 no degree\n#> 5 degree \n#> 6 no degree\n#> 7 no degree\n#> 8 degree \n#> 9 degree \n#> 10 no degree\n#> # ℹ 490 more rows\n```\n:::\n\n\n## Declare the hypothesis\n\nThe next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of \"independence\" or \"point\" to the `null` argument. 
If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/hypothesize-independence_a081019d5492a3ac5bd37d83d3bd7d42'}\n\n```{.r .cell-code}\ngss %>%\n specify(college ~ partyid, success = \"degree\") %>%\n hypothesize(null = \"independence\")\n#> Response: college (factor)\n#> Explanatory: partyid (factor)\n#> Null Hypothesis: independence\n#> # A tibble: 500 × 2\n#> college partyid\n#> \n#> 1 degree ind \n#> 2 no degree rep \n#> 3 degree ind \n#> 4 no degree ind \n#> 5 degree rep \n#> 6 no degree rep \n#> 7 no degree dem \n#> 8 degree ind \n#> 9 degree rep \n#> 10 no degree dem \n#> # ℹ 490 more rows\n```\n:::\n\n\nIf you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/hypothesize-40-hr-week_9fc84aab707ba927208d59a9ae5858c4'}\n\n```{.r .cell-code}\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40)\n#> Response: hours (numeric)\n#> Null Hypothesis: point\n#> # A tibble: 500 × 1\n#> hours\n#> \n#> 1 50\n#> 2 31\n#> 3 40\n#> 4 40\n#> 5 40\n#> 6 53\n#> 7 32\n#> 8 20\n#> 9 40\n#> 10 40\n#> # ℹ 490 more rows\n```\n:::\n\n\nAgain, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now \"knows\" your null hypothesis.\n\n## Generate the distribution\n\nOnce we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. 
We can do this using one of several methods, supplied in the `type` argument:\n\n* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. \n* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample. \n* `simulate`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. (This option is currently only applicable for testing point estimates.) \n\nContinuing on with our example above, about the average number of hours worked a week, we might write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/generate-point_99f5f76d6e4b6607b7306ef3c01f673b'}\n\n```{.r .cell-code}\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\")\n#> Response: hours (numeric)\n#> Null Hypothesis: point\n#> # A tibble: 2,500,000 × 2\n#> # Groups: replicate [5,000]\n#> replicate hours\n#> \n#> 1 1 28.6 \n#> 2 1 8.62\n#> 3 1 38.6 \n#> 4 1 18.6 \n#> 5 1 6.62\n#> 6 1 38.6 \n#> 7 1 53.6 \n#> 8 1 38.6 \n#> 9 1 35.6 \n#> 10 1 28.6 \n#> # ℹ 2,499,990 more rows\n```\n:::\n\n\nIn the above example, we take 5000 bootstrap samples to form our null distribution.\n\nTo generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. 
For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age:\n\n\n::: {.cell layout-align=\"center\" hash='cache/generate-permute_7170025bb93d810d654d5cb892f0b937'}\n\n```{.r .cell-code}\ngss %>%\n specify(partyid ~ age) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\")\n#> Response: partyid (factor)\n#> Explanatory: age (numeric)\n#> Null Hypothesis: independence\n#> # A tibble: 2,500,000 × 3\n#> # Groups: replicate [5,000]\n#> partyid age replicate\n#> \n#> 1 dem 36 1\n#> 2 ind 34 1\n#> 3 dem 24 1\n#> 4 dem 42 1\n#> 5 ind 31 1\n#> 6 ind 32 1\n#> 7 ind 48 1\n#> 8 rep 36 1\n#> 9 rep 30 1\n#> 10 dem 33 1\n#> # ℹ 2,499,990 more rows\n```\n:::\n\n\n## Calculate statistics\n\nDepending on whether you're carrying out computation-based inference or theory-based inference, you will either supply `calculate()` with the output of `generate()` or `hypothesize()`, respectively. The function, for one, takes in a `stat` argument, which is currently one of `\"mean\"`, `\"median\"`, `\"sum\"`, `\"sd\"`, `\"prop\"`, `\"count\"`, `\"diff in means\"`, `\"diff in medians\"`, `\"diff in props\"`, `\"Chisq\"`, `\"F\"`, `\"t\"`, `\"z\"`, `\"slope\"`, or `\"correlation\"`. 
For example, continuing our example above to calculate the null distribution of mean hours worked per week:\n\n\n::: {.cell layout-align=\"center\" hash='cache/calculate-point_cb5eb4b469232ee013b50b8f17442ae7'}\n\n```{.r .cell-code}\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n#> Response: hours (numeric)\n#> Null Hypothesis: point\n#> # A tibble: 5,000 × 2\n#> replicate stat\n#> \n#> 1 1 40.9\n#> 2 2 40.3\n#> 3 3 39.2\n#> 4 4 39.2\n#> 5 5 39.3\n#> 6 6 39.4\n#> 7 7 40.7\n#> 8 8 41.0\n#> 9 9 39.2\n#> 10 10 40.2\n#> # ℹ 4,990 more rows\n```\n:::\n\n\nThe output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 5000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or $t$ and $z$ statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. 
For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/specify-diff-in-means_0b3512391564e751997da5dedf18fa08'}\n\n```{.r .cell-code}\ngss %>%\n specify(age ~ college) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(\"diff in means\", order = c(\"degree\", \"no degree\"))\n#> Response: age (numeric)\n#> Explanatory: college (factor)\n#> Null Hypothesis: independence\n#> # A tibble: 5,000 × 2\n#> replicate stat\n#> \n#> 1 1 -0.796\n#> 2 2 -0.558\n#> 3 3 0.773\n#> 4 4 -0.390\n#> 5 5 -2.26 \n#> 6 6 -0.355\n#> 7 7 -1.11 \n#> 8 8 -0.628\n#> 9 9 1.14 \n#> 10 10 -0.928\n#> # ℹ 4,990 more rows\n```\n:::\n\n\n## Other utilities\n\nThe infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`).\n\nTo illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours.\n\n\n::: {.cell layout-align=\"center\" hash='cache/utilities-examples_db7c0f64693429e9c2e6d4f8cd43892e'}\n\n```{.r .cell-code}\n# find the point estimate\npoint_estimate <- gss %>%\n specify(response = hours) %>%\n calculate(stat = \"mean\")\n\n# generate a null distribution\nnull_dist <- gss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n```\n:::\n\n\n(Notice the warning: `Removed 1244 rows containing missing values.` This would be worth noting if you were actually carrying out this hypothesis test.)\n\nOur point estimate 41.382 seems *pretty* close to 40, but a little bit different. 
We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40.\n\nWe could initially just visualize the null distribution.\n\n\n::: {.cell layout-align=\"center\" hash='cache/visualize_ba45a5708f9535ab429378e6104e0623'}\n\n```{.r .cell-code}\nnull_dist %>%\n visualize()\n```\n\n::: {.cell-output-display}\n![](figs/visualize-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nWhere does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this.\n\n\n::: {.cell layout-align=\"center\" hash='cache/visualize2_1493ab73ed2491fe4347180e255d151c'}\n\n```{.r .cell-code}\nnull_dist %>%\n visualize() +\n shade_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n```\n\n::: {.cell-output-display}\n![](figs/visualize2-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nNotice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize()` outputs a plot object from ggplot2 instead of a dataframe, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of 41.382 hours would be somewhat unlikely if the mean was actually 40 hours. 
How unlikely, though?\n\n\n::: {.cell layout-align=\"center\" hash='cache/get_p_value_9ff59346118812eb2586c37fc7af4b26'}\n\n```{.r .cell-code}\n# get a two-tailed p-value\np_value <- null_dist %>%\n get_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n\np_value\n#> # A tibble: 1 × 1\n#> p_value\n#> \n#> 1 0.0416\n```\n:::\n\n\nIt looks like the p-value is 0.0416, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (1.382 hours) from 40 would be 0.0416. This may or may not be statistically significantly different, depending on the significance level $\\alpha$ you decided on *before* you ran this analysis. If you had set $\\alpha = .05$, then this difference would be statistically significant, but if you had set $\\alpha = .01$, then it would not be.\n\nTo get a confidence interval around our estimate, we can write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/get_conf_c42a6753bef5ebab49c9ed46a27f8bc2'}\n\n```{.r .cell-code}\n# start with the null distribution\nnull_dist %>%\n # calculate the confidence interval around the point estimate\n get_confidence_interval(point_estimate = point_estimate,\n # at the 95% confidence level\n level = .95,\n # using the standard error\n type = \"se\")\n#> # A tibble: 1 × 2\n#> lower_ci upper_ci\n#> \n#> 1 40.1 42.7\n```\n:::\n\n\nAs you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the significance level $\\alpha = .05$.\n\n## Theoretical methods\n\nThe infer package also provides functionality to use theoretical methods for `\"Chisq\"`, `\"F\"` and `\"t\"` test statistics. \n\nGenerally, to find a null distribution using theory-based methods, use the same code that you would use to find the null distribution using randomization-based methods, but skip the `generate()` step. 
For example, if we wanted to find a null distribution for the relationship between age (`age`) and party identification (`partyid`) using randomization, we could write:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-19_2f7a27b6835fc87ea3b50b8ff4096b8e'}\n\n```{.r .cell-code}\nnull_f_distn <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"F\")\n```\n:::\n\n\nTo find the null distribution using theory-based methods, instead, skip the `generate()` step entirely:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-20_b98392f4eeae7dea46b040259fc5ad3b'}\n\n```{.r .cell-code}\nnull_f_distn_theoretical <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n calculate(stat = \"F\")\n```\n:::\n\n\nWe'll calculate the observed statistic to make use of in the following visualizations; this procedure is the same, regardless of the methods used to find the null distribution.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-21_28c37abc474e90d7d749b2f6c1edc10c'}\n\n```{.r .cell-code}\nF_hat <- gss %>% \n specify(age ~ partyid) %>%\n calculate(stat = \"F\")\n```\n:::\n\n\nNow, instead of just piping the null distribution into `visualize()`, as we would do if we wanted to visualize the randomization-based null distribution, we also need to provide `method = \"theoretical\"` to `visualize()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-22_60509d3e0d1bde3fe12c5c3e36e2d147'}\n\n```{.r .cell-code}\nvisualize(null_f_distn_theoretical, method = \"theoretical\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-22-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nTo get a sense of how the theory-based and randomization-based null distributions relate, we can pipe the randomization-based null distribution into 
`visualize()` and also specify `method = \"both\"`\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-23_ce929629fc6341f9bfc62dd262c19788'}\n\n```{.r .cell-code}\nvisualize(null_f_distn, method = \"both\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-23-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThat's it! This vignette covers most all of the key functionality of infer. See `help(package = \"infer\")` for a full list of functions and vignettes.\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] 
/Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/statistics/k-means/index/execute-results/html.json b/_freeze/learn/statistics/k-means/index/execute-results/html.json new file mode 100644 index 00000000..fcaf76e9 --- /dev/null +++ b/_freeze/learn/statistics/k-means/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "34ddaefaa5f3589af9ca85318ae03da4", + "result": { + "markdown": "---\ntitle: \"K-means clustering with tidy data principles\"\ncategories:\n - statistical analysis\n - clustering\n - tidying results\ntype: learn-subsection\nweight: 2\ndescription: | \n Summarize clustering characteristics and estimate the best number of clusters for a data set.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nThis article only requires the tidymodels package.\n\nK-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions: \n\n- `tidy()`\n- `augment()` \n- `glance()`\n\nLet's start by generating some random two-dimensional data with three clusters. 
Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_da2530708f5dfb4a750218471fffffbf'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\n\nset.seed(27)\n\ncenters <- tibble(\n cluster = factor(1:3), \n num_points = c(100, 150, 50), # number points in each cluster\n x1 = c(5, 0, -3), # x1 coordinate of cluster center\n x2 = c(-1, 1, -2) # x2 coordinate of cluster center\n)\n\nlabelled_points <- \n centers %>%\n mutate(\n x1 = map2(num_points, x1, rnorm),\n x2 = map2(num_points, x2, rnorm)\n ) %>% \n select(-num_points) %>% \n unnest(cols = c(x1, x2))\n\nggplot(labelled_points, aes(x1, x2, color = cluster)) +\n geom_point(alpha = 0.3)\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-3-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThis is an ideal case for k-means clustering. \n\n## How does K-means work?\n\nRather than using equations, this short animation using the [artwork](https://github.com/allisonhorst/stats-illustrations) of Allison Horst explains the clustering process:\n\n\n::: {.cell layout-align=\"center\" hash='cache/illustrations_ac33bdbf9aa409a303f4884ea81d9606'}\n![](kmeans.gif){fig-align='center'}\n:::\n\n\n## Clustering in R\n\nWe'll use the built-in `kmeans()` function, which accepts a data frame with all numeric columns as it's primary argument.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_d5799415ae2a6c1256f67a67ce3a5e21'}\n\n```{.r .cell-code}\npoints <- \n labelled_points %>% \n select(-cluster)\n\nkclust <- kmeans(points, centers = 3)\nkclust\n#> K-means clustering with 3 clusters of sizes 148, 51, 101\n#> \n#> Cluster means:\n#> x1 x2\n#> 1 0.08853475 1.045461\n#> 2 -3.14292460 -2.000043\n#> 3 5.00401249 -1.045811\n#> \n#> Clustering vector:\n#> [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3\n#> [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
3 3 3 3 3 3 3 3 3\n#> [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1\n#> [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n#> [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n#> [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n#> [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2\n#> [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n#> [297] 2 2 2 2\n#> \n#> Within cluster sum of squares by cluster:\n#> [1] 298.9415 108.8112 243.2092\n#> (between_SS / total_SS = 82.5 %)\n#> \n#> Available components:\n#> \n#> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" \"tot.withinss\"\n#> [6] \"betweenss\" \"size\" \"iter\" \"ifault\"\nsummary(kclust)\n#> Length Class Mode \n#> cluster 300 -none- numeric\n#> centers 6 -none- numeric\n#> totss 1 -none- numeric\n#> withinss 3 -none- numeric\n#> tot.withinss 1 -none- numeric\n#> betweenss 1 -none- numeric\n#> size 3 -none- numeric\n#> iter 1 -none- numeric\n#> ifault 1 -none- numeric\n```\n:::\n\n\nThe output is a list of vectors, where each component has a different length. There's one of length 300, the same as our original data set. There are two elements of length 3 (`withinss` and `tot.withinss`) and `centers` is a matrix with 3 rows. And then there are the elements of length 1: `totss`, `tot.withinss`, `betweenss`, and `iter`. 
(The value `ifault` indicates possible algorithm problems.)\n\nThese differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a *different kind* of information.\n\n- `cluster` (300 values) contains information about each *point*\n- `centers`, `withinss`, and `size` (3 values) contain information about each *cluster*\n- `totss`, `tot.withinss`, `betweenss`, and `iter` (1 value) contain information about the *full clustering*\n\nWhich of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there's no straightforward way to combine them), they are extracted by separate functions. `augment` adds the point classifications to the original data set:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-6_d3717c89fa92a77f402386f99eef2483'}\n\n```{.r .cell-code}\naugment(kclust, points)\n#> # A tibble: 300 × 3\n#> x1 x2 .cluster\n#> \n#> 1 6.91 -2.74 3 \n#> 2 6.14 -2.45 3 \n#> 3 4.24 -0.946 3 \n#> 4 3.54 0.287 3 \n#> 5 3.91 0.408 3 \n#> 6 5.30 -1.58 3 \n#> 7 5.01 -1.77 3 \n#> 8 6.16 -1.68 3 \n#> 9 7.13 -2.17 3 \n#> 10 5.24 -2.42 3 \n#> # ℹ 290 more rows\n```\n:::\n\n\nThe `tidy()` function summarizes on a per-cluster level:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-7_0a4cf01e15081ad3264ba86f6be73b57'}\n\n```{.r .cell-code}\ntidy(kclust)\n#> # A tibble: 3 × 5\n#> x1 x2 size withinss cluster\n#> \n#> 1 0.0885 1.05 148 299. 1 \n#> 2 -3.14 -2.00 51 109. 2 \n#> 3 5.00 -1.05 101 243. 3\n```\n:::\n\n\nAnd as it always does, the `glance()` function extracts a single-row summary:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-8_8f7a9984fb72d6f34b4de07a0298ed4a'}\n\n```{.r .cell-code}\nglance(kclust)\n#> # A tibble: 1 × 4\n#> totss tot.withinss betweenss iter\n#> \n#> 1 3724. 651. 3073. 
2\n```\n:::\n\n\n## Exploratory clustering\n\nWhile these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like [dplyr](https://dplyr.tidyverse.org/).\n\nLet's say we want to explore the effect of different choices of `k`, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of `k`, then create columns containing the tidied, glanced and augmented data:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_8a3b763e4cfe1ee19371d93a101e539b'}\n\n```{.r .cell-code}\nkclusts <- \n tibble(k = 1:9) %>%\n mutate(\n kclust = map(k, ~kmeans(points, .x)),\n tidied = map(kclust, tidy),\n glanced = map(kclust, glance),\n augmented = map(kclust, augment, points)\n )\n\nkclusts\n#> # A tibble: 9 × 5\n#> k kclust tidied glanced augmented \n#> \n#> 1 1 \n#> 2 2 \n#> 3 3 \n#> 4 4 \n#> 5 5 \n#> 6 6 \n#> 7 7 \n#> 8 8 \n#> 9 9 \n```\n:::\n\n\nWe can turn these into three separate data sets each representing a different type of data: using `tidy()`, using `augment()`, and using `glance()`. 
Each of these goes into a separate data set as they represent different types of data.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_4ee4df38af9a1ae909e447f414ee89d3'}\n\n```{.r .cell-code}\nclusters <- \n kclusts %>%\n unnest(cols = c(tidied))\n\nassignments <- \n kclusts %>% \n unnest(cols = c(augmented))\n\nclusterings <- \n kclusts %>%\n unnest(cols = c(glanced))\n```\n:::\n\n\nNow we can plot the original points using the data from `augment()`, with each point colored according to the predicted cluster.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-11_3abb5db4f373908d0863fd146b176426'}\n\n```{.r .cell-code}\np1 <- \n ggplot(assignments, aes(x = x1, y = x2)) +\n geom_point(aes(color = .cluster), alpha = 0.8) + \n facet_wrap(~ k)\np1\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-11-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nAlready we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when `k` is too high or too low. We can then add the centers of the cluster using the data from `tidy()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-12_49aab4b0d7329a8754bd357b1756a811'}\n\n```{.r .cell-code}\np2 <- p1 + geom_point(data = clusters, size = 10, shape = \"x\")\np2\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-12-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe data from `glance()` fills a different but equally important purpose; it lets us view trends of some summary statistics across values of `k`. 
Of particular interest is the total within sum of squares, saved in the `tot.withinss` column.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-13_82d1ec1e6969b8d13f5406447ac5ea8c'}\n\n```{.r .cell-code}\nggplot(clusterings, aes(k, tot.withinss)) +\n geom_line() +\n geom_point()\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-13-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThis represents the variance within the clusters. It decreases as `k` increases, but notice a bend (or \"elbow\") around `k = 3`. This bend indicates that additional clusters beyond the third have little value. (See [here](https://web.stanford.edu/~hastie/Papers/gap.pdf) for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output.\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 
1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/statistics/tidy-analysis/index/execute-results/html.json b/_freeze/learn/statistics/tidy-analysis/index/execute-results/html.json new file mode 100644 index 00000000..283273f6 --- /dev/null +++ b/_freeze/learn/statistics/tidy-analysis/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "9dd5f07df661cda40b8c331c9af79966", + "result": { + "markdown": "---\ntitle: \"Correlation and regression fundamentals with tidy data principles\"\ncategories:\n - statistical analysis\n - correlation\n - tidying results\ntype: learn-subsection\nweight: 1\ndescription: | \n Analyze the results of correlation tests and simple regression models for many data sets at once.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nThis article only requires the tidymodels package.\n\nWhile the tidymodels package [broom](https://broom.tidyverse.org/) is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. 
In particular, it plays well with the `nest()/unnest()` functions from [tidyr](https://tidyr.tidyverse.org/) and the `map()` function in [purrr](https://purrr.tidyverse.org/).\n\n## Correlation analysis\n\nLet's demonstrate this with a simple data set, the built-in `Orange`. We start by coercing `Orange` to a `tibble`. This gives a nicer print method that will be especially useful later on when we start working with list-columns.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_b8c28b0b2e42595af6cd6e42e320211e'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\n\ndata(Orange)\n\nOrange <- as_tibble(Orange)\nOrange\n#> # A tibble: 35 × 3\n#> Tree age circumference\n#> \n#> 1 1 118 30\n#> 2 1 484 58\n#> 3 1 664 87\n#> 4 1 1004 115\n#> 5 1 1231 120\n#> 6 1 1372 142\n#> 7 1 1582 145\n#> 8 2 118 33\n#> 9 2 484 69\n#> 10 2 664 111\n#> # ℹ 25 more rows\n```\n:::\n\n\nThis contains 35 observations of three variables: `Tree`, `age`, and `circumference`. `Tree` is a factor with five levels describing five trees. As might be expected, age and circumference are correlated:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-4_d23aebe621890c33deabf5c88ab2e2e7'}\n\n```{.r .cell-code}\ncor(Orange$age, Orange$circumference)\n#> [1] 0.9135189\n\nlibrary(ggplot2)\n\nggplot(Orange, aes(age, circumference, color = Tree)) +\n geom_line()\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-4-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nSuppose you want to test for correlations individually *within* each tree. 
You can do this with dplyr's `group_by`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_91dbe7c00d07ed20010e073d66c2fdda'}\n\n```{.r .cell-code}\nOrange %>% \n group_by(Tree) %>%\n summarize(correlation = cor(age, circumference))\n#> # A tibble: 5 × 2\n#> Tree correlation\n#> \n#> 1 3 0.988\n#> 2 1 0.985\n#> 3 5 0.988\n#> 4 2 0.987\n#> 5 4 0.984\n```\n:::\n\n\n(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees).\n\nSuppose that instead of simply estimating a correlation, we want to perform a hypothesis test with `cor.test()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-6_46fa9b020ddb8990c5c81c2814b26dca'}\n\n```{.r .cell-code}\nct <- cor.test(Orange$age, Orange$circumference)\nct\n#> \n#> \tPearson's product-moment correlation\n#> \n#> data: Orange$age and Orange$circumference\n#> t = 12.9, df = 33, p-value = 1.931e-14\n#> alternative hypothesis: true correlation is not equal to 0\n#> 95 percent confidence interval:\n#> 0.8342364 0.9557955\n#> sample estimates:\n#> cor \n#> 0.9135189\n```\n:::\n\n\nThis test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the `tidy()` function:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-7_e76d10df195a88e005ab096c3a46fd52'}\n\n```{.r .cell-code}\ntidy(ct)\n#> # A tibble: 1 × 8\n#> estimate statistic p.value parameter conf.low conf.high method alternative\n#> \n#> 1 0.914 12.9 1.93e-14 33 0.834 0.956 Pearson'… two.sided\n```\n:::\n\n\nOften, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a `nest-map-unnest` workflow. For example, suppose we want to perform correlation tests for each different tree. 
We start by `nest`ing our data based on the group of interest:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-8_3a8c5c9b027a3b1290a315655286c386'}\n\n```{.r .cell-code}\nnested <- \n Orange %>% \n nest(data = c(age, circumference))\n```\n:::\n\n\nThen we perform a correlation test for each nested tibble using `purrr::map()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_99179b6d70604b3edd0d0107b29bea04'}\n\n```{.r .cell-code}\nnested %>% \n mutate(test = map(data, ~ cor.test(.x$age, .x$circumference)))\n#> # A tibble: 5 × 3\n#> Tree data test \n#> \n#> 1 1 \n#> 2 2 \n#> 3 3 \n#> 4 4 \n#> 5 5 \n```\n:::\n\n\nThis results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with `map()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_1d2edd8b12d02933b931669b101d050e'}\n\n```{.r .cell-code}\nnested %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) \n#> # A tibble: 5 × 4\n#> Tree data test tidied \n#> \n#> 1 1 \n#> 2 2 \n#> 3 3 \n#> 4 4 \n#> 5 5 \n```\n:::\n\n\nFinally, we want to unnest the tidied data frames so we can see the results in a flat tibble. 
All together, this looks like:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-11_48290f0abce3aa8054e3b335ae75bfdf'}\n\n```{.r .cell-code}\nOrange %>% \n nest(data = c(age, circumference)) %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) %>% \n unnest(cols = tidied) %>% \n select(-data, -test)\n#> # A tibble: 5 × 9\n#> Tree estimate statistic p.value parameter conf.low conf.high method \n#> \n#> 1 1 0.985 13.0 0.0000485 5 0.901 0.998 Pearson's pro…\n#> 2 2 0.987 13.9 0.0000343 5 0.914 0.998 Pearson's pro…\n#> 3 3 0.988 14.4 0.0000290 5 0.919 0.998 Pearson's pro…\n#> 4 4 0.984 12.5 0.0000573 5 0.895 0.998 Pearson's pro…\n#> 5 5 0.988 14.1 0.0000318 5 0.916 0.998 Pearson's pro…\n#> # ℹ 1 more variable: alternative \n```\n:::\n\n\n## Regression models\n\nThis type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-12_b1826e99afb5e6d9f52a61ffc0f64eb3'}\n\n```{.r .cell-code}\nlm_fit <- lm(age ~ circumference, data = Orange)\nsummary(lm_fit)\n#> \n#> Call:\n#> lm(formula = age ~ circumference, data = Orange)\n#> \n#> Residuals:\n#> Min 1Q Median 3Q Max \n#> -317.88 -140.90 -17.20 96.54 471.16 \n#> \n#> Coefficients:\n#> Estimate Std. Error t value Pr(>|t|) \n#> (Intercept) 16.6036 78.1406 0.212 0.833 \n#> circumference 7.8160 0.6059 12.900 1.93e-14 ***\n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1\n#> \n#> Residual standard error: 203.1 on 33 degrees of freedom\n#> Multiple R-squared: 0.8345,\tAdjusted R-squared: 0.8295 \n#> F-statistic: 166.4 on 1 and 33 DF, p-value: 1.931e-14\n```\n:::\n\n\nWhen we tidy these results, we get multiple rows of output for each model:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-13_3097cfaa120e9f8a9c38220653816d1e'}\n\n```{.r .cell-code}\ntidy(lm_fit)\n#> # A tibble: 2 × 5\n#> term estimate std.error statistic p.value\n#> \n#> 1 (Intercept) 16.6 78.1 0.212 8.33e- 1\n#> 2 circumference 7.82 0.606 12.9 1.93e-14\n```\n:::\n\n\nNow we can handle multiple regressions at once using exactly the same workflow as before:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-14_8e75b7bb2abc579c067c0c472239046f'}\n\n```{.r .cell-code}\nOrange %>%\n nest(data = c(-Tree)) %>% \n mutate(\n fit = map(data, ~ lm(age ~ circumference, data = .x)),\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n#> # A tibble: 10 × 6\n#> Tree term estimate std.error statistic p.value\n#> \n#> 1 1 (Intercept) -265. 98.6 -2.68 0.0436 \n#> 2 1 circumference 11.9 0.919 13.0 0.0000485\n#> 3 2 (Intercept) -132. 83.1 -1.59 0.172 \n#> 4 2 circumference 7.80 0.560 13.9 0.0000343\n#> 5 3 (Intercept) -210. 85.3 -2.46 0.0574 \n#> 6 3 circumference 12.0 0.835 14.4 0.0000290\n#> 7 4 (Intercept) -76.5 88.3 -0.867 0.426 \n#> 8 4 circumference 7.17 0.572 12.5 0.0000573\n#> 9 5 (Intercept) -54.5 76.9 -0.709 0.510 \n#> 10 5 circumference 8.79 0.621 14.1 0.0000318\n```\n:::\n\n\nYou can just as easily use multiple predictors in the regressions, as shown here on the `mtcars` dataset. We nest the data into automatic vs. 
manual cars (the `am` column), then perform the regression within each nested tibble.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-15_b9dc82496897bd3f80514e02077a3dd8'}\n\n```{.r .cell-code}\ndata(mtcars)\nmtcars <- as_tibble(mtcars) # to play nicely with list-cols\nmtcars\n#> # A tibble: 32 × 11\n#> mpg cyl disp hp drat wt qsec vs am gear carb\n#> \n#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4\n#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4\n#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1\n#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1\n#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2\n#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1\n#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4\n#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2\n#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2\n#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4\n#> # ℹ 22 more rows\n\nmtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n#> # A tibble: 8 × 6\n#> am term estimate std.error statistic p.value\n#> \n#> 1 1 (Intercept) 4.28 3.46 1.24 0.247 \n#> 2 1 mpg -0.101 0.0294 -3.43 0.00750 \n#> 3 1 qsec 0.0398 0.151 0.264 0.798 \n#> 4 1 gear -0.0229 0.349 -0.0656 0.949 \n#> 5 0 (Intercept) 4.92 1.40 3.52 0.00309 \n#> 6 0 mpg -0.192 0.0443 -4.33 0.000591\n#> 7 0 qsec 0.0919 0.0983 0.935 0.365 \n#> 8 0 gear 0.147 0.368 0.398 0.696\n```\n:::\n\n\nWhat if you want not just the `tidy()` output, but the `augment()` and `glance()` outputs as well, while still performing each regression only once? 
Since we're using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-16_bfe852ae3caea34b424dc7d4ad083632'}\n\n```{.r .cell-code}\nregressions <- \n mtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),\n tidied = map(fit, tidy),\n glanced = map(fit, glance),\n augmented = map(fit, augment)\n )\n\nregressions %>% \n select(tidied) %>% \n unnest(tidied)\n#> # A tibble: 8 × 5\n#> term estimate std.error statistic p.value\n#> \n#> 1 (Intercept) 4.28 3.46 1.24 0.247 \n#> 2 mpg -0.101 0.0294 -3.43 0.00750 \n#> 3 qsec 0.0398 0.151 0.264 0.798 \n#> 4 gear -0.0229 0.349 -0.0656 0.949 \n#> 5 (Intercept) 4.92 1.40 3.52 0.00309 \n#> 6 mpg -0.192 0.0443 -4.33 0.000591\n#> 7 qsec 0.0919 0.0983 0.935 0.365 \n#> 8 gear 0.147 0.368 0.398 0.696\n\nregressions %>% \n select(glanced) %>% \n unnest(glanced)\n#> # A tibble: 2 × 12\n#> r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC\n#> \n#> 1 0.833 0.778 0.291 15.0 0.000759 3 -0.00580 10.0 12.8\n#> 2 0.625 0.550 0.522 8.32 0.00170 3 -12.4 34.7 39.4\n#> # ℹ 3 more variables: deviance , df.residual , nobs \n\nregressions %>% \n select(augmented) %>% \n unnest(augmented)\n#> # A tibble: 32 × 10\n#> wt mpg qsec gear .fitted .resid .hat .sigma .cooksd .std.resid\n#> \n#> 1 2.62 21 16.5 4 2.73 -0.107 0.517 0.304 0.0744 -0.527 \n#> 2 2.88 21 17.0 4 2.75 0.126 0.273 0.304 0.0243 0.509 \n#> 3 2.32 22.8 18.6 4 2.63 -0.310 0.312 0.279 0.188 -1.29 \n#> 4 2.2 32.4 19.5 4 1.70 0.505 0.223 0.233 0.278 1.97 \n#> 5 1.62 30.4 18.5 4 1.86 -0.244 0.269 0.292 0.0889 -0.982 \n#> 6 1.84 33.9 19.9 4 1.56 0.274 0.286 0.286 0.125 1.12 \n#> 7 1.94 27.3 18.9 4 2.19 -0.253 0.151 0.293 0.0394 -0.942 \n#> 8 2.14 26 16.7 5 2.21 -0.0683 0.277 0.307 0.00732 -0.276 \n#> 9 1.51 30.4 16.9 5 1.77 -0.259 0.430 0.284 0.263 -1.18 \n#> 10 3.17 15.8 14.5 5 3.15 0.0193 
0.292 0.308 0.000644 0.0789\n#> # ℹ 22 more rows\n```\n:::\n\n\nBy combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. This includes:\n\n- sorting by p-value or estimate to find the most significant terms across all tests,\n- p-value histograms, and\n- volcano plots comparing p-values to effect size estimates.\n\nIn each of these cases, we can easily filter, facet, or distinguish based on the `term` column. In short, this makes the tools of tidy data analysis available for the *results* of data analysis and models, not just the inputs.\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 
4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/statistics/xtabs/index/execute-results/html.json b/_freeze/learn/statistics/xtabs/index/execute-results/html.json new file mode 100644 index 00000000..1eda3879 --- /dev/null +++ b/_freeze/learn/statistics/xtabs/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "33fb39003f1fa94a3679e6c404faee52", + "result": { + "markdown": "---\ntitle: \"Statistical analysis of contingency tables\"\ncategories:\n - statistical analysis\n - analysis of tables\n - hypothesis testing\ntype: learn-subsection\nweight: 5\ndescription: | \n Use tests of independence and goodness of fit to analyze tables of counts.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n\n## Introduction\n\nThis article only requires that you have the tidymodels package installed.\n\nIn this vignette, we'll walk through conducting a $\\chi^2$ (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.\n\nThroughout this vignette, we'll make use of the `ad_data` data set (available in the modeldata package, which is part of tidymodels). 
This data set is related to cognitive impairment in 333 patients from [Craig-Schapiro _et al_ (2011)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079734/). See `?ad_data` for more information on the variables included and their source. One of the main research questions in these data were how a person's genetics related to the Apolipoprotein E gene affect their cognitive skills. The data shows: \n\n\n::: {.cell layout-align=\"center\" hash='cache/glimpse-ad_data-actual_94d0250e8076cbdabed74e9980d3fa0c'}\n\n```{.r .cell-code}\nlibrary(tidymodels) # Includes the infer package\n\ndata(ad_data, package = \"modeldata\")\nad_data %>%\n select(Genotype, Class)\n#> # A tibble: 333 × 2\n#> Genotype Class \n#> \n#> 1 E3E3 Control \n#> 2 E3E4 Control \n#> 3 E3E4 Control \n#> 4 E3E4 Control \n#> 5 E3E3 Control \n#> 6 E4E4 Impaired\n#> 7 E2E3 Control \n#> 8 E2E3 Control \n#> 9 E3E3 Control \n#> 10 E2E3 Impaired\n#> # ℹ 323 more rows\n```\n:::\n\n\nThe three main genetic variants are called E2, E3, and E4. The values in `Genotype` represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of \"E2E4\" means E2 from one parent and E4 from the other). \n\n## Test of independence\n\nTo carry out a chi-squared test of independence, we'll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data:\n\n\n::: {.cell layout-align=\"center\" hash='cache/plot-indep_7b4b4278ee24551267ebbc164a3d5901'}\n::: {.cell-output-display}\n![](figs/plot-indep-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nIf there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. 
Are the differences we see here, though, just due to random noise?\n\nFirst, to calculate the observed statistic, we can use `specify()` and `calculate()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/calc-obs-stat-indep_5b5c2708335eaa5642566a36bc8607a8'}\n\n```{.r .cell-code}\n# calculate the observed statistic\nobserved_indep_statistic <- ad_data %>%\n specify(Genotype ~ Class) %>%\n calculate(stat = \"Chisq\")\n```\n:::\n\n\nThe observed $\\chi^2$ statistic is 21.5774809. Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics.\n\nWe can `generate()` the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person's genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two.\n\n\n::: {.cell layout-align=\"center\" hash='cache/generate-null-indep_f0c4dc6d3909bf492fff7fec016efe78'}\n\n```{.r .cell-code}\n# generate the null distribution using randomization\nnull_distribution_simulated <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"Chisq\")\n```\n:::\n\n\nNote that, in the line `specify(Genotype ~ Class)` above, we could use the equivalent syntax `specify(response = Genotype, explanatory = Class)`. 
The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization.\n\n\n::: {.cell layout-align=\"center\" hash='cache/generate-null-indep-t_caa67003246e478f63031c6cfddd20db'}\n\n```{.r .cell-code}\n# generate the null distribution by theoretical approximation\nnull_distribution_theoretical <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n # note that we skip the generation step here!\n calculate(stat = \"Chisq\")\n```\n:::\n\n\nTo get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/visualize-indep_f2f200d811183a0c4cb1c5db5a6887da'}\n\n```{.r .cell-code}\n# visualize the null distribution and test statistic!\nnull_distribution_simulated %>%\n visualize() + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\n::: {.cell-output-display}\n![](figs/visualize-indep-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nWe could also visualize the observed statistic against the theoretical null distribution. 
Note that we skip the `generate()` and `calculate()` steps when using the theoretical approach, and that we now need to provide `method = \"theoretical\"` to `visualize()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/visualize-indep-theor_69d67e24d9d20b2cc417500817530986'}\n\n```{.r .cell-code}\n# visualize the theoretical null distribution and test statistic!\nad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n visualize(method = \"theoretical\") + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\n::: {.cell-output-display}\n![](figs/visualize-indep-theor-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nTo visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = \"both\"`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/visualize-indep-both_de4ea057f276adae65c0500f0b101204'}\n\n```{.r .cell-code}\n# visualize both null distributions and the test statistic!\nnull_distribution_simulated %>%\n visualize(method = \"both\") + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n```\n\n::: {.cell-output-display}\n![](figs/visualize-indep-both-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nEither way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. 
More exactly, we can calculate the p-value:\n\n\n::: {.cell layout-align=\"center\" hash='cache/p-value-indep_c4fc49cb845ca2d17eb8b90ad1415c5f'}\n\n```{.r .cell-code}\n# calculate the p value from the observed statistic and null distribution\np_value_independence <- null_distribution_simulated %>%\n get_p_value(obs_stat = observed_indep_statistic,\n direction = \"greater\")\n\np_value_independence\n#> # A tibble: 1 × 1\n#> p_value\n#> \n#> 1 0.0008\n```\n:::\n\n\nThus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than 21.5774809 is approximately 8\\times 10^{-4}.\n\nNote that, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/chisq-indep-wrapper_5a5fe1de95b0fa9f10274adace466c56'}\n\n```{.r .cell-code}\nchisq_test(ad_data, Genotype ~ Class)\n#> # A tibble: 1 × 3\n#> statistic chisq_df p_value\n#> \n#> 1 21.6 5 0.000630\n```\n:::\n\n\n\n## Goodness of fit\n\nNow, moving on to a chi-squared goodness of fit test, we'll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. For example, [Song _et al_ (2004)](https://annals.org/aim/article-abstract/717641/meta-analysis-apolipoprotein-e-genotypes-risk-coronary-heart-disease) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known). 
\n\nThe rates of the meta-analysis and our observed data are: \n \n\n::: {.cell layout-align=\"center\" hash='cache/rates_08b62e13f32945d5639e4fa0a3e7650e'}\n\n```{.r .cell-code}\n# Song, Y., Stampfer, M. J., & Liu, S. (2004). Meta-Analysis: Apolipoprotein E \n# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, \n# 141(2), 137.\nmeta_rates <- c(\"E2E2\" = 0.71, \"E2E3\" = 11.4, \"E2E4\" = 2.32,\n \"E3E3\" = 61.0, \"E3E4\" = 22.6, \"E4E4\" = 2.22)\nmeta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100%\n\nobs_rates <- table(ad_data$Genotype)/nrow(ad_data)\nround(cbind(obs_rates, meta_rates) * 100, 2)\n#> obs_rates meta_rates\n#> E2E2 0.60 0.71\n#> E2E3 11.11 11.37\n#> E2E4 2.40 2.31\n#> E3E3 50.15 60.85\n#> E3E4 31.83 22.54\n#> E4E4 3.90 2.21\n```\n:::\n\n\nSuppose our null hypothesis is that `Genotype` follows the same frequency distribution as the meta-analysis. Lets now test whether this difference in distributions is statistically significant.\n\nFirst, to carry out this hypothesis test, we would calculate our observed statistic.\n\n\n::: {.cell layout-align=\"center\" hash='cache/observed-gof-statistic_e91fd2fa77ac6bf08b1757bfed1f3f5d'}\n\n```{.r .cell-code}\n# calculating the null distribution\nobserved_gof_statistic <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n calculate(stat = \"Chisq\")\n```\n:::\n\n\nThe observed statistic is 23.3838483. 
Now, generating a null distribution, by just dropping in a call to `generate()`:\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/null-distribution-gof_bb60d11813808ededfda9e5bf2d1b026'}\n\n```{.r .cell-code}\n# generating a null distribution\nnull_distribution_gof <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n generate(reps = 5000, type = \"simulate\") %>%\n calculate(stat = \"Chisq\")\n```\n:::\n\n\nAgain, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/visualize-indep-gof_4d066a35c73c809812320d0660a99d40'}\n\n```{.r .cell-code}\n# visualize the null distribution and test statistic!\nnull_distribution_gof %>%\n visualize() + \n shade_p_value(observed_gof_statistic,\n direction = \"greater\")\n```\n\n::: {.cell-output-display}\n![](figs/visualize-indep-gof-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThis statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? Calculating the p-value:\n\n\n::: {.cell layout-align=\"center\" hash='cache/get-p-value-gof_3b42f07bf93dda8fb8c6c6fdec48548d'}\n\n```{.r .cell-code}\n# calculate the p-value\np_value_gof <- null_distribution_gof %>%\n get_p_value(observed_gof_statistic,\n direction = \"greater\")\n\np_value_gof\n#> # A tibble: 1 × 1\n#> p_value\n#> \n#> 1 0.0008\n```\n:::\n\n\nThus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately 8\\times 10^{-4}.\n\nAgain, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out chi-squared goodness of fit tests on tidy data. 
The syntax goes like this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/chisq-gof-wrapper_7462abfdbc94485cd21a800c19791b3f'}\n\n```{.r .cell-code}\nchisq_test(ad_data, response = Genotype, p = meta_rates)\n#> # A tibble: 1 × 3\n#> statistic chisq_df p_value\n#> \n#> 1 23.4 5 0.000285\n```\n:::\n\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + 
"rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/work/bayes-opt/index/execute-results/html.json b/_freeze/learn/work/bayes-opt/index/execute-results/html.json new file mode 100644 index 00000000..7d6d0891 --- /dev/null +++ b/_freeze/learn/work/bayes-opt/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "abb72452b57a3591f2c7e824be72e0a8", + "result": { + "markdown": "---\ntitle: \"Iterative Bayesian optimization of a classification model\"\ncategories:\n - model tuning\n - Bayesian optimization\n - SVMs\ntype: learn-subsection\nweight: 3\ndescription: | \n Identify the best hyperparameters for a model using Bayesian optimization of iterative search.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: kernlab, modeldata, themis, and tidymodels.\n\nMany of the examples for model tuning focus on [grid search](/learn/work/tune-svm/). For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, _iterative search_ can be used to analyze the existing tuning parameter results and then _predict_ which tuning parameters to try next. \n\nThere are a variety of methods for iterative search and the focus in this article is on _Bayesian optimization_. For more information on this method, these resources might be helpful:\n\n* [_Practical bayesian optimization of machine learning algorithms_](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=Practical+Bayesian+Optimization+of+Machine+Learning+Algorithms&btnG=) (2012). J Snoek, H Larochelle, and RP Adams. Advances in neural information. \n\n* [_A Tutorial on Bayesian Optimization for Machine Learning_](https://www.cs.toronto.edu/~rgrosse/courses/csc411_f18/tutorials/tut8_adams_slides.pdf) (2018). 
R Adams.\n\n * [_Gaussian Processes for Machine Learning_](http://www.gaussianprocess.org/gpml/) (2006). C E Rasmussen and C Williams.\n\n* [Other articles!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=\"Bayesian+Optimization\"&btnG=)\n\n\n## Cell segmenting revisited\n\nTo demonstrate this approach to tuning models, let's return to the cell segmentation data from the [Getting Started](/start/resampling/) article on resampling: \n\n\n::: {.cell layout-align=\"center\" hash='cache/import-data_1529c5bf7506d469f00dc2d2a642ee81'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nlibrary(modeldata)\n\n# Load data\ndata(cells)\n\nset.seed(2369)\ntr_te_split <- initial_split(cells %>% select(-case), prop = 3/4)\ncell_train <- training(tr_te_split)\ncell_test <- testing(tr_te_split)\n\nset.seed(1697)\nfolds <- vfold_cv(cell_train, v = 10)\n```\n:::\n\n\n## The tuning scheme\n\nSince the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about 64% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be _tuned_ so that we have enough (but not too many) representations of the data. \n\n\n::: {.cell layout-align=\"center\" hash='cache/recipe_3a94713e4733aa81f01793825603ac74'}\n\n```{.r .cell-code}\nlibrary(themis)\n\ncell_pre_proc <-\n recipe(class ~ ., data = cell_train) %>%\n step_YeoJohnson(all_predictors()) %>%\n step_normalize(all_predictors()) %>%\n step_pca(all_predictors(), num_comp = tune()) %>%\n step_downsample(class)\n```\n:::\n\n\nIn this analysis, we will use a support vector machine to model the data. Let's use a radial basis function (RBF) kernel and tune its main parameter ($\\sigma$). 
Additionally, the main SVM parameter, the cost value, also needs optimization. \n\n\n::: {.cell layout-align=\"center\" hash='cache/model_e6bfaeec3b8139f5ab952957740a6b58'}\n\n```{.r .cell-code}\nsvm_mod <-\n svm_rbf(mode = \"classification\", cost = tune(), rbf_sigma = tune()) %>%\n set_engine(\"kernlab\")\n```\n:::\n\n\nThese two objects (the recipe and model) will be combined into a single object via the `workflow()` function from the [workflows](https://workflows.tidymodels.org/) package; this object will be used in the optimization process. \n\n\n::: {.cell layout-align=\"center\" hash='cache/workflow_0cafc1c93aa4b6c10cf118dc7ac36998'}\n\n```{.r .cell-code}\nsvm_wflow <-\n workflow() %>%\n add_model(svm_mod) %>%\n add_recipe(cell_pre_proc)\n```\n:::\n\n\nFrom this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by: \n\n\n::: {.cell layout-align=\"center\" hash='cache/pset_cf3c71a071bdd3ded2e951c057eb4e7e'}\n\n```{.r .cell-code}\nsvm_set <- extract_parameter_set_dials(svm_wflow)\nsvm_set\n#> Collection of 3 parameters for tuning\n#> \n#> identifier type object\n#> cost cost nparam[+]\n#> rbf_sigma rbf_sigma nparam[+]\n#> num_comp num_comp nparam[+]\n```\n:::\n\n\nThe default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the `update()` function. Let's constrain the search to one to twenty components by updating the `num_comp` parameter. 
Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all): \n\n\n::: {.cell layout-align=\"center\" hash='cache/update_464c1742f1417462a8f562edbf371c2b'}\n\n```{.r .cell-code}\nsvm_set <- \n svm_set %>% \n update(num_comp = num_comp(c(0L, 20L)))\n```\n:::\n\n\n## Sequential tuning \n\nBayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an _acquisition function_. \n\nFor example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as $\\kappa$) is a value that can be used to make trade-offs between **exploration** and **exploitation**. \n\n * **Exploration** means that the search will consider candidates in untested space.\n\n * **Exploitation** focuses in areas where the previous best results occurred. \n\nThe variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity. \n\nWe'll use another acquisition function, _expected improvement_, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the [package vignette for acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html). 
\n\n\n::: {.cell layout-align=\"center\"}\n\n```{.r .cell-code}\nset.seed(12)\nsearch_res <-\n svm_wflow %>% \n tune_bayes(\n resamples = folds,\n # To use non-default parameter ranges\n param_info = svm_set,\n # Generate five at semi-random to start\n initial = 5,\n iter = 50,\n # How to measure performance?\n metrics = metric_set(roc_auc),\n control = control_bayes(no_improve = 30, verbose = TRUE)\n )\n#> \n#> ❯ Generating a set of 5 initial parameter results\n#> ✓ Initialization complete\n#> \n#> \n#> ── Iteration 1 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8794 (@iter 0)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=29.2, rbf_sigma=0.707, num_comp=17\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.7883 (+/-0.0111)\n#> \n#> ── Iteration 2 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8794 (@iter 0)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.4, rbf_sigma=0.0087, num_comp=13\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results:\troc_auc=0.8954 (+/-0.0101)\n#> \n#> ── Iteration 3 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0374, rbf_sigma=0.00425, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8749 (+/-0.0123)\n#> \n#> ── Iteration 4 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=28.8, 
rbf_sigma=0.00386, num_comp=4\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8738 (+/-0.012)\n#> \n#> ── Iteration 5 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=21.5, rbf_sigma=0.0738, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8523 (+/-0.0115)\n#> \n#> ── Iteration 6 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=24.1, rbf_sigma=0.0111, num_comp=18\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8829 (+/-0.0101)\n#> \n#> ── Iteration 7 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=1.48, rbf_sigma=0.00629, num_comp=13\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8801 (+/-0.0118)\n#> \n#> ── Iteration 8 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=25.3, rbf_sigma=0.011, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results:\troc_auc=0.8985 (+/-0.0102)\n#> \n#> ── Iteration 9 ───────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8985 (@iter 8)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i 
Predicted candidates\n#> i cost=14.8, rbf_sigma=0.628, num_comp=0\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.7852 (+/-0.0173)\n#> \n#> ── Iteration 10 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8985 (@iter 8)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.1, rbf_sigma=0.0102, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results:\troc_auc=0.899 (+/-0.00955)\n#> \n#> ── Iteration 11 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.899 (@iter 10)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=25.1, rbf_sigma=0.0111, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results:\troc_auc=0.8994 (+/-0.00996)\n#> \n#> ── Iteration 12 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=28.3, rbf_sigma=0.0118, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8989 (+/-0.00928)\n#> \n#> ── Iteration 13 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=28.5, rbf_sigma=0.0026, num_comp=19\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8879 (+/-0.00951)\n#> \n#> ── Iteration 14 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i 
Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.4, rbf_sigma=0.00245, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.886 (+/-0.0111)\n#> \n#> ── Iteration 15 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.5, rbf_sigma=0.0179, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8964 (+/-0.00967)\n#> \n#> ── Iteration 16 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.1, rbf_sigma=0.00933, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8994 (+/-0.00968)\n#> \n#> ── Iteration 17 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=27.3, rbf_sigma=0.00829, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results:\troc_auc=0.8995 (+/-0.00996)\n#> \n#> ── Iteration 18 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.7, rbf_sigma=0.00363, num_comp=12\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8909 (+/-0.00973)\n#> \n#> ── Iteration 19 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process 
model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=29.6, rbf_sigma=0.0119, num_comp=8\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8964 (+/-0.0104)\n#> \n#> ── Iteration 20 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=23.6, rbf_sigma=0.0121, num_comp=0\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8636 (+/-0.0122)\n#> \n#> ── Iteration 21 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=27.6, rbf_sigma=0.00824, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8993 (+/-0.00961)\n#> \n#> ── Iteration 22 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=27.6, rbf_sigma=0.00901, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8993 (+/-0.01)\n#> \n#> ── Iteration 23 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=24.1, rbf_sigma=0.0133, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8988 (+/-0.00935)\n#> \n#> ── Iteration 24 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 
(@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=18.8, rbf_sigma=0.00058, num_comp=20\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8784 (+/-0.0112)\n#> \n#> ── Iteration 25 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=29.3, rbf_sigma=0.00958, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8994 (+/-0.00959)\n#> \n#> ── Iteration 26 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.6, rbf_sigma=0.00841, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8993 (+/-0.00949)\n#> \n#> ── Iteration 27 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00169, rbf_sigma=0.0201, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8794 (+/-0.0119)\n#> \n#> ── Iteration 28 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0012, rbf_sigma=0.000867, num_comp=20\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.3452 (+/-0.116)\n#> \n#> ── Iteration 29 
──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00128, rbf_sigma=0.0138, num_comp=4\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8671 (+/-0.0132)\n#> \n#> ── Iteration 30 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0319, rbf_sigma=0.0279, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8805 (+/-0.0121)\n#> \n#> ── Iteration 31 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0011, rbf_sigma=0.00787, num_comp=8\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8762 (+/-0.0121)\n#> \n#> ── Iteration 32 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=7.06, rbf_sigma=0.00645, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.89 (+/-0.0102)\n#> \n#> ── Iteration 33 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.000998, rbf_sigma=0.305, num_comp=7\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest 
results:\troc_auc=0.8757 (+/-0.0126)\n#> \n#> ── Iteration 34 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00615, rbf_sigma=0.0134, num_comp=8\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.877 (+/-0.0122)\n#> \n#> ── Iteration 35 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.208, rbf_sigma=0.00946, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8793 (+/-0.0122)\n#> \n#> ── Iteration 36 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.6, rbf_sigma=0.00481, num_comp=15\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8961 (+/-0.00885)\n#> \n#> ── Iteration 37 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00108, rbf_sigma=0.653, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.3649 (+/-0.106)\n#> \n#> ── Iteration 38 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00156, rbf_sigma=0.13, num_comp=5\n#> i Estimating 
performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8816 (+/-0.0121)\n#> \n#> ── Iteration 39 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=7.03, rbf_sigma=0.235, num_comp=16\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8572 (+/-0.0117)\n#> \n#> ── Iteration 40 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00466, rbf_sigma=0.211, num_comp=1\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.7714 (+/-0.0105)\n#> \n#> ── Iteration 41 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0357, rbf_sigma=0.00126, num_comp=1\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.7725 (+/-0.0106)\n#> \n#> ── Iteration 42 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=23.1, rbf_sigma=0.0332, num_comp=16\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8612 (+/-0.0137)\n#> \n#> ── Iteration 43 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=3.56, 
rbf_sigma=0.0294, num_comp=3\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8724 (+/-0.0126)\n#> \n#> ── Iteration 44 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00168, rbf_sigma=0.0337, num_comp=7\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8791 (+/-0.0123)\n#> \n#> ── Iteration 45 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00128, rbf_sigma=0.00258, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.3444 (+/-0.114)\n#> \n#> ── Iteration 46 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.506, rbf_sigma=0.000548, num_comp=4\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.8653 (+/-0.0131)\n#> \n#> ── Iteration 47 ──────────────────────────────────────────────────────\n#> \n#> i Current best:\t\troc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00142, rbf_sigma=0.204, num_comp=18\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results:\troc_auc=0.3586 (+/-0.113)\n#> ! 
No improvement for 30 iterations; returning current results.\n```\n:::\n\n\nThe resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number:\n\n\n::: {.cell layout-align=\"center\" hash='cache/show-iters_0c5a1059c1edff5eeab971a512ddaf07'}\n\n```{.r .cell-code}\nsearch_res\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 480 × 5\n#> splits id .metrics .notes .iter\n#> \n#> 1 Fold01 0\n#> 2 Fold02 0\n#> 3 Fold03 0\n#> 4 Fold04 0\n#> 5 Fold05 0\n#> 6 Fold06 0\n#> 7 Fold07 0\n#> 8 Fold08 0\n#> 9 Fold09 0\n#> 10 Fold10 0\n#> # ℹ 470 more rows\n```\n:::\n\n\nAs with grid search, we can summarize the results over resamples:\n\n\n::: {.cell layout-align=\"center\" hash='cache/summarize-iters_5b523c02b6e0d62bf64ada84f224e2b3'}\n\n```{.r .cell-code}\nestimates <- \n collect_metrics(search_res) %>% \n arrange(.iter)\n\nestimates\n#> # A tibble: 52 × 10\n#> cost rbf_sigma num_comp .metric .estimator mean n std_err .config\n#> \n#> 1 0.00383 2.72e-6 17 roc_auc binary 0.348 10 0.114 Prepro…\n#> 2 0.250 1.55e-2 7 roc_auc binary 0.879 10 0.0122 Prepro…\n#> 3 0.0372 1.02e-9 3 roc_auc binary 0.242 10 0.0574 Prepro…\n#> 4 1.28 8.13e-8 8 roc_auc binary 0.344 10 0.114 Prepro…\n#> 5 10.3 1.37e-3 14 roc_auc binary 0.877 10 0.0117 Prepro…\n#> 6 29.2 7.07e-1 17 roc_auc binary 0.788 10 0.0111 Iter1 \n#> 7 30.4 8.70e-3 13 roc_auc binary 0.895 10 0.0101 Iter2 \n#> 8 0.0374 4.25e-3 11 roc_auc binary 0.875 10 0.0123 Iter3 \n#> 9 28.8 3.86e-3 4 roc_auc binary 0.874 10 0.0120 Iter4 \n#> 10 21.5 7.38e-2 11 roc_auc binary 0.852 10 0.0115 Iter5 \n#> # ℹ 42 more rows\n#> # ℹ 1 more variable: .iter \n```\n:::\n\n\n\nThe best performance of the initial set of candidate values was `AUC = 0.8793995 `. The best results were achieved at iteration 17 with a corresponding AUC value of 0.8995344. 
The five best results are:\n\n\n::: {.cell layout-align=\"center\" hash='cache/best_62f09d71954a0558c204598af5a9d34e'}\n\n```{.r .cell-code}\nshow_best(search_res, metric = \"roc_auc\")\n#> # A tibble: 5 × 10\n#> cost rbf_sigma num_comp .metric .estimator mean n std_err .config .iter\n#> \n#> 1 27.3 0.00829 9 roc_auc binary 0.900 10 0.00996 Iter17 17\n#> 2 29.3 0.00958 10 roc_auc binary 0.899 10 0.00959 Iter25 25\n#> 3 25.1 0.0111 9 roc_auc binary 0.899 10 0.00996 Iter11 11\n#> 4 31.1 0.00933 10 roc_auc binary 0.899 10 0.00968 Iter16 16\n#> 5 27.6 0.00901 9 roc_auc binary 0.899 10 0.0100 Iter22 22\n```\n:::\n\n\nA plot of the search iterations can be created via:\n\n\n::: {.cell layout-align=\"center\" hash='cache/bo-plot_ef6a7ea30999c593a6bb4de937b7cea5'}\n\n```{.r .cell-code}\nautoplot(search_res, type = \"performance\")\n```\n\n::: {.cell-output-display}\n![](figs/bo-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThere are many parameter combinations have roughly equivalent results. \n\nHow did the parameters change over iterations? 
\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/bo-param-plot_6ae91e4892a836db44633e6bfd845f00'}\n\n```{.r .cell-code}\nautoplot(search_res, type = \"parameters\") + \n labs(x = \"Iterations\", y = NULL)\n```\n\n::: {.cell-output-display}\n![](figs/bo-param-plot-1.svg){fig-align='center' width=864}\n:::\n:::\n\n\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> kernlab * 0.9-32 2023-01-31 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang * 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> themis * 1.0.1 2023-04-14 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] 
/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/work/case-weights/index/execute-results/html.json b/_freeze/learn/work/case-weights/index/execute-results/html.json new file mode 100644 index 00000000..2a806c44 --- /dev/null +++ b/_freeze/learn/work/case-weights/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "55b7ab5622916dc2f271ffad22543bcf", + "result": { + "markdown": "---\ntitle: \"Creating case weights based on time\"\ncategories:\n - model fitting\n - case weights\n - time series\ntype: learn-subsection\nweight: 5\ndescription: | \n Create models that use coefficients, extract them from fitted models, and visualize them.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: tidymodels.\n\nThis article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others.\n\n## Example Data\n\nTo demonstrate we will use the Chicago data from the modeldata package.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_b073b9c0d2d38aeb4a5224075d7fd41e'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\ndata(Chicago)\n\nChicago <- Chicago %>%\n select(ridership, date, one_of(stations))\n```\n:::\n\n\nFrom `?Chicago`\n\n> These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station.\n\n> The date column corresponds to the current date. 
The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules.\n\nFor simplicity, we have limited our view to the date and station variables.\n\n## Creating weights\n\nThis data set contains daily information from 2001-01-22 to 2016-08-28. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations appear a long time ago and not be as representative of the future as more recent observations. \n\nWe could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past. \n\nWe need to decide on a way to calculate the case weights. The main thing constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when `x = 0` and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function\n\n$$ weight = base ^ x $$\n\nwhere `base` is some constant and `x` is the number of days. 
To make sure that we select a reasonable `base`, we need to do some manual testing, starting with looking at how old the oldest observation is.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-4_ef4371c9549adc79f61c9d1327fb804a'}\n\n```{.r .cell-code}\ndifftime(\"2016-01-01\", min(Chicago$date))\n#> Time difference of 5457.333 days\n```\n:::\n\n\nUsing this information we can visualize the weight curve, to see if we like the value of `base`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_c74cef63496f3567b4bd4942bad4892a'}\n\n```{.r .cell-code}\ntibble_days <- tibble(days = 0:5457)\n\ntibble_days %>%\n ggplot(aes(days)) +\n geom_function(fun = ~ 0.99 ^ .x)\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-5-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nsetting `base` to 0.99 appears to be down weighted too much. Any observation more than a year old would have no influence.\n\nLet us try a few more values to find \n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-6_958a7138f25afbfc3161fbf834e13d95'}\n\n```{.r .cell-code}\nmap_dfr(\n c(0.99, 0.999, 0.9999),\n ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days)\n) %>%\n ggplot(aes(days, value, group = base, color = base)) +\n geom_line()\n```\n\n::: {.cell-output-display}\n![](figs/unnamed-chunk-6-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nFrom this, we could pick something around 0.999 since it gives a better balance. Let's create a small function to help us encode this weight. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-7_6ab55e103a49d631a5cc38beedc580f3'}\n\n```{.r .cell-code}\nweights_from_dates <- function(x, ref) {\n if_else(\n condition = x >= ref,\n true = 1, # <- Notice that I'm setting any future weight to 1.\n false = 0.999 ^ as.numeric(difftime(ref, x, units = \"days\"))\n )\n}\n```\n:::\n\n\nWe then modify `Chicago` to add a weight column, explicitly making it an importance weight with `importance_weight()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-8_a5652b9e9e09ac4c66ed19872be89eb8'}\n\n```{.r .cell-code}\nChicago <- Chicago %>%\n mutate(weight = weights_from_dates(date, \"2016-01-01\"),\n weight = importance_weights(weight))\n```\n:::\n\n\nThis approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don't need to use days as the unit. Seconds, weeks, or years could be used as well.\n\n## Modeling\n\nWe start by splitting up our data into a training and testing set based on the day `\"2016-01-01\"`. We added weights to the data set before splitting it so each set has weights.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_8135e3246a09512d38ee24dcde4681f7'}\n\n```{.r .cell-code}\nChicago_train <- Chicago %>% filter(date < \"2016-01-01\")\nChicago_test <- Chicago %>% filter(date >= \"2016-01-01\")\n```\n:::\n\n\nNext, we are going to create a recipe. 
The weights won't have any influence on the preprocessing since none of these operations are supervised and we are using importance weights.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_ef84fffe914d6e05e47d4c33949c56e6'}\n\n```{.r .cell-code}\nbase_recipe <-\n recipe(ridership ~ ., data = Chicago_train) %>%\n # Create date features\n step_date(date) %>%\n step_holiday(date, keep_original_cols = FALSE) %>%\n # Remove any columns with a single unique value\n step_zv(all_predictors()) %>%\n # Normalize all the numerical features\n step_normalize(all_numeric_predictors()) %>%\n # Perform PCA to reduce the correlation bet the stations\n step_pca(all_numeric_predictors(), threshold = 0.95)\n```\n:::\n\n\nNext we need to build the rest of the workflow. We use a linear regression specification\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-11_97612c19efbb436b81e1a8d88fa0c208'}\n\n```{.r .cell-code}\nlm_spec <-\n linear_reg() %>%\n set_engine(\"lm\")\n```\n:::\n\n\nand we add these together in the workflow. 
To activate the case weights, we use the `add_case_weights()` function to specify the name of the case weights being used.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-12_896852baf2e4899bc3ea08d055cab4d2'}\n\n```{.r .cell-code}\nlm_wflow <-\n workflow() %>% \n add_case_weights(weight) %>%\n add_recipe(base_recipe) %>%\n add_model(lm_spec)\n\nlm_wflow\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: linear_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 5 Recipe Steps\n#> \n#> • step_date()\n#> • step_holiday()\n#> • step_zv()\n#> • step_normalize()\n#> • step_pca()\n#> \n#> ── Case Weights ──────────────────────────────────────────────────────\n#> weight\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Linear Regression Model Specification (regression)\n#> \n#> Computational engine: lm\n```\n:::\n\n\nWith all that done we can fit the workflow with the usual syntax: \n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-13_c322328206c4095202be5989a8810759'}\n\n```{.r .cell-code}\nlm_fit <- fit(lm_wflow, data = Chicago_train)\nlm_fit\n#> ══ Workflow [trained] ════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: linear_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 5 Recipe Steps\n#> \n#> • step_date()\n#> • step_holiday()\n#> • step_zv()\n#> • step_normalize()\n#> • step_pca()\n#> \n#> ── Case Weights ──────────────────────────────────────────────────────\n#> weight\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> \n#> Call:\n#> stats::lm(formula = ..y ~ ., data = data, weights = weights)\n#> \n#> Coefficients:\n#> (Intercept) date_dowMon date_dowTue date_dowWed date_dowThu \n#> 1.762599 13.307654 14.689027 14.620178 14.382313 \n#> date_dowFri date_dowSat date_monthFeb date_monthMar 
date_monthApr \n#> 13.695433 1.228233 0.364342 1.348229 1.409897 \n#> date_monthMay date_monthJun date_monthJul date_monthAug date_monthSep \n#> 1.188189 2.598296 2.219721 2.406998 1.932061 \n#> date_monthOct date_monthNov date_monthDec PC1 PC2 \n#> 2.655552 0.909007 -0.004751 0.073014 -1.591021 \n#> PC3 PC4 PC5 \n#> -0.608386 -0.205305 0.696010\n```\n:::\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> 
────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/work/nested-resampling/index/execute-results/html.json b/_freeze/learn/work/nested-resampling/index/execute-results/html.json new file mode 100644 index 00000000..c07da536 --- /dev/null +++ b/_freeze/learn/work/nested-resampling/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "675316c530c826b8fab7e6e90401247f", + "result": { + "markdown": "---\ntitle: \"Nested resampling\"\ncategories:\n - nested resampling\n - SVMs\ntype: learn-subsection\nweight: 2\ndescription: | \n Estimate the best hyperparameters for a model using nested resampling.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: furrr, kernlab, mlbench, scales, and tidymodels.\n\nIn this article, we discuss an alternative method for evaluating and tuning models, called [nested resampling](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22nested+resampling%22+inner+outer&btnG=). While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance.\n\n## Resampling models\n\nA typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. 
In rsample, we use the term *analysis set* for the data that are used to fit the model and the term *assessment set* for the set used to compute performance:\n\n\n::: {.cell layout-align=\"center\" hash='cache/resampling-fig_214078c494ebc8f5e36f063603746f5e'}\n::: {.cell-output-display}\n![](img/resampling.svg){fig-align='center' width=70%}\n:::\n:::\n\n\nA common method for tuning models is [grid search](/learn/work/tune-svm/) where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter.\n\nThe potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for *optimization bias* since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance.\n\nNested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An *outer* resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times.\n\nOnce the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. 
The average of the outer method's assessment sets are a unbiased estimate of the model.\n\nWe will simulate some regression data to illustrate the methods. The mlbench package has a function `mlbench::mlbench.friedman1()` that can simulate a complex regression data structure from the [original MARS publication](https://scholar.google.com/scholar?hl=en&q=%22Multivariate+adaptive+regression+splines%22&btnG=&as_sdt=1%2C7&as_sdtp=). A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed.\n\n\n::: {.cell layout-align=\"center\" hash='cache/sim-data_e983661ec506849cbf1bd8ec69ce0743'}\n\n```{.r .cell-code}\nlibrary(mlbench)\nsim_data <- function(n) {\n tmp <- mlbench.friedman1(n, sd = 1)\n tmp <- cbind(tmp$x, tmp$y)\n tmp <- as.data.frame(tmp)\n names(tmp)[ncol(tmp)] <- \"y\"\n tmp\n}\n\nset.seed(9815)\ntrain_dat <- sim_data(100)\nlarge_dat <- sim_data(10^5)\n```\n:::\n\n\n## Nested resampling\n\nTo get started, the types of resampling methods need to be specified. This isn't a large data set, so 5 repeats of 10-fold cross validation will be used as the *outer* resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let's use 25 iterations of the bootstrap. This means that there will eventually be `5 * 10 * 25 = 1250` models that are fit to the data *per tuning parameter*. 
These models will be discarded once the performance of the model has been quantified.\n\nTo create the tibble with the resampling specifications:\n\n\n::: {.cell layout-align=\"center\" hash='cache/tibble-gen_ed46b63d3ba7bc40ee23e6405c95dba4'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nresults <- nested_cv(train_dat, \n outside = vfold_cv(repeats = 5), \n inside = bootstraps(times = 25))\nresults\n#> # Nested resampling:\n#> # outer: 10-fold cross-validation repeated 5 times\n#> # inner: Bootstrap sampling\n#> # A tibble: 50 × 4\n#> splits id id2 inner_resamples\n#> \n#> 1 Repeat1 Fold01 \n#> 2 Repeat1 Fold02 \n#> 3 Repeat1 Fold03 \n#> 4 Repeat1 Fold04 \n#> 5 Repeat1 Fold05 \n#> 6 Repeat1 Fold06 \n#> 7 Repeat1 Fold07 \n#> 8 Repeat1 Fold08 \n#> 9 Repeat1 Fold09 \n#> 10 Repeat1 Fold10 \n#> # ℹ 40 more rows\n```\n:::\n\n\nThe splitting information for each resample is contained in the `split` objects. Focusing on the second fold of the first repeat:\n\n\n::: {.cell layout-align=\"center\" hash='cache/split-example_730f1d4661d7d82e99e7ae9d90d96252'}\n\n```{.r .cell-code}\nresults$splits[[2]]\n#> \n#> <90/10/100>\n```\n:::\n\n\n`<90/10/100>` indicates the number of observations in the analysis set, assessment set, and the original data.\n\nEach element of `inner_resamples` has its own tibble with the bootstrapping splits.\n\n\n::: {.cell layout-align=\"center\" hash='cache/inner-splits_0546fa38a72ce1afdce4680dececa5d8'}\n\n```{.r .cell-code}\nresults$inner_resamples[[5]]\n#> # Bootstrap sampling \n#> # A tibble: 25 × 2\n#> splits id \n#> \n#> 1 Bootstrap01\n#> 2 Bootstrap02\n#> 3 Bootstrap03\n#> 4 Bootstrap04\n#> 5 Bootstrap05\n#> 6 Bootstrap06\n#> 7 Bootstrap07\n#> 8 Bootstrap08\n#> 9 Bootstrap09\n#> 10 Bootstrap10\n#> # ℹ 15 more rows\n```\n:::\n\n\nThese are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data:\n\n\n::: {.cell layout-align=\"center\" 
hash='cache/inner-boot-split_d614aafea425464b013e25aa1e7fda68'}\n\n```{.r .cell-code}\nresults$inner_resamples[[5]]$splits[[1]]\n#> \n#> <90/31/90>\n```\n:::\n\n\nTo start, we need to define how the model will be created and measured. Let's use a radial basis support vector machine model via the function `kernlab::ksvm`. This model is generally considered to have *two* tuning parameters: the SVM cost value and the kernel parameter `sigma`. For illustration purposes here, only the cost value will be tuned and the function `kernlab::sigest` will be used to estimate `sigma` during each model fit. This is automatically done by `ksvm`.\n\nAfter the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. **One important note:** for this model, it is critical to center and scale the predictors before computing dot products. We don't do this operation here because `mlbench.friedman1` simulates all of the predictors to be standardized uniform random variables.\n\nOur function to fit the model and compute the RMSE is:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rmse-func_234730528ec8a0043f8c594c31abc649'}\n\n```{.r .cell-code}\nlibrary(kernlab)\n\n# `object` will be an `rsplit` object from our `results` tibble\n# `cost` is the tuning parameter\nsvm_rmse <- function(object, cost = 1) {\n y_col <- ncol(object$data)\n mod <- \n svm_rbf(mode = \"regression\", cost = cost) %>% \n set_engine(\"kernlab\") %>% \n fit(y ~ ., data = analysis(object))\n \n holdout_pred <- \n predict(mod, assessment(object) %>% dplyr::select(-y)) %>% \n bind_cols(assessment(object) %>% dplyr::select(y))\n rmse(holdout_pred, truth = y, estimate = .pred)$.estimate\n}\n\n# In some case, we want to parameterize the function over the tuning parameter:\nrmse_wrapper <- function(cost, object) svm_rmse(object, cost)\n```\n:::\n\n\nFor the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. 
To do this, create a wrapper:\n\n\n::: {.cell layout-align=\"center\" hash='cache/inner-tune-func_5699d21fd6e91cb7f1d70c07e072bc52'}\n\n```{.r .cell-code}\n# `object` will be an `rsplit` object for the bootstrap samples\ntune_over_cost <- function(object) {\n tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% \n mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object))\n}\n```\n:::\n\n\nSince this will be called across the set of outer cross-validation splits, another wrapper is required:\n\n\n::: {.cell layout-align=\"center\" hash='cache/inner-func_e60f5a82560824dd74fe7a4086f7668e'}\n\n```{.r .cell-code}\n# `object` is an `rsplit` object in `results$inner_resamples` \nsummarize_tune_results <- function(object) {\n # Return row-bound tibble that has the 25 bootstrap results\n map_df(object$splits, tune_over_cost) %>%\n # For each value of the tuning parameter, compute the \n # average RMSE which is the inner bootstrap estimate. \n group_by(cost) %>%\n summarize(mean_RMSE = mean(RMSE, na.rm = TRUE),\n n = length(RMSE),\n .groups = \"drop\")\n}\n```\n:::\n\n\nNow that those functions are defined, we can execute all the inner resampling loops:\n\n\n::: {.cell layout-align=\"center\" hash='cache/inner-runs_ca610ccb577b23142f8afd7cda95089e'}\n\n```{.r .cell-code}\ntuning_results <- map(results$inner_resamples, summarize_tune_results) \n```\n:::\n\n\nAlternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using `map()`, the function `future_map()` parallelizes the iterations using the [future package](https://cran.r-project.org/web/packages/future/vignettes/future-1-overview.html). The `multisession` plan uses the local cores to process the inner resampling loop. 
The end results are the same as the sequential computations.\n\n\n::: {.cell layout-align=\"center\" hash='cache/inner-runs-parallel_0e2e84113b39b62daa49eaad97238ac6'}\n\n```{.r .cell-code}\nlibrary(furrr)\nplan(multisession)\n\ntuning_results <- future_map(results$inner_resamples, summarize_tune_results) \n```\n:::\n\n\nThe object `tuning_results` is a list of data frames for each of the 50 outer resamples.\n\nLet's make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rmse-plot_47c97acb3c9e2590f03f265dcd2dd865'}\n\n```{.r .cell-code}\nlibrary(scales)\n\npooled_inner <- tuning_results %>% bind_rows\n\nbest_cost <- function(dat) dat[which.min(dat$mean_RMSE),]\n\np <- \n ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"Inner RMSE\")\n\nfor (i in 1:length(tuning_results))\n p <- p +\n geom_line(data = tuning_results[[i]], alpha = .2) +\n geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4)\n\np <- p + geom_smooth(data = pooled_inner, se = FALSE)\np\n```\n\n::: {.cell-output-display}\n![](figs/rmse-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nEach gray line is a separate bootstrap resampling curve created from a different 90% of the data. 
The blue line is a LOESS smooth of all the results pooled together.\n\nTo determine the best parameter estimate for each of the outer resampling iterations:\n\n\n::: {.cell layout-align=\"center\" hash='cache/choose_16a9265675bde0043286c8a94fcd67a9'}\n\n```{.r .cell-code}\ncost_vals <- \n tuning_results %>% \n map_df(best_cost) %>% \n select(cost)\n\nresults <- \n bind_cols(results, cost_vals) %>% \n mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1))))\n\nggplot(results, aes(x = cost)) + \n geom_bar() + \n xlab(\"SVM Cost\") + \n scale_x_discrete(drop = FALSE)\n```\n\n::: {.cell-output-display}\n![](figs/choose-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nMost of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger.\n\nNow that we have these estimates, we can compute the outer resampling results for each of the 50 splits using the corresponding tuning parameter value:\n\n\n::: {.cell layout-align=\"center\" hash='cache/run-out-r_4763ba770361039f73c24636628bc34f'}\n\n```{.r .cell-code}\nresults <- \n results %>% \n mutate(RMSE = map2_dbl(splits, cost, svm_rmse))\n\nsummary(results$RMSE)\n#> Min. 1st Qu. Median Mean 3rd Qu. Max. \n#> 1.672 2.095 2.685 2.690 3.252 4.254\n```\n:::\n\n\nThe estimated RMSE for the model tuning process is 2.69.\n\nWhat is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, 50 SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. 
The associated RMSE is the biased estimate.\n\n\n::: {.cell layout-align=\"center\" hash='cache/not-nested_352b1fd09cc9828e5d75a4c8d1abc6bf'}\n\n```{.r .cell-code}\nnot_nested <- \n map(results$splits, tune_over_cost) %>%\n bind_rows\n\nouter_summary <- not_nested %>% \n group_by(cost) %>% \n summarize(outer_RMSE = mean(RMSE), n = length(RMSE))\n\nouter_summary\n#> # A tibble: 11 × 3\n#> cost outer_RMSE n\n#> \n#> 1 0.25 3.54 50\n#> 2 0.5 3.11 50\n#> 3 1 2.77 50\n#> 4 2 2.62 50\n#> 5 4 2.65 50\n#> 6 8 2.75 50\n#> 7 16 2.82 50\n#> 8 32 2.82 50\n#> 9 64 2.83 50\n#> 10 128 2.83 50\n#> 11 256 2.82 50\n\nggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + \n geom_point() + \n geom_line() + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"RMSE\")\n```\n\n::: {.cell-output-display}\n![](figs/not-nested-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe non-nested procedure estimates the RMSE to be 2.62. Both estimates are fairly close.\n\nThe approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning.\n\n\n::: {.cell layout-align=\"center\" hash='cache/large-sample-estimate_e753225c1e50dcca8f214f4e41cc47bd'}\n\n```{.r .cell-code}\nfinalModel <- ksvm(y ~ ., data = train_dat, C = 2)\nlarge_pred <- predict(finalModel, large_dat[, -ncol(large_dat)])\nsqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE))\n#> [1] 2.712059\n```\n:::\n\n\nThe nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar.\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz 
America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> kernlab * 0.9-32 2023-01-31 [1] CRAN (R 4.3.0)\n#> mlbench * 2.1-3.1 2023-05-05 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/work/tune-svm/index/execute-results/html.json b/_freeze/learn/work/tune-svm/index/execute-results/html.json new file mode 100644 index 00000000..da7470b6 --- /dev/null +++ b/_freeze/learn/work/tune-svm/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "58aa6645896fc267d3e67e4a1c268964", + "result": { + "markdown": "---\ntitle: \"Model 
tuning via grid search\"\ncategories:\n - model tuning\n - SVMs\ntype: learn-subsection\nweight: 1\ndescription: | \n Choose hyperparameters for a model by training on a grid of many possible parameter values.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: kernlab, mlbench, and tidymodels.\n\nThis article demonstrates how to tune a model using grid search. Many models have **hyperparameters** that can't be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best. \n\n## Example data\n\nTo demonstrate model tuning, we'll use the Ionosphere data in the mlbench package:\n\n\n::: {.cell layout-align=\"center\" hash='cache/load-data_ccd52d98bd5d4c80115efb123b0cd574'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nlibrary(mlbench)\ndata(Ionosphere)\n```\n:::\n\n\nFrom `?Ionosphere`:\n\n> This radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. \"good\" radar returns are those showing evidence of some type of structure in the ionosphere. \"bad\" returns are those that do not; their signals pass through the ionosphere.\n\n> Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this databse are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details.\n\nThere are 43 predictors and a factor outcome. 
Two of the predictors are factors (`V1` and `V2`) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions:\n\n\n::: {.cell layout-align=\"center\" hash='cache/factor-pred_4de54531f8e5660a66ab0c8927650253'}\n\n```{.r .cell-code}\ntable(Ionosphere$V1)\n#> \n#> 0 1 \n#> 38 313\ntable(Ionosphere$V2)\n#> \n#> 0 \n#> 351\n```\n:::\n\n\nThere's no point of putting `V2` into any model since is is a zero-variance predictor. `V1` is not but it _could_ be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value:\n\n\n::: {.cell layout-align=\"center\" hash='cache/glm-fail_8ead2e62aec261b8a14c5179b5b1071c'}\n\n```{.r .cell-code}\nglm(Class ~ ., data = Ionosphere, family = binomial)\n\n# Surprisingly, this doesn't help: \n\nglm(Class ~ . - V2, data = Ionosphere, family = binomial)\n```\n:::\n\n\nLet's remove these two problematic variables:\n\n\n::: {.cell layout-align=\"center\" hash='cache/ion-rm_49b6be55c3beb2f49da6f91955e1aaf5'}\n\n```{.r .cell-code}\nIonosphere <- Ionosphere %>% select(-V1, -V2)\n```\n:::\n\n\n## Inputs for the search\n\nTo demonstrate, we'll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the $\\sigma$ parameter in the kernel function:\n\n\n::: {.cell layout-align=\"center\" hash='cache/svm-mod_d415239acd947bcb02cfc9c30198bde0'}\n\n```{.r .cell-code}\nsvm_mod <-\n svm_rbf(cost = tune(), rbf_sigma = tune()) %>%\n set_mode(\"classification\") %>%\n set_engine(\"kernlab\")\n```\n:::\n\n\nIn this article, tuning will be demonstrated in two ways, using:\n\n- a standard R formula, and \n- a recipe.\n\nLet's create a simple recipe here:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rec_392e244f6df3a5cc50ee6ee0f194a480'}\n\n```{.r .cell-code}\niono_rec <-\n recipe(Class ~ ., data = 
Ionosphere) %>%\n # remove any zero variance predictors\n step_zv(all_predictors()) %>% \n # remove any linear combinations\n step_lincomb(all_numeric())\n```\n:::\n\n\nThe only other required item for tuning is a resampling strategy as defined by an rsample object. Let's demonstrate using basic bootstrapping:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rs_51fdb695637efb06c3026aacba88d1bc'}\n\n```{.r .cell-code}\nset.seed(4943)\niono_rs <- bootstraps(Ionosphere, times = 30)\n```\n:::\n\n\n## Optional inputs\n\nAn _optional_ step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. To do this, a yardstick package function can be used to create a metric set:\n\n\n::: {.cell layout-align=\"center\" hash='cache/roc_9fbed0538a5de5e5f3fc808bf244c1dd'}\n\n```{.r .cell-code}\nroc_vals <- metric_set(roc_auc)\n```\n:::\n\n\nIf no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used.\n\nAlso, a control object can be passed that specifies different aspects of the search. Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/ctrl_59b3e85dfc0d1270330de4d3c49306b0'}\n\n```{.r .cell-code}\nctrl <- control_grid(verbose = FALSE, save_pred = TRUE)\n```\n:::\n\n\n## Executing with a formula\n\nFirst, we can use the formula interface:\n\n\n::: {.cell layout-align=\"center\" hash='cache/grid_f1f79c8a8bae5826ef211f59f6120ebb'}\n\n```{.r .cell-code}\nset.seed(35)\nformula_res <-\n svm_mod %>% \n tune_grid(\n Class ~ .,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nformula_res\n#> # Tuning results\n#> # Bootstrap sampling \n#> # A tibble: 30 × 5\n#> splits id .metrics .notes .predictions\n#> \n#> 1 Bootstrap01 \n#> 2 Bootstrap02 \n#> 3 Bootstrap03 \n#> 4 Bootstrap04 \n#> 5 Bootstrap05 \n#> 6 Bootstrap06 \n#> 7 Bootstrap07 \n#> 8 Bootstrap08 \n#> 9 Bootstrap09 \n#> 10 Bootstrap10 \n#> # ℹ 20 more rows\n```\n:::\n\n\nThe `.metrics` column contains tibbles of the performance metrics for each tuning parameter combination:\n\n\n::: {.cell layout-align=\"center\" hash='cache/raw-metrics_746b04a957450c71063d8c0473f311f8'}\n\n```{.r .cell-code}\nformula_res %>% \n select(.metrics) %>% \n slice(1) %>% \n pull(1)\n#> [[1]]\n#> # A tibble: 10 × 6\n#> cost rbf_sigma .metric .estimator .estimate .config \n#> \n#> 1 0.00849 1.11e-10 roc_auc binary 0.815 Preprocessor1_Model01\n#> 2 0.176 7.28e- 8 roc_auc binary 0.839 Preprocessor1_Model02\n#> 3 14.9 3.93e- 4 roc_auc binary 0.870 Preprocessor1_Model03\n#> 4 5.51 2.10e- 3 roc_auc binary 0.919 Preprocessor1_Model04\n#> 5 1.87 3.53e- 7 roc_auc binary 0.838 Preprocessor1_Model05\n#> 6 0.00719 1.45e- 5 roc_auc binary 0.832 Preprocessor1_Model06\n#> 7 0.00114 8.41e- 2 roc_auc binary 0.969 Preprocessor1_Model07\n#> 8 0.950 1.74e- 1 roc_auc binary 0.984 Preprocessor1_Model08\n#> 9 0.189 3.13e- 6 roc_auc binary 0.832 Preprocessor1_Model09\n#> 10 0.0364 4.96e- 9 roc_auc binary 0.839 Preprocessor1_Model10\n```\n:::\n\n\nTo get the final resampling estimates, the `collect_metrics()` function can be 
used on the grid object:\n\n\n::: {.cell layout-align=\"center\" hash='cache/metric-estimates_67ce86f3ea65fac48e17549a457f4e79'}\n\n```{.r .cell-code}\nestimates <- collect_metrics(formula_res)\nestimates\n#> # A tibble: 10 × 8\n#> cost rbf_sigma .metric .estimator mean n std_err .config \n#> \n#> 1 0.00849 1.11e-10 roc_auc binary 0.822 30 0.00718 Preprocessor1_Mode…\n#> 2 0.176 7.28e- 8 roc_auc binary 0.871 30 0.00525 Preprocessor1_Mode…\n#> 3 14.9 3.93e- 4 roc_auc binary 0.916 30 0.00497 Preprocessor1_Mode…\n#> 4 5.51 2.10e- 3 roc_auc binary 0.960 30 0.00378 Preprocessor1_Mode…\n#> 5 1.87 3.53e- 7 roc_auc binary 0.871 30 0.00524 Preprocessor1_Mode…\n#> 6 0.00719 1.45e- 5 roc_auc binary 0.871 30 0.00534 Preprocessor1_Mode…\n#> 7 0.00114 8.41e- 2 roc_auc binary 0.966 30 0.00301 Preprocessor1_Mode…\n#> 8 0.950 1.74e- 1 roc_auc binary 0.979 30 0.00204 Preprocessor1_Mode…\n#> 9 0.189 3.13e- 6 roc_auc binary 0.871 30 0.00536 Preprocessor1_Mode…\n#> 10 0.0364 4.96e- 9 roc_auc binary 0.871 30 0.00537 Preprocessor1_Mode…\n```\n:::\n\n\nThe top combinations are:\n\n\n::: {.cell layout-align=\"center\" hash='cache/sorted-metrics_f9e61627272474db2c62116db39a02c6'}\n\n```{.r .cell-code}\nshow_best(formula_res, metric = \"roc_auc\")\n#> # A tibble: 5 × 8\n#> cost rbf_sigma .metric .estimator mean n std_err .config \n#> \n#> 1 0.950 0.174 roc_auc binary 0.979 30 0.00204 Preprocessor1_Model…\n#> 2 0.00114 0.0841 roc_auc binary 0.966 30 0.00301 Preprocessor1_Model…\n#> 3 5.51 0.00210 roc_auc binary 0.960 30 0.00378 Preprocessor1_Model…\n#> 4 14.9 0.000393 roc_auc binary 0.916 30 0.00497 Preprocessor1_Model…\n#> 5 0.00719 0.0000145 roc_auc binary 0.871 30 0.00534 Preprocessor1_Model…\n```\n:::\n\n\n## Executing with a recipe\n\nNext, we can use the same syntax but pass a *recipe* in as the pre-processor argument:\n\n\n::: {.cell layout-align=\"center\" hash='cache/recipe_890acb94821e471582dc23a1823d3b03'}\n\n```{.r .cell-code}\nset.seed(325)\nrecipe_res <-\n svm_mod %>% \n 
tune_grid(\n iono_rec,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nrecipe_res\n#> # Tuning results\n#> # Bootstrap sampling \n#> # A tibble: 30 × 5\n#> splits id .metrics .notes .predictions\n#> \n#> 1 Bootstrap01 \n#> 2 Bootstrap02 \n#> 3 Bootstrap03 \n#> 4 Bootstrap04 \n#> 5 Bootstrap05 \n#> 6 Bootstrap06 \n#> 7 Bootstrap07 \n#> 8 Bootstrap08 \n#> 9 Bootstrap09 \n#> 10 Bootstrap10 \n#> # ℹ 20 more rows\n```\n:::\n\n\nThe best setting here is:\n\n\n::: {.cell layout-align=\"center\" hash='cache/best-rec_d64b1cb94969614f0d931378ba119298'}\n\n```{.r .cell-code}\nshow_best(recipe_res, metric = \"roc_auc\")\n#> # A tibble: 5 × 8\n#> cost rbf_sigma .metric .estimator mean n std_err .config \n#> \n#> 1 15.6 0.182 roc_auc binary 0.981 30 0.00213 Preprocessor1_Model04\n#> 2 0.385 0.0276 roc_auc binary 0.978 30 0.00222 Preprocessor1_Model03\n#> 3 0.143 0.00243 roc_auc binary 0.930 30 0.00443 Preprocessor1_Model06\n#> 4 0.841 0.000691 roc_auc binary 0.892 30 0.00504 Preprocessor1_Model07\n#> 5 0.0499 0.0000335 roc_auc binary 0.872 30 0.00521 Preprocessor1_Model08\n```\n:::\n\n\n## Out-of-sample predictions\n\nIf we used `save_pred = TRUE` to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using `collect_predictions()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rec-preds_7c8bd3092e13cf7052f2ed1428302f59'}\n\n```{.r .cell-code}\ncollect_predictions(recipe_res)\n#> # A tibble: 38,740 × 8\n#> id .pred_bad .pred_good .row cost rbf_sigma Class .config \n#> \n#> 1 Bootstrap01 0.333 0.667 1 0.00296 0.00000383 good Preprocessor…\n#> 2 Bootstrap01 0.333 0.667 9 0.00296 0.00000383 good Preprocessor…\n#> 3 Bootstrap01 0.333 0.667 10 0.00296 0.00000383 bad Preprocessor…\n#> 4 Bootstrap01 0.333 0.667 12 0.00296 0.00000383 bad Preprocessor…\n#> 5 Bootstrap01 0.333 0.667 14 0.00296 0.00000383 bad Preprocessor…\n#> 6 Bootstrap01 0.333 0.667 15 
0.00296 0.00000383 good Preprocessor…\n#> 7 Bootstrap01 0.333 0.667 16 0.00296 0.00000383 bad Preprocessor…\n#> 8 Bootstrap01 0.334 0.666 22 0.00296 0.00000383 bad Preprocessor…\n#> 9 Bootstrap01 0.333 0.667 23 0.00296 0.00000383 good Preprocessor…\n#> 10 Bootstrap01 0.334 0.666 24 0.00296 0.00000383 bad Preprocessor…\n#> # ℹ 38,730 more rows\n```\n:::\n\n\nWe can obtain the hold-out sets for all the resamples augmented with the predictions using `augment()`, which provides opportunities for flexible visualization of model results:\n\n\n::: {.cell layout-align=\"center\" hash='cache/augment-preds_614bc71af7c867a0d948052868406aa5'}\n\n```{.r .cell-code}\naugment(recipe_res) %>%\n ggplot(aes(V3, .pred_good, color = Class)) +\n geom_point(show.legend = FALSE) +\n facet_wrap(~Class)\n```\n\n::: {.cell-output-display}\n![](figs/augment-preds-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> kernlab * 0.9-32 2023-01-31 [1] CRAN (R 4.3.0)\n#> mlbench * 2.1-3.1 2023-05-05 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] 
CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/learn/work/tune-text/index/execute-results/html.json b/_freeze/learn/work/tune-text/index/execute-results/html.json new file mode 100644 index 00000000..2b6e280f --- /dev/null +++ b/_freeze/learn/work/tune-text/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "b9991a9b6f389fbd23afc288db128f49", + "result": { + "markdown": "---\ntitle: \"Tuning text models\"\ncategories:\n - model tuning\n - text analysis\n - logistic regression\n - Bayesian optimization\n - extracting results\n\ntype: learn-subsection\nweight: 4\ndescription: | \n Prepare text data for predictive modeling and tune with both grid and iterative search.\ntoc: true\ntoc-depth: 2\ninclude-after-body: ../../../resources.html\n---\n\n\n\n\n\n\n## Introduction\n\nTo use code in this article, you will need to install the following packages: stopwords, textfeatures, textrecipes, and tidymodels.\n\nThis article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. 
This article also shows how to extract information from each model fit during tuning to use later on.\n\n\n## Text as data\n\nThe text data we'll use in this article are from Amazon: \n\n> This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review.\n\nThis article uses a small subset of the total reviews [available at the original source](https://snap.stanford.edu/data/web-FineFoods.html). We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set. \n\nThere is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not.\n\n\n::: {.cell layout-align=\"center\" hash='cache/data_2f1bbe66683be79af71aa7f4f8934a47'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\n\ndata(\"small_fine_foods\")\ntraining_data\n#> # A tibble: 4,000 × 3\n#> product review score\n#> \n#> 1 B000J0LSBG \"this stuff is not stuffing its not good at all save yo… other\n#> 2 B000EYLDYE \"I absolutely LOVE this dried fruit. LOVE IT. Whenever I … great\n#> 3 B0026LIO9A \"GREAT DEAL, CONVENIENT TOO. Much cheaper than WalMart and… great\n#> 4 B00473P8SK \"Great flavor, we go through a ton of this sauce! I discove… great\n#> 5 B001SAWTNM \"This is excellent salsa/hot sauce, but you can get it for … great\n#> 6 B000FAG90U \"Again, this is the best dogfood out there. One suggestion… great\n#> 7 B006BXTCEK \"The box I received was filled with teas, hot chocolates, a… other\n#> 8 B002GWH5OY \"This is delicious coffee which compares favorably with muc… great\n#> 9 B003R0MFYY \"Don't let these little tiny cans fool you. 
They pack a lo… great\n#> 10 B001EO5ZXI \"One of the nicest, smoothest cup of chai I've made. Nice m… great\n#> # ℹ 3,990 more rows\n```\n:::\n\n\nOur modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not.\n\n## Inputs for the search\n\nText, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. There are multiple ways to process and prepare text for modeling; let's add several steps together to create different kinds of features:\n\n* Create an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the [textfeatures](https://github.com/mkearney/textfeatures) package for this.\n\n* [Tokenize](https://smltar.com/tokenization.html) the text (i.e. break the text into smaller components such as words).\n\n* Remove stop words such as \"the\", \"an\", \"of\", etc.\n\n* [Stem](https://smltar.com/stemming.html) tokens to a common root where possible.\n\n* Convert tokens to dummy variables via a [signed, binary hash function](https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html).\n\n* Optionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a [Yeo-Johnson transformation](https://bookdown.org/max/FES/numeric-one-to-one.html).\n\n* Remove predictors with a single distinct value.\n\n* Center and scale all predictors. \n\n\n::: {.callout-note}\n We will end up with two kinds of features:\n\n- dummy/indicator variables for the count-based features like number of digits or punctuation characters \n- hash features for the tokens like \"salsa\" or \"delicious\". \n:::\n\nSome of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. 
In this preprocessing approach, the main tuning parameter is the number of hashing features to use. \n\nBefore we start building our preprocessing recipe, we need some helper objects. For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features: \n\n\n::: {.cell layout-align=\"center\" hash='cache/feat-list_a3cc8d87ca8582b63d4b5e0ea7d50a40'}\n\n```{.r .cell-code}\nlibrary(textfeatures)\n\nbasics <- names(textfeatures:::count_functions)\nhead(basics)\n#> [1] \"n_words\" \"n_uq_words\" \"n_charS\" \"n_uq_charS\" \"n_digits\" \n#> [6] \"n_hashtags\"\n```\n:::\n\n\nAlso, the implementation of feature hashes does not produce the binary values we need. This small function will help convert the scores to values of -1, 0, or 1:\n\n\n::: {.cell layout-align=\"center\" hash='cache/hash-func_68c1b2409ad7177ecbf965914132a07f'}\n\n```{.r .cell-code}\nbinary_hash <- function(x) {\n x <- ifelse(x < 0, -1, x)\n x <- ifelse(x > 0, 1, x)\n x\n}\n```\n:::\n\n\nNow, let's put this all together in one recipe:\n\n\n::: {.cell layout-align=\"center\" hash='cache/text-rec_978533f723b1d81e1af61f8fc9331f63'}\n\n```{.r .cell-code}\nlibrary(textrecipes)\n\npre_proc <-\n recipe(score ~ product + review, data = training_data) %>%\n # Do not use the product ID as a predictor\n update_role(product, new_role = \"id\") %>%\n # Make a copy of the raw text\n step_mutate(review_raw = review) %>%\n # Compute the initial features. 
This removes the `review_raw` column\n step_textfeature(review_raw) %>%\n # Make the feature names shorter\n step_rename_at(\n starts_with(\"textfeature_\"),\n fn = ~ gsub(\"textfeature_review_raw_\", \"\", .)\n ) %>%\n step_tokenize(review) %>%\n step_stopwords(review) %>%\n step_stem(review) %>%\n # Here is where the tuning parameter is declared\n step_texthash(review, signed = TRUE, num_terms = tune()) %>%\n # Simplify these names\n step_rename_at(starts_with(\"review_hash\"), fn = ~ gsub(\"review_\", \"\", .)) %>%\n # Convert the features from counts to values of -1, 0, or 1\n step_mutate_at(starts_with(\"hash\"), fn = binary_hash) %>%\n # Transform the initial feature set\n step_YeoJohnson(one_of(!!basics)) %>%\n step_zv(all_predictors()) %>%\n step_normalize(all_predictors())\n```\n:::\n\n\n::: {.callout-warning}\n Note that, when objects from the global environment are used, they are injected into the step objects via `!!`. For some parallel processing technologies, these objects may not be found by the worker processes. \n:::\n\nThe preprocessing recipe is long and complex (often typical for working with text data) but the model we'll use is more straightforward. Let's stick with a regularized logistic regression model: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lr_0247cf0e53ca56c2bf301c584128f50b'}\n\n```{.r .cell-code}\nlr_mod <-\n logistic_reg(penalty = tune(), mixture = tune()) %>%\n set_engine(\"glmnet\")\n```\n:::\n\n\nThere are three tuning parameters for this data analysis:\n\n- `num_terms`, the number of feature hash variables to create\n- `penalty`, the amount of regularization for the model\n- `mixture`, the proportion of L1 regularization\n\n## Resampling\n\nThere are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/folds_84bb63416d335dc1c5d533767c1bbfae'}\n\n```{.r .cell-code}\nset.seed(8935)\nfolds <- vfold_cv(training_data)\nfolds\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 2\n#> splits id \n#> \n#> 1 Fold01\n#> 2 Fold02\n#> 3 Fold03\n#> 4 Fold04\n#> 5 Fold05\n#> 6 Fold06\n#> 7 Fold07\n#> 8 Fold08\n#> 9 Fold09\n#> 10 Fold10\n```\n:::\n\n\n## Grid search\n\nLet's begin our tuning with [grid search](https://www.tidymodels.org/learn/work/tune-svm/) and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the [\"submodel-trick\"](https://tune.tidymodels.org/articles/extras/optimizations.html#sub-model-speed-ups-1). The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features. \n\n\n::: {.cell layout-align=\"center\" hash='cache/grid_3838262ac4d091f3c7ff6eb49828c662'}\n\n```{.r .cell-code}\nfive_star_grid <- \n crossing(\n penalty = 10^seq(-3, 0, length = 20),\n mixture = c(0.01, 0.25, 0.50, 0.75, 1),\n num_terms = 2^c(8, 10, 12)\n )\nfive_star_grid\n#> # A tibble: 300 × 3\n#> penalty mixture num_terms\n#> \n#> 1 0.001 0.01 256\n#> 2 0.001 0.01 1024\n#> 3 0.001 0.01 4096\n#> 4 0.001 0.25 256\n#> 5 0.001 0.25 1024\n#> 6 0.001 0.25 4096\n#> 7 0.001 0.5 256\n#> 8 0.001 0.5 1024\n#> 9 0.001 0.5 4096\n#> 10 0.001 0.75 256\n#> # ℹ 290 more rows\n```\n:::\n\n\nNote that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work. \n\nLet's save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. 
Use an extraction function to do this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/extract_e3809319a2c9be6bdbad00461fca19ab'}\n\n```{.r .cell-code}\nglmnet_vars <- function(x) {\n # `x` will be a workflow object\n mod <- extract_model(x)\n # `df` is the number of model terms for each penalty value\n tibble(penalty = mod$lambda, num_vars = mod$df)\n}\n\nctrl <- control_grid(extract = glmnet_vars, verbose = TRUE)\n```\n:::\n\n\nFinally, let's run the grid search:\n\n\n::: {.cell layout-align=\"center\" hash='cache/grid-search_0cf4fb4f284aa63f66f3d1ec7a031f0a'}\n\n```{.r .cell-code}\nroc_scores <- metric_set(roc_auc)\n\nset.seed(1559)\nfive_star_glmnet <- \n tune_grid(\n lr_mod, \n pre_proc, \n resamples = folds, \n grid = five_star_grid, \n metrics = roc_scores, \n control = ctrl\n )\n\nfive_star_glmnet\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 5\n#> splits id .metrics .notes .extracts\n#> \n#> 1 Fold01 \n#> 2 Fold02 \n#> 3 Fold03 \n#> 4 Fold04 \n#> 5 Fold05 \n#> 6 Fold06 \n#> 7 Fold07 \n#> 8 Fold08 \n#> 9 Fold09 \n#> 10 Fold10 \n#> \n#> There were issues with some computations:\n#> \n#> - Warning(s) x10: `extract_model()` was deprecated in tune 0.1.6. ℹ Please use `ext...\n#> \n#> Run `show_notes(.Last.tune.result)` for more information.\n```\n:::\n\n\nThis took a while to complete! What do the results look like? 
Let's get the resampling estimates of the area under the ROC curve for each tuning parameter:\n\n\n::: {.cell layout-align=\"center\" hash='cache/grid-roc_4b2e3dd820f99c05a480b4aef94671a6'}\n\n```{.r .cell-code}\ngrid_roc <- \n collect_metrics(five_star_glmnet) %>% \n arrange(desc(mean))\ngrid_roc\n#> # A tibble: 300 × 9\n#> penalty mixture num_terms .metric .estimator mean n std_err .config \n#> \n#> 1 0.695 0.01 4096 roc_auc binary 0.811 10 0.00799 Preprocesso…\n#> 2 0.483 0.01 4096 roc_auc binary 0.811 10 0.00797 Preprocesso…\n#> 3 0.0379 0.25 4096 roc_auc binary 0.809 10 0.00755 Preprocesso…\n#> 4 0.0183 0.5 4096 roc_auc binary 0.807 10 0.00776 Preprocesso…\n#> 5 0.0264 0.25 4096 roc_auc binary 0.807 10 0.00792 Preprocesso…\n#> 6 0.0127 0.75 4096 roc_auc binary 0.807 10 0.00773 Preprocesso…\n#> 7 0.336 0.01 4096 roc_auc binary 0.806 10 0.00781 Preprocesso…\n#> 8 0.00886 1 4096 roc_auc binary 0.806 10 0.00783 Preprocesso…\n#> 9 1 0.01 4096 roc_auc binary 0.806 10 0.00801 Preprocesso…\n#> 10 0.0546 0.25 4096 roc_auc binary 0.805 10 0.00783 Preprocesso…\n#> # ℹ 290 more rows\n```\n:::\n\n\nThe best results have a fairly high penalty value and focus on the ridge penalty (i.e. no feature selection via the lasso's L1 penalty). The best solutions also use the largest number of hashing features. \n\nWhat is the relationship between performance and the tuning parameters? \n\n\n::: {.cell layout-align=\"center\" hash='cache/grid-plot_2399bf692a8e4d2328208a07997b49b2'}\n\n```{.r .cell-code}\nautoplot(five_star_glmnet, metric = \"roc_auc\")\n```\n\n::: {.cell-output-display}\n![](figs/grid-plot-1.svg){fig-align='center' width=960}\n:::\n:::\n\n\n- We can definitely see that performance improves with the number of features included. In this article, we've used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal. \n\n- The profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. 
What's that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model. \n- The panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn't much difference between the best performance for the different mixture values. A case could be made that we should choose a _larger_ mixture value and a _smaller_ penalty to select a simpler model that contains fewer predictors. \n\n- If more experimentation were conducted, a larger set of features (more than 4096) should also be considered. \n\nWe'll come back to the extracted glmnet components at the end of this article. \n\n## Directed search\n\nWhat if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently? \n\nLet's pretend that we haven't seen the grid search results. We'll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design. \n\nIt might be good to use a custom `dials` object for the number of hash terms. The default object, `num_terms()`, uses a linear range and tries to set the upper bound of the parameter using the data. Instead, let's create a parameter set, change the scale to be `log2`, and define the same range as was used in grid search. 
\n\n\n::: {.cell layout-align=\"center\" hash='cache/hash-range_5c3e9069c3bbfe07585df1aad0c95ccd'}\n\n```{.r .cell-code}\nhash_range <- num_terms(c(8, 12), trans = log2_trans())\nhash_range\n#> # Model Terms (quantitative)\n#> Transformer: log-2 [1e-100, Inf]\n#> Range (transformed scale): [8, 12]\n```\n:::\n\n\nTo use this, we have to merge the recipe and `parsnip` model object into a workflow:\n\n\n::: {.cell layout-align=\"center\" hash='cache/wflow_4cdae3dcac28070282bd045f58521a73'}\n\n```{.r .cell-code}\nfive_star_wflow <-\n workflow() %>%\n add_recipe(pre_proc) %>%\n add_model(lr_mod)\n```\n:::\n\n\nThen we can extract and manipulate the corresponding parameter set:\n\n\n::: {.cell layout-align=\"center\" hash='cache/search-set_834d2c2e8b20ceabbc2d5433ca14283b'}\n\n```{.r .cell-code}\nfive_star_set <-\n five_star_wflow %>%\n parameters() %>%\n update(\n num_terms = hash_range, \n penalty = penalty(c(-3, 0)),\n mixture = mixture(c(0.05, 1.00))\n )\n#> Warning: `parameters.workflow()` was deprecated in tune 0.1.6.9003.\n#> ℹ Please use `hardhat::extract_parameter_set_dials()` instead.\n```\n:::\n\n\nThis is passed to the search function via the `param_info` argument. \n\nThe initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on [acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html) for more details). 
The tune package has a built-in function called `expo_decay()` that can help accomplish this:\n\n\n::: {.cell layout-align=\"center\" hash='cache/decay_1566d0cfde06065be0655ed022d6b242'}\n\n```{.r .cell-code}\ntrade_off_decay <- function(iter) {\n expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4)\n}\n```\n:::\n\n\nUsing these values, let's run the search:\n\n\n::: {.cell layout-align=\"center\" hash='cache/search_a43f097c89fb60dda84a9ad516642765'}\n\n```{.r .cell-code}\nset.seed(12)\nfive_star_search <-\n tune_bayes(\n five_star_wflow, \n resamples = folds,\n param_info = five_star_set,\n initial = 5,\n iter = 30,\n metrics = roc_scores,\n objective = exp_improve(trade_off_decay),\n control = control_bayes(verbose_iter = TRUE)\n )\n#> Optimizing roc_auc using the expected improvement with variable trade-off\n#> values.\n#> ! No improvement for 10 iterations; returning current results.\n\nfive_star_search\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 290 × 5\n#> splits id .metrics .notes .iter\n#> \n#> 1 Fold01 0\n#> 2 Fold02 0\n#> 3 Fold03 0\n#> 4 Fold04 0\n#> 5 Fold05 0\n#> 6 Fold06 0\n#> 7 Fold07 0\n#> 8 Fold08 0\n#> 9 Fold09 0\n#> 10 Fold10 0\n#> # ℹ 280 more rows\n```\n:::\n\n\nThese results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. 
\nLet's look at a plot of model performance versus the search iterations:\n\n\n::: {.cell layout-align=\"center\" hash='cache/iter-plot_d5313073f7a96fe8c51a5b2dafd0d3e7'}\n\n```{.r .cell-code}\nautoplot(five_star_search, type = \"performance\")\n```\n\n::: {.cell-output-display}\n![](figs/iter-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\n::: {.callout-note}\nWhat would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). We might also restrict the penalty and mixture parameters to have a lower upper bound. \n:::\n\n## Extracted results\n\nLet's return to the grid search results and examine the results of our `extract` function. For each _fitted model_, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let's look at these results for the best model:\n\n\n::: {.cell layout-align=\"center\" hash='cache/best-res_7925dd0d3ff872cb0d860d10ea6bcb4a'}\n\n```{.r .cell-code}\nparams <- select_best(five_star_glmnet, metric = \"roc_auc\")\nparams\n#> # A tibble: 1 × 4\n#> penalty mixture num_terms .config \n#> \n#> 1 0.695 0.01 4096 Preprocessor3_Model019\n```\n:::\n\n\nRecall that we saved the glmnet results in a tibble. The column `five_star_glmnet$.extracts` is a list of tibbles. 
As an example, the first element of the list is:\n\n\n::: {.cell layout-align=\"center\" hash='cache/first-elem_8cd46baaa8cf8d495d7df862edef8f5e'}\n\n```{.r .cell-code}\nfive_star_glmnet$.extracts[[1]]\n#> # A tibble: 300 × 5\n#> num_terms penalty mixture .extracts .config \n#> \n#> 1 256 1 0.01 Preprocessor1_Model001\n#> 2 256 1 0.01 Preprocessor1_Model002\n#> 3 256 1 0.01 Preprocessor1_Model003\n#> 4 256 1 0.01 Preprocessor1_Model004\n#> 5 256 1 0.01 Preprocessor1_Model005\n#> 6 256 1 0.01 Preprocessor1_Model006\n#> 7 256 1 0.01 Preprocessor1_Model007\n#> 8 256 1 0.01 Preprocessor1_Model008\n#> 9 256 1 0.01 Preprocessor1_Model009\n#> 10 256 1 0.01 Preprocessor1_Model010\n#> # ℹ 290 more rows\n```\n:::\n\n\nMore nested tibbles! Let's `unnest()` the `five_star_glmnet$.extracts` column:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnest_e4298bf23fcc5af43fa8ca89245bd802'}\n\n```{.r .cell-code}\nlibrary(tidyr)\nextracted <- \n five_star_glmnet %>% \n dplyr::select(id, .extracts) %>% \n unnest(cols = .extracts)\nextracted\n#> # A tibble: 3,000 × 6\n#> id num_terms penalty mixture .extracts .config \n#> \n#> 1 Fold01 256 1 0.01 Preprocessor1_Model001\n#> 2 Fold01 256 1 0.01 Preprocessor1_Model002\n#> 3 Fold01 256 1 0.01 Preprocessor1_Model003\n#> 4 Fold01 256 1 0.01 Preprocessor1_Model004\n#> 5 Fold01 256 1 0.01 Preprocessor1_Model005\n#> 6 Fold01 256 1 0.01 Preprocessor1_Model006\n#> 7 Fold01 256 1 0.01 Preprocessor1_Model007\n#> 8 Fold01 256 1 0.01 Preprocessor1_Model008\n#> 9 Fold01 256 1 0.01 Preprocessor1_Model009\n#> 10 Fold01 256 1 0.01 Preprocessor1_Model010\n#> # ℹ 2,990 more rows\n```\n:::\n\n\nOne thing to realize here is that `tune_grid()` [may not fit all of the models](https://tune.tidymodels.org/articles/extras/optimizations.html) that are evaluated. In this case, for each value of `mixture` and `num_terms`, the model is fit over _all_ penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the `penalty` column in `extracted`:\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/select-best_0a942b1c95c1973b89862c3a300c6449'}\n\n```{.r .cell-code}\nextracted <- \n extracted %>% \n dplyr::select(-penalty) %>% \n inner_join(params, by = c(\"num_terms\", \"mixture\")) %>% \n # Now remove it from the final results\n dplyr::select(-penalty)\nextracted\n#> # A tibble: 200 × 6\n#> id num_terms mixture .extracts .config.x .config.y \n#> \n#> 1 Fold01 4096 0.01 Preprocessor3_Model001 Preproces…\n#> 2 Fold01 4096 0.01 Preprocessor3_Model002 Preproces…\n#> 3 Fold01 4096 0.01 Preprocessor3_Model003 Preproces…\n#> 4 Fold01 4096 0.01 Preprocessor3_Model004 Preproces…\n#> 5 Fold01 4096 0.01 Preprocessor3_Model005 Preproces…\n#> 6 Fold01 4096 0.01 Preprocessor3_Model006 Preproces…\n#> 7 Fold01 4096 0.01 Preprocessor3_Model007 Preproces…\n#> 8 Fold01 4096 0.01 Preprocessor3_Model008 Preproces…\n#> 9 Fold01 4096 0.01 Preprocessor3_Model009 Preproces…\n#> 10 Fold01 4096 0.01 Preprocessor3_Model010 Preproces…\n#> # ℹ 190 more rows\n```\n:::\n\n\nNow we can get at the results that we want using another `unnest()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/final-unnest_cd0a261a2e4163a0dd61251c39d73aba'}\n\n```{.r .cell-code}\nextracted <- \n extracted %>% \n unnest(col = .extracts) # <- these contain a `penalty` column\nextracted\n#> # A tibble: 20,000 × 7\n#> id num_terms mixture penalty num_vars .config.x .config.y \n#> \n#> 1 Fold01 4096 0.01 8.60 0 Preprocessor3_Model001 Preprocesso…\n#> 2 Fold01 4096 0.01 8.21 2 Preprocessor3_Model001 Preprocesso…\n#> 3 Fold01 4096 0.01 7.84 2 Preprocessor3_Model001 Preprocesso…\n#> 4 Fold01 4096 0.01 7.48 3 Preprocessor3_Model001 Preprocesso…\n#> 5 Fold01 4096 0.01 7.14 3 Preprocessor3_Model001 Preprocesso…\n#> 6 Fold01 4096 0.01 6.82 3 Preprocessor3_Model001 Preprocesso…\n#> 7 Fold01 4096 0.01 6.51 4 Preprocessor3_Model001 Preprocesso…\n#> 8 Fold01 4096 0.01 6.21 6 
Preprocessor3_Model001 Preprocesso…\n#> 9 Fold01 4096 0.01 5.93 7 Preprocessor3_Model001 Preprocesso…\n#> 10 Fold01 4096 0.01 5.66 7 Preprocessor3_Model001 Preprocesso…\n#> # ℹ 19,990 more rows\n```\n:::\n\n\nLet's look at a plot of these results (per resample):\n\n\n::: {.cell layout-align=\"center\" hash='cache/var-plot_616d52ac7a85c22f87e414b707232dcb'}\n\n```{.r .cell-code}\nggplot(extracted, aes(x = penalty, y = num_vars)) + \n geom_line(aes(group = id, col = id), alpha = .5) + \n ylab(\"Number of retained predictors\") + \n scale_x_log10() + \n ggtitle(paste(\"mixture = \", params$mixture, \"and\", params$num_terms, \"features\")) + \n theme(legend.position = \"none\")\n```\n\n::: {.cell-output-display}\n![](figs/var-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThese results might help guide the choice of the `penalty` range if more optimization was conducted. \n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 
4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> stopwords * 2.3 2021-10-28 [1] CRAN (R 4.3.0)\n#> textfeatures * 0.3.3 2019-09-03 [1] CRAN (R 4.3.0)\n#> textrecipes * 1.0.3 2023-04-14 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/site_libs/clipboard/clipboard.min.js b/_freeze/site_libs/clipboard/clipboard.min.js new file mode 100644 index 00000000..1103f811 --- /dev/null +++ b/_freeze/site_libs/clipboard/clipboard.min.js @@ -0,0 +1,7 @@ +/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1 123,456,666.7890 +var markInterval = function(d, digits, interval, mark, decMark, precision) { + x = precision ? d.toPrecision(digits) : d.toFixed(digits); + if (!/^-?[\d.]+$/.test(x)) return x; + var xv = x.split('.'); + if (xv.length > 2) return x; // should have at most one decimal point + xv[0] = xv[0].replace(new RegExp('\\B(?=(\\d{' + interval + '})+(?!\\d))', 'g'), mark); + return xv.join(decMark); +}; + +DTWidget.formatCurrency = function(data, currency, digits, interval, mark, decMark, before, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + var res = markInterval(d, digits, interval, mark, decMark); + res = before ? (/^-/.test(res) ? 
'-' + currency + res.replace(/^-/, '') : currency + res) : + res + currency; + return res; +}; + +DTWidget.formatString = function(data, prefix, suffix) { + var d = data; + if (d === null) return ''; + return prefix + d + suffix; +}; + +DTWidget.formatPercentage = function(data, digits, interval, mark, decMark, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + return markInterval(d * 100, digits, interval, mark, decMark) + '%'; +}; + +DTWidget.formatRound = function(data, digits, interval, mark, decMark, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + return markInterval(d, digits, interval, mark, decMark); +}; + +DTWidget.formatSignif = function(data, digits, interval, mark, decMark, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + return markInterval(d, digits, interval, mark, decMark, true); +}; + +DTWidget.formatDate = function(data, method, params) { + var d = data; + if (d === null) return ''; + // (new Date('2015-10-28')).toDateString() may return 2015-10-27 because the + // actual time created could be like 'Tue Oct 27 2015 19:00:00 GMT-0500 (CDT)', + // i.e. 
the date-only string is treated as UTC time instead of local time + if ((method === 'toDateString' || method === 'toLocaleDateString') && /^\d{4,}\D\d{2}\D\d{2}$/.test(d)) { + d = d.split(/\D/); + d = new Date(d[0], d[1] - 1, d[2]); + } else { + d = new Date(d); + } + return d[method].apply(d, params); +}; + +window.DTWidget = DTWidget; + +// A helper function to update the properties of existing filters +var setFilterProps = function(td, props) { + // Update enabled/disabled state + var $input = $(td).find('input').first(); + var searchable = $input.data('searchable'); + $input.prop('disabled', !searchable || props.disabled); + + // Based on the filter type, set its new values + var type = td.getAttribute('data-type'); + if (['factor', 'logical'].includes(type)) { + // Reformat the new dropdown options for use with selectize + var new_vals = props.params.options.map(function(item) { + return { text: item, value: item }; + }); + + // Find the selectize object + var dropdown = $(td).find('.selectized').eq(0)[0].selectize; + + // Note the current values + var old_vals = dropdown.getValue(); + + // Remove the existing values + dropdown.clearOptions(); + + // Add the new options + dropdown.addOption(new_vals); + + // Preserve the existing values + dropdown.setValue(old_vals); + + } else if (['number', 'integer', 'date', 'time'].includes(type)) { + // Apply internal scaling to new limits. Updating scale not yet implemented. 
+ var slider = $(td).find('.noUi-target').eq(0); + var scale = Math.pow(10, Math.max(0, +slider.data('scale') || 0)); + var new_vals = [props.params.min * scale, props.params.max * scale]; + + // Note what the new limits will be just for this filter + var new_lims = new_vals.slice(); + + // Determine the current values and limits + var old_vals = slider.val().map(Number); + var old_lims = slider.noUiSlider('options').range; + old_lims = [old_lims.min, old_lims.max]; + + // Preserve the current values if filters have been applied; otherwise, apply no filtering + if (old_vals[0] != old_lims[0]) { + new_vals[0] = Math.max(old_vals[0], new_vals[0]); + } + + if (old_vals[1] != old_lims[1]) { + new_vals[1] = Math.min(old_vals[1], new_vals[1]); + } + + // Update the endpoints of the slider + slider.noUiSlider({ + start: new_vals, + range: {'min': new_lims[0], 'max': new_lims[1]} + }, true); + } +}; + +var transposeArray2D = function(a) { + return a.length === 0 ? a : HTMLWidgets.transposeArray2D(a); +}; + +var crosstalkPluginsInstalled = false; + +function maybeInstallCrosstalkPlugins() { + if (crosstalkPluginsInstalled) + return; + crosstalkPluginsInstalled = true; + + $.fn.dataTable.ext.afnFiltering.push( + function(oSettings, aData, iDataIndex) { + var ctfilter = oSettings.nTable.ctfilter; + if (ctfilter && !ctfilter[iDataIndex]) + return false; + + var ctselect = oSettings.nTable.ctselect; + if (ctselect && !ctselect[iDataIndex]) + return false; + + return true; + } + ); +} + +HTMLWidgets.widget({ + name: "datatables", + type: "output", + renderOnNullValue: true, + initialize: function(el, width, height) { + // in order that the type=number inputs return a number + $.valHooks.number = { + get: function(el) { + var value = parseFloat(el.value); + return isNaN(value) ? 
"" : value; + } + }; + $(el).html(' '); + return { + data: null, + ctfilterHandle: new crosstalk.FilterHandle(), + ctfilterSubscription: null, + ctselectHandle: new crosstalk.SelectionHandle(), + ctselectSubscription: null + }; + }, + renderValue: function(el, data, instance) { + if (el.offsetWidth === 0 || el.offsetHeight === 0) { + instance.data = data; + return; + } + instance.data = null; + var $el = $(el); + $el.empty(); + + if (data === null) { + $el.append(' '); + // clear previous Shiny inputs (if any) + for (var i in instance.clearInputs) instance.clearInputs[i](); + instance.clearInputs = {}; + return; + } + + var crosstalkOptions = data.crosstalkOptions; + if (!crosstalkOptions) crosstalkOptions = { + 'key': null, 'group': null + }; + if (crosstalkOptions.group) { + maybeInstallCrosstalkPlugins(); + instance.ctfilterHandle.setGroup(crosstalkOptions.group); + instance.ctselectHandle.setGroup(crosstalkOptions.group); + } + + // if we are in the viewer then we always want to fillContainer and + // and autoHideNavigation (unless the user has explicitly set these) + if (window.HTMLWidgets.viewerMode) { + if (!data.hasOwnProperty("fillContainer")) + data.fillContainer = true; + if (!data.hasOwnProperty("autoHideNavigation")) + data.autoHideNavigation = true; + } + + // propagate fillContainer to instance (so we have it in resize) + instance.fillContainer = data.fillContainer; + + var cells = data.data; + + if (cells instanceof Array) cells = transposeArray2D(cells); + + $el.append(data.container); + var $table = $el.find('table'); + if (data.class) $table.addClass(data.class); + if (data.caption) $table.prepend(data.caption); + + if (!data.selection) data.selection = { + mode: 'none', selected: null, target: 'row', selectable: null + }; + if (HTMLWidgets.shinyMode && data.selection.mode !== 'none' && + data.selection.target === 'row+column') { + if ($table.children('tfoot').length === 0) { + $table.append($('')); + $table.find('thead 
tr').clone().appendTo($table.find('tfoot')); + } + } + + // column filters + var filterRow; + switch (data.filter) { + case 'top': + $table.children('thead').append(data.filterHTML); + filterRow = $table.find('thead tr:last td'); + break; + case 'bottom': + if ($table.children('tfoot').length === 0) { + $table.append($('')); + } + $table.children('tfoot').prepend(data.filterHTML); + filterRow = $table.find('tfoot tr:first td'); + break; + } + + var options = { searchDelay: 1000 }; + if (cells !== null) $.extend(options, { + data: cells + }); + + // options for fillContainer + var bootstrapActive = typeof($.fn.popover) != 'undefined'; + if (instance.fillContainer) { + + // force scrollX/scrollY and turn off autoWidth + options.scrollX = true; + options.scrollY = "100px"; // can be any value, we'll adjust below + + // if we aren't paginating then move around the info/filter controls + // to save space at the bottom and rephrase the info callback + if (data.options.paging === false) { + + // we know how to do this cleanly for bootstrap, not so much + // for other themes/layouts + if (bootstrapActive) { + options.dom = "<'row'<'col-sm-4'i><'col-sm-8'f>>" + + "<'row'<'col-sm-12'tr>>"; + } + + options.fnInfoCallback = function(oSettings, iStart, iEnd, + iMax, iTotal, sPre) { + return Number(iTotal).toLocaleString() + " records"; + }; + } + } + + // auto hide navigation if requested + // Note, this only works on client-side processing mode as on server-side, + // cells (data.data) is null; In addition, we require the pageLength option + // being provided explicitly to enable this. Despite we may be able to deduce + // the default value of pageLength, it may complicate things so we'd rather + // put this responsiblity to users and warn them on the R side. + if (data.autoHideNavigation === true && data.options.paging !== false) { + // strip all nav if length >= cells + if ((cells instanceof Array) && data.options.pageLength >= cells.length) + options.dom = bootstrapActive ? 
"<'row'<'col-sm-12'tr>>" : "t"; + // alternatively lean things out for flexdashboard mobile portrait + else if (bootstrapActive && window.FlexDashboard && window.FlexDashboard.isMobilePhone()) + options.dom = "<'row'<'col-sm-12'f>>" + + "<'row'<'col-sm-12'tr>>" + + "<'row'<'col-sm-12'p>>"; + } + + $.extend(true, options, data.options || {}); + + var searchCols = options.searchCols; + if (searchCols) { + searchCols = searchCols.map(function(x) { + return x === null ? '' : x.search; + }); + // FIXME: this means I don't respect the escapeRegex setting + delete options.searchCols; + } + + // server-side processing? + var server = options.serverSide === true; + + // use the dataSrc function to pre-process JSON data returned from R + var DT_rows_all = [], DT_rows_current = []; + if (server && HTMLWidgets.shinyMode && typeof options.ajax === 'object' && + /^session\/[\da-z]+\/dataobj/.test(options.ajax.url) && !options.ajax.dataSrc) { + options.ajax.dataSrc = function(json) { + DT_rows_all = $.makeArray(json.DT_rows_all); + DT_rows_current = $.makeArray(json.DT_rows_current); + var data = json.data; + if (!colReorderEnabled()) return data; + var table = $table.DataTable(), order = table.colReorder.order(), flag = true, i, j, row; + for (i = 0; i < order.length; ++i) if (order[i] !== i) flag = false; + if (flag) return data; + for (i = 0; i < data.length; ++i) { + row = data[i].slice(); + for (j = 0; j < order.length; ++j) data[i][j] = row[order[j]]; + } + return data; + }; + } + + var thiz = this; + if (instance.fillContainer) $table.on('init.dt', function(e) { + thiz.fillAvailableHeight(el, $(el).innerHeight()); + }); + // If the page contains serveral datatables and one of which enables colReorder, + // the table.colReorder.order() function will exist but throws error when called. + // So it seems like the only way to know if colReorder is enabled or not is to + // check the options. 
+ var colReorderEnabled = function() { return "colReorder" in options; }; + var table = $table.DataTable(options); + $el.data('datatable', table); + + // Unregister previous Crosstalk event subscriptions, if they exist + if (instance.ctfilterSubscription) { + instance.ctfilterHandle.off("change", instance.ctfilterSubscription); + instance.ctfilterSubscription = null; + } + if (instance.ctselectSubscription) { + instance.ctselectHandle.off("change", instance.ctselectSubscription); + instance.ctselectSubscription = null; + } + + if (!crosstalkOptions.group) { + $table[0].ctfilter = null; + $table[0].ctselect = null; + } else { + var key = crosstalkOptions.key; + function keysToMatches(keys) { + if (!keys) { + return null; + } else { + var selectedKeys = {}; + for (var i = 0; i < keys.length; i++) { + selectedKeys[keys[i]] = true; + } + var matches = {}; + for (var j = 0; j < key.length; j++) { + if (selectedKeys[key[j]]) + matches[j] = true; + } + return matches; + } + } + + function applyCrosstalkFilter(e) { + $table[0].ctfilter = keysToMatches(e.value); + table.draw(); + } + instance.ctfilterSubscription = instance.ctfilterHandle.on("change", applyCrosstalkFilter); + applyCrosstalkFilter({value: instance.ctfilterHandle.filteredKeys}); + + function applyCrosstalkSelection(e) { + if (e.sender !== instance.ctselectHandle) { + table + .rows('.' 
+ selClass, {search: 'applied'}) + .nodes() + .to$() + .removeClass(selClass); + if (selectedRows) + changeInput('rows_selected', selectedRows(), void 0, true); + } + + if (e.sender !== instance.ctselectHandle && e.value && e.value.length) { + var matches = keysToMatches(e.value); + + // persistent selection with plotly (& leaflet) + var ctOpts = crosstalk.var("plotlyCrosstalkOpts").get() || {}; + if (ctOpts.persistent === true) { + var matches = $.extend(matches, $table[0].ctselect); + } + + $table[0].ctselect = matches; + table.draw(); + } else { + if ($table[0].ctselect) { + $table[0].ctselect = null; + table.draw(); + } + } + } + instance.ctselectSubscription = instance.ctselectHandle.on("change", applyCrosstalkSelection); + // TODO: This next line doesn't seem to work when renderDataTable is used + applyCrosstalkSelection({value: instance.ctselectHandle.value}); + } + + var inArray = function(val, array) { + return $.inArray(val, $.makeArray(array)) > -1; + }; + + // search the i-th column + var searchColumn = function(i, value) { + var regex = false, ci = true; + if (options.search) { + regex = options.search.regex, + ci = options.search.caseInsensitive !== false; + } + return table.column(i).search(value, regex, !regex, ci); + }; + + if (data.filter !== 'none') { + + filterRow.each(function(i, td) { + + var $td = $(td), type = $td.data('type'), filter; + var $input = $td.children('div').first().children('input'); + var disabled = $input.prop('disabled'); + var searchable = table.settings()[0].aoColumns[i].bSearchable; + $input.prop('disabled', !searchable || disabled); + $input.data('searchable', searchable); // for updating later + $input.on('input blur', function() { + $input.next('span').toggle(Boolean($input.val())); + }); + // Bootstrap sets pointer-events to none and we won't be able to click + // the clear button + $input.next('span').css('pointer-events', 'auto').hide().click(function() { + 
$(this).hide().prev('input').val('').trigger('input').focus(); + }); + var searchCol; // search string for this column + if (searchCols && searchCols[i]) { + searchCol = searchCols[i]; + $input.val(searchCol).trigger('input'); + } + var $x = $td.children('div').last(); + + // remove the overflow: hidden attribute of the scrollHead + // (otherwise the scrolling table body obscures the filters) + // The workaround and the discussion from + // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 + // Otherwise the filter selection will not be anchored to the values + // when the columns number is many and scrollX is enabled. + var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); + var cssOverflowHead = scrollHead.css('overflow'); + var scrollBody = $(el).find('.dataTables_scrollBody'); + var cssOverflowBody = scrollBody.css('overflow'); + var scrollTable = $(el).find('.dataTables_scroll'); + var cssOverflowTable = scrollTable.css('overflow'); + if (cssOverflowHead === 'hidden') { + $x.on('show hide', function(e) { + if (e.type === 'show') { + scrollHead.css('overflow', 'visible'); + scrollBody.css('overflow', 'visible'); + scrollTable.css('overflow-x', 'scroll'); + } else { + scrollHead.css('overflow', cssOverflowHead); + scrollBody.css('overflow', cssOverflowBody); + scrollTable.css('overflow-x', cssOverflowTable); + } + }); + $x.css('z-index', 25); + } + + if (inArray(type, ['factor', 'logical'])) { + $input.on({ + click: function() { + $input.parent().hide(); $x.show().trigger('show'); filter[0].selectize.focus(); + }, + input: function() { + if ($input.val() === '') filter[0].selectize.setValue([]); + } + }); + var $input2 = $x.children('select'); + filter = $input2.selectize({ + options: $input2.data('options').map(function(v, i) { + return ({text: v, value: v}); + }), + plugins: ['remove_button'], + hideSelected: true, + onChange: function(value) { + if (value === null) value = []; // compatibility with jQuery 3.0 + 
$input.val(value.length ? JSON.stringify(value) : ''); + if (value.length) $input.trigger('input'); + $input.attr('title', $input.val()); + if (server) { + table.column(i).search(value.length ? JSON.stringify(value) : '').draw(); + return; + } + // turn off filter if nothing selected + $td.data('filter', value.length > 0); + table.draw(); // redraw table, and filters will be applied + } + }); + if (searchCol) filter[0].selectize.setValue(JSON.parse(searchCol)); + filter[0].selectize.on('blur', function() { + $x.hide().trigger('hide'); $input.parent().show(); $input.trigger('blur'); + }); + filter.next('div').css('margin-bottom', 'auto'); + } else if (type === 'character') { + var fun = function() { + searchColumn(i, $input.val()).draw(); + }; + if (server) { + fun = $.fn.dataTable.util.throttle(fun, options.searchDelay); + } + $input.on('input', fun); + } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { + var $x0 = $x; + $x = $x0.children('div').first(); + $x0.css({ + 'background-color': '#fff', + 'border': '1px #ddd solid', + 'border-radius': '4px', + 'padding': data.vertical ? '35px 20px': '20px 20px 10px 20px' + }); + var $spans = $x0.children('span').css({ + 'margin-top': data.vertical ? 
'0' : '10px', + 'white-space': 'nowrap' + }); + var $span1 = $spans.first(), $span2 = $spans.last(); + var r1 = +$x.data('min'), r2 = +$x.data('max'); + // when the numbers are too small or have many decimal places, the + // slider may have numeric precision problems (#150) + var scale = Math.pow(10, Math.max(0, +$x.data('scale') || 0)); + r1 = Math.round(r1 * scale); r2 = Math.round(r2 * scale); + var scaleBack = function(x, scale) { + if (scale === 1) return x; + var d = Math.round(Math.log(scale) / Math.log(10)); + // to avoid problems like 3.423/100 -> 0.034230000000000003 + return (x / scale).toFixed(d); + }; + var slider_min = function() { + return filter.noUiSlider('options').range.min; + }; + var slider_max = function() { + return filter.noUiSlider('options').range.max; + }; + $input.on({ + focus: function() { + $x0.show().trigger('show'); + // first, make sure the slider div leaves at least 20px between + // the two (slider value) span's + $x0.width(Math.max(160, $span1.outerWidth() + $span2.outerWidth() + 20)); + // then, if the input is really wide or slider is vertical, + // make the slider the same width as the input + if ($x0.outerWidth() < $input.outerWidth() || data.vertical) { + $x0.outerWidth($input.outerWidth()); + } + // make sure the slider div does not reach beyond the right margin + if ($(window).width() < $x0.offset().left + $x0.width()) { + $x0.offset({ + 'left': $input.offset().left + $input.outerWidth() - $x0.outerWidth() + }); + } + }, + blur: function() { + $x0.hide().trigger('hide'); + }, + input: function() { + if ($input.val() === '') filter.val([slider_min(), slider_max()]); + }, + change: function() { + var v = $input.val().replace(/\s/g, ''); + if (v === '') return; + v = v.split('...'); + if (v.length !== 2) { + $input.parent().addClass('has-error'); + return; + } + if (v[0] === '') v[0] = slider_min(); + if (v[1] === '') v[1] = slider_max(); + $input.parent().removeClass('has-error'); + // treat date as UTC time at midnight + 
var strTime = function(x) { + var s = type === 'date' ? 'T00:00:00Z' : ''; + var t = new Date(x + s).getTime(); + // add 10 minutes to date since it does not hurt the date, and + // it helps avoid the tricky floating point arithmetic problems, + // e.g. sometimes the date may be a few milliseconds earlier + // than the midnight due to precision problems in noUiSlider + return type === 'date' ? t + 3600000 : t; + }; + if (inArray(type, ['date', 'time'])) { + v[0] = strTime(v[0]); + v[1] = strTime(v[1]); + } + if (v[0] != slider_min()) v[0] *= scale; + if (v[1] != slider_max()) v[1] *= scale; + filter.val(v); + } + }); + var formatDate = function(d, isoFmt) { + d = scaleBack(d, scale); + if (type === 'number') return d; + if (type === 'integer') return parseInt(d); + var x = new Date(+d); + var fmt = ('filterDateFmt' in data) ? data.filterDateFmt[i] : undefined; + if (fmt !== undefined && isoFmt === false) return x[fmt.method].apply(x, fmt.params); + if (type === 'date') { + var pad0 = function(x) { + return ('0' + x).substr(-2, 2); + }; + return x.getUTCFullYear() + '-' + pad0(1 + x.getUTCMonth()) + + '-' + pad0(x.getUTCDate()); + } else { + return x.toISOString(); + } + }; + var opts = type === 'date' ? { step: 60 * 60 * 1000 } : + type === 'integer' ? { step: 1 } : {}; + + opts.orientation = data.vertical ? 'vertical': 'horizontal'; + opts.direction = data.vertical ? 
'rtl': 'ltr'; + + filter = $x.noUiSlider($.extend({ + start: [r1, r2], + range: {min: r1, max: r2}, + connect: true + }, opts)); + if (scale > 1) (function() { + var t1 = r1, t2 = r2; + var val = filter.val(); + while (val[0] > r1 || val[1] < r2) { + if (val[0] > r1) { + t1 -= val[0] - r1; + } + if (val[1] < r2) { + t2 += r2 - val[1]; + } + filter = $x.noUiSlider($.extend({ + start: [t1, t2], + range: {min: t1, max: t2}, + connect: true + }, opts), true); + val = filter.val(); + } + r1 = t1; r2 = t2; + })(); + var updateSliderText = function(v1, v2) { + $span1.text(formatDate(v1, false)); $span2.text(formatDate(v2, false)); + }; + updateSliderText(r1, r2); + var updateSlider = function(e) { + var val = filter.val(); + // turn off filter if in full range + $td.data('filter', val[0] > slider_min() || val[1] < slider_max()); + var v1 = formatDate(val[0]), v2 = formatDate(val[1]), ival; + if ($td.data('filter')) { + ival = v1 + ' ... ' + v2; + $input.attr('title', ival).val(ival).trigger('input'); + } else { + $input.attr('title', '').val(''); + } + updateSliderText(val[0], val[1]); + if (e.type === 'slide') return; // no searching when sliding only + if (server) { + table.column(i).search($td.data('filter') ? ival : '').draw(); + return; + } + table.draw(); + }; + filter.on({ + set: updateSlider, + slide: updateSlider + }); + } + + // server-side processing will be handled by R (or whatever server + // language you use); the following code is only needed for client-side + // processing + if (server) { + // if a search string has been pre-set, search now + if (searchCol) searchColumn(i, searchCol).draw(); + return; + } + + var customFilter = function(settings, data, dataIndex) { + // there is no way to attach a search function to a specific table, + // and we need to make sure a global search function is not applied to + // all tables (i.e. 
a range filter in a previous table should not be + // applied to the current table); we use the settings object to + // determine if we want to perform searching on the current table, + // since settings.sTableId will be different to different tables + if (table.settings()[0] !== settings) return true; + // no filter on this column or no need to filter this column + if (typeof filter === 'undefined' || !$td.data('filter')) return true; + + var r = filter.val(), v, r0, r1; + var i_data = function(i) { + if (!colReorderEnabled()) return i; + var order = table.colReorder.order(), k; + for (k = 0; k < order.length; ++k) if (order[k] === i) return k; + return i; // in theory it will never be here... + } + v = data[i_data(i)]; + if (type === 'number' || type === 'integer') { + v = parseFloat(v); + // how to handle NaN? currently exclude these rows + if (isNaN(v)) return(false); + r0 = parseFloat(scaleBack(r[0], scale)) + r1 = parseFloat(scaleBack(r[1], scale)); + if (v >= r0 && v <= r1) return true; + } else if (type === 'date' || type === 'time') { + v = new Date(v); + r0 = new Date(r[0] / scale); r1 = new Date(r[1] / scale); + if (v >= r0 && v <= r1) return true; + } else if (type === 'factor') { + if (r.length === 0 || inArray(v, r)) return true; + } else if (type === 'logical') { + if (r.length === 0) return true; + if (inArray(v === '' ? 
'na' : v, r)) return true; + } + return false; + }; + + $.fn.dataTable.ext.search.push(customFilter); + + // search for the preset search strings if it is non-empty + if (searchCol) { + if (inArray(type, ['factor', 'logical'])) { + filter[0].selectize.setValue(JSON.parse(searchCol)); + } else if (type === 'character') { + $input.trigger('input'); + } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { + $input.trigger('change'); + } + } + + }); + + } + + // highlight search keywords + var highlight = function() { + var body = $(table.table().body()); + // removing the old highlighting first + body.unhighlight(); + + // don't highlight the "not found" row, so we get the rows using the api + if (table.rows({ filter: 'applied' }).data().length === 0) return; + // highlight global search keywords + body.highlight($.trim(table.search()).split(/\s+/)); + // then highlight keywords from individual column filters + if (filterRow) filterRow.each(function(i, td) { + var $td = $(td), type = $td.data('type'); + if (type !== 'character') return; + var $input = $td.children('div').first().children('input'); + var column = table.column(i).nodes().to$(), + val = $.trim($input.val()); + if (type !== 'character' || val === '') return; + column.highlight(val.split(/\s+/)); + }); + }; + + if (options.searchHighlight) { + table + .on('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth', highlight) + .on('destroy', function() { + // remove event handler + table.off('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth'); + }); + + // Set the option for escaping regex characters in our search string. This will be used + // for all future matching. 
+ jQuery.fn.highlight.options.escapeRegex = (!options.search || !options.search.regex); + + // initial highlight for state saved conditions and initial states + highlight(); + } + + // run the callback function on the table instance + if (typeof data.callback === 'function') data.callback(table); + + // double click to edit the cell, row, column, or all cells + if (data.editable) table.on('dblclick.dt', 'tbody td', function(e) { + // only bring up the editor when the cell itself is dbclicked, and ignore + // other dbclick events bubbled up (e.g. from the ) + if (e.target !== this) return; + var target = [], immediate = false; + switch (data.editable.target) { + case 'cell': + target = [this]; + immediate = true; // edit will take effect immediately + break; + case 'row': + target = table.cells(table.cell(this).index().row, '*').nodes(); + break; + case 'column': + target = table.cells('*', table.cell(this).index().column).nodes(); + break; + case 'all': + target = table.cells().nodes(); + break; + default: + throw 'The editable parameter must be "cell", "row", "column", or "all"'; + } + var disableCols = data.editable.disable ? 
data.editable.disable.columns : null; + var numericCols = data.editable.numeric; + var areaCols = data.editable.area; + for (var i = 0; i < target.length; i++) { + (function(cell, current) { + var $cell = $(cell), html = $cell.html(); + var _cell = table.cell(cell), value = _cell.data(), index = _cell.index().column; + var $input; + if (inArray(index, numericCols)) { + $input = $(''); + } else if (inArray(index, areaCols)) { + $input = $(''); + } else { + $input = $(''); + } + if (!immediate) { + $cell.data('input', $input).data('html', html); + $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); + } + $input.val(value); + if (inArray(index, disableCols)) { + $input.attr('readonly', '').css('filter', 'invert(25%)'); + } + $cell.empty().append($input); + if (cell === current) $input.focus(); + $input.css('width', '100%'); + + if (immediate) $input.on('blur', function(e) { + var valueNew = $input.val(); + if (valueNew !== value) { + _cell.data(valueNew); + if (HTMLWidgets.shinyMode) { + changeInput('cell_edit', [cellInfo(cell)], 'DT.cellInfo', null, {priority: 'event'}); + } + // for server-side processing, users have to call replaceData() to update the table + if (!server) table.draw(false); + } else { + $cell.html(html); + } + }).on('keyup', function(e) { + // hit Escape to cancel editing + if (e.keyCode === 27) $input.trigger('blur'); + }); + + // bulk edit (row, column, or all) + if (!immediate) $input.on('keyup', function(e) { + var removeInput = function($cell, restore) { + $cell.data('input').remove(); + if (restore) $cell.html($cell.data('html')); + } + if (e.keyCode === 27) { + for (var i = 0; i < target.length; i++) { + removeInput($(target[i]), true); + } + } else if (e.keyCode === 13 && e.ctrlKey) { + // Ctrl + Enter + var cell, $cell, _cell, cellData = []; + for (var i = 0; i < target.length; i++) { + cell = target[i]; $cell = $(cell); _cell = table.cell(cell); + _cell.data($cell.data('input').val()); + HTMLWidgets.shinyMode && 
cellData.push(cellInfo(cell)); + removeInput($cell, false); + } + if (HTMLWidgets.shinyMode) { + changeInput('cell_edit', cellData, 'DT.cellInfo', null, {priority: "event"}); + } + if (!server) table.draw(false); + } + }); + })(target[i], this); + } + }); + + // interaction with shiny + if (!HTMLWidgets.shinyMode && !crosstalkOptions.group) return; + + var methods = {}; + var shinyData = {}; + + methods.updateCaption = function(caption) { + if (!caption) return; + $table.children('caption').replaceWith(caption); + } + + // register clear functions to remove input values when the table is removed + instance.clearInputs = {}; + + var changeInput = function(id, value, type, noCrosstalk, opts) { + var event = id; + id = el.id + '_' + id; + if (type) id = id + ':' + type; + // do not update if the new value is the same as old value + if (event !== 'cell_edit' && !/_clicked$/.test(event) && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) + return; + shinyData[id] = JSON.stringify(value); + if (HTMLWidgets.shinyMode && Shiny.setInputValue) { + Shiny.setInputValue(id, value, opts); + if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { + Shiny.setInputValue(id, null); + } + } + + // HACK + if (event === "rows_selected" && !noCrosstalk) { + if (crosstalkOptions.group) { + var keys = crosstalkOptions.key; + var selectedKeys = null; + if (value) { + selectedKeys = []; + for (var i = 0; i < value.length; i++) { + // The value array's contents use 1-based row numbers, so we must + // convert to 0-based before indexing into the keys array. 
+ selectedKeys.push(keys[value[i] - 1]); + } + } + instance.ctselectHandle.set(selectedKeys); + } + } + }; + + var addOne = function(x) { + return x.map(function(i) { return 1 + i; }); + }; + + var unique = function(x) { + var ux = []; + $.each(x, function(i, el){ + if ($.inArray(el, ux) === -1) ux.push(el); + }); + return ux; + } + + // change the row index of a cell + var tweakCellIndex = function(cell) { + var info = cell.index(); + // some cell may not be valid. e.g, #759 + // when using the RowGroup extension, datatables will + // generate the row label and the cells are not part of + // the data thus contain no row/col info + if (info === undefined) + return {row: null, col: null}; + if (server) { + info.row = DT_rows_current[info.row]; + } else { + info.row += 1; + } + return {row: info.row, col: info.column}; + } + + var cleanSelectedValues = function() { + changeInput('rows_selected', []); + changeInput('columns_selected', []); + changeInput('cells_selected', transposeArray2D([]), 'shiny.matrix'); + } + // #828 we should clean the selection on the server-side when the table reloads + cleanSelectedValues(); + + // a flag to indicates if select extension is initialized or not + var flagSelectExt = table.settings()[0]._select !== undefined; + // the Select extension should only be used in the client mode and + // when the selection.mode is set to none + if (data.selection.mode === 'none' && !server && flagSelectExt) { + var updateRowsSelected = function() { + var rows = table.rows({selected: true}); + var selected = []; + $.each(rows.indexes().toArray(), function(i, v) { + selected.push(v + 1); + }); + changeInput('rows_selected', selected); + } + var updateColsSelected = function() { + var columns = table.columns({selected: true}); + changeInput('columns_selected', columns.indexes().toArray()); + } + var updateCellsSelected = function() { + var cells = table.cells({selected: true}); + var selected = []; + cells.every(function() { + var row = 
this.index().row; + var col = this.index().column; + selected = selected.concat([[row + 1, col]]); + }); + changeInput('cells_selected', transposeArray2D(selected), 'shiny.matrix'); + } + table.on('select deselect', function(e, dt, type, indexes) { + updateRowsSelected(); + updateColsSelected(); + updateCellsSelected(); + }) + } + + var selMode = data.selection.mode, selTarget = data.selection.target; + var selDisable = data.selection.selectable === false; + if (inArray(selMode, ['single', 'multiple'])) { + var selClass = inArray(data.style, ['bootstrap', 'bootstrap4']) ? 'active' : 'selected'; + // selected1: row indices; selected2: column indices + var initSel = function(x) { + if (x === null || typeof x === 'boolean' || selTarget === 'cell') { + return {rows: [], cols: []}; + } else if (selTarget === 'row') { + return {rows: $.makeArray(x), cols: []}; + } else if (selTarget === 'column') { + return {rows: [], cols: $.makeArray(x)}; + } else if (selTarget === 'row+column') { + return {rows: $.makeArray(x.rows), cols: $.makeArray(x.cols)}; + } + } + var selected = data.selection.selected; + var selected1 = initSel(selected).rows, selected2 = initSel(selected).cols; + // selectable should contain either all positive or all non-positive values, not both + // positive values indicate "selectable" while non-positive values means "nonselectable" + // the assertion is performed on R side. (only column indicides could be zero which indicates + // the row name) + var selectable = data.selection.selectable; + var selectable1 = initSel(selectable).rows, selectable2 = initSel(selectable).cols; + + // After users reorder the rows or filter the table, we cannot use the table index + // directly. Instead, we need this function to find out the rows between the two clicks. + // If user filter the table again between the start click and the end click, the behavior + // would be undefined, but it should not be a problem. 
+ var shiftSelRowsIndex = function(start, end) { + var indexes = server ? DT_rows_all : table.rows({ search: 'applied' }).indexes().toArray(); + start = indexes.indexOf(start); end = indexes.indexOf(end); + // if start is larger than end, we need to swap + if (start > end) { + var tmp = end; end = start; start = tmp; + } + return indexes.slice(start, end + 1); + } + + var serverRowIndex = function(clientRowIndex) { + return server ? DT_rows_current[clientRowIndex] : clientRowIndex + 1; + } + + // row, column, or cell selection + var lastClickedRow; + if (inArray(selTarget, ['row', 'row+column'])) { + // Get the current selected rows. It will also + // update the selected1's value based on the current row selection state + // Note we can't put this function inside selectRows() directly, + // the reason is method.selectRows() will override selected1's value but this + // function will add rows to selected1 (keep the existing selection), which is + // inconsistent with column and cell selection. + var selectedRows = function() { + var rows = table.rows('.' + selClass); + var idx = rows.indexes().toArray(); + if (!server) { + selected1 = addOne(idx); + return selected1; + } + idx = idx.map(function(i) { + return DT_rows_current[i]; + }); + selected1 = selMode === 'multiple' ? 
unique(selected1.concat(idx)) : idx; + return selected1; + } + // Change selected1's value based on selectable1, then refresh the row state + var onlyKeepSelectableRows = function() { + if (selDisable) { // users can't select; useful when only want backend select + selected1 = []; + return; + } + if (selectable1.length === 0) return; + var nonselectable = selectable1[0] <= 0; + if (nonselectable) { + // should make selectable1 positive + selected1 = $(selected1).not(selectable1.map(function(i) { return -i; })).get(); + } else { + selected1 = $(selected1).filter(selectable1).get(); + } + } + // Change selected1's value based on selectable1, then + // refresh the row selection state according to values in selected1 + var selectRows = function(ignoreSelectable) { + if (!ignoreSelectable) onlyKeepSelectableRows(); + table.$('tr.' + selClass).removeClass(selClass); + if (selected1.length === 0) return; + if (server) { + table.rows({page: 'current'}).every(function() { + if (inArray(DT_rows_current[this.index()], selected1)) { + $(this.node()).addClass(selClass); + } + }); + } else { + var selected0 = selected1.map(function(i) { return i - 1; }); + $(table.rows(selected0).nodes()).addClass(selClass); + } + } + table.on('mousedown.dt', 'tbody tr', function(e) { + var $this = $(this), thisRow = table.row(this); + if (selMode === 'multiple') { + if (e.shiftKey && lastClickedRow !== undefined) { + // select or de-select depends on the last clicked row's status + var flagSel = !$this.hasClass(selClass); + var crtClickedRow = serverRowIndex(thisRow.index()); + if (server) { + var rowsIndex = shiftSelRowsIndex(lastClickedRow, crtClickedRow); + // update current page's selClass + rowsIndex.map(function(i) { + var rowIndex = DT_rows_current.indexOf(i); + if (rowIndex >= 0) { + var row = table.row(rowIndex).nodes().to$(); + var flagRowSel = !row.hasClass(selClass); + if (flagSel === flagRowSel) row.toggleClass(selClass); + } + }); + // update selected1 + if (flagSel) { + selected1 
= unique(selected1.concat(rowsIndex)); + } else { + selected1 = selected1.filter(function(index) { + return !inArray(index, rowsIndex); + }); + } + } else { + // js starts from 0 + shiftSelRowsIndex(lastClickedRow - 1, crtClickedRow - 1).map(function(value) { + var row = table.row(value).nodes().to$(); + var flagRowSel = !row.hasClass(selClass); + if (flagSel === flagRowSel) row.toggleClass(selClass); + }); + } + e.preventDefault(); + } else { + $this.toggleClass(selClass); + } + } else { + if ($this.hasClass(selClass)) { + $this.removeClass(selClass); + } else { + table.$('tr.' + selClass).removeClass(selClass); + $this.addClass(selClass); + } + } + if (server && !$this.hasClass(selClass)) { + var id = DT_rows_current[thisRow.index()]; + // remove id from selected1 since its class .selected has been removed + if (inArray(id, selected1)) selected1.splice($.inArray(id, selected1), 1); + } + selectedRows(); // update selected1's value based on selClass + selectRows(false); // only keep the selectable rows + changeInput('rows_selected', selected1); + changeInput('row_last_clicked', serverRowIndex(thisRow.index()), null, null, {priority: 'event'}); + lastClickedRow = serverRowIndex(thisRow.index()); + }); + selectRows(false); // in case users have specified pre-selected rows + // restore selected rows after the table is redrawn (e.g. 
sort/search/page); + // client-side tables will preserve the selections automatically; for + // server-side tables, we have to *real* row indices are in `selected1` + changeInput('rows_selected', selected1); + if (server) table.on('draw.dt', function(e) { selectRows(false); }); + methods.selectRows = function(selected, ignoreSelectable) { + selected1 = $.makeArray(selected); + selectRows(ignoreSelectable); + changeInput('rows_selected', selected1); + } + } + + if (inArray(selTarget, ['column', 'row+column'])) { + if (selTarget === 'row+column') { + $(table.columns().footer()).css('cursor', 'pointer'); + } + // update selected2's value based on selectable2 + var onlyKeepSelectableCols = function() { + if (selDisable) { // users can't select; useful when only want backend select + selected2 = []; + return; + } + if (selectable2.length === 0) return; + var nonselectable = selectable2[0] <= 0; + if (nonselectable) { + // need to make selectable2 positive + selected2 = $(selected2).not(selectable2.map(function(i) { return -i; })).get(); + } else { + selected2 = $(selected2).filter(selectable2).get(); + } + } + // update selected2 and then + // refresh the col selection state according to values in selected2 + var selectCols = function(ignoreSelectable) { + if (!ignoreSelectable) onlyKeepSelectableCols(); + // if selected2 is not a valide index (e.g., larger than the column number) + // table.columns(selected2) will fail and result in a blank table + // this is different from the table.rows(), where the out-of-range indexes + // doesn't affect at all + selected2 = $(selected2).filter(table.columns().indexes()).get(); + table.columns().nodes().flatten().to$().removeClass(selClass); + if (selected2.length > 0) + table.columns(selected2).nodes().flatten().to$().addClass(selClass); + } + var callback = function() { + var colIdx = selTarget === 'column' ? 
table.cell(this).index().column : + $.inArray(this, table.columns().footer()), + thisCol = $(table.column(colIdx).nodes()); + if (colIdx === -1) return; + if (thisCol.hasClass(selClass)) { + thisCol.removeClass(selClass); + selected2.splice($.inArray(colIdx, selected2), 1); + } else { + if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); + thisCol.addClass(selClass); + selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx])); + } + selectCols(false); // update selected2 based on selectable + changeInput('columns_selected', selected2); + } + if (selTarget === 'column') { + $(table.table().body()).on('click.dt', 'td', callback); + } else { + $(table.table().footer()).on('click.dt', 'tr th', callback); + } + selectCols(false); // in case users have specified pre-selected columns + changeInput('columns_selected', selected2); + if (server) table.on('draw.dt', function(e) { selectCols(false); }); + methods.selectColumns = function(selected, ignoreSelectable) { + selected2 = $.makeArray(selected); + selectCols(ignoreSelectable); + changeInput('columns_selected', selected2); + } + } + + if (selTarget === 'cell') { + var selected3 = [], selectable3 = []; + if (selected !== null) selected3 = selected; + if (selectable !== null && typeof selectable !== 'boolean') selectable3 = selectable; + var findIndex = function(ij, sel) { + for (var i = 0; i < sel.length; i++) { + if (ij[0] === sel[i][0] && ij[1] === sel[i][1]) return i; + } + return -1; + } + // Change selected3's value based on selectable3, then refresh the cell state + var onlyKeepSelectableCells = function() { + if (selDisable) { // users can't select; useful when only want backend select + selected3 = []; + return; + } + if (selectable3.length === 0) return; + var nonselectable = selectable3[0][0] <= 0; + var out = []; + if (nonselectable) { + selected3.map(function(ij) { + // should make selectable3 positive + if (findIndex([-ij[0], -ij[1]], selectable3) === -1) { 
out.push(ij); } + }); + } else { + selected3.map(function(ij) { + if (findIndex(ij, selectable3) > -1) { out.push(ij); } + }); + } + selected3 = out; + } + // Change selected3's value based on selectable3, then + // refresh the cell selection state according to values in selected3 + var selectCells = function(ignoreSelectable) { + if (!ignoreSelectable) onlyKeepSelectableCells(); + table.$('td.' + selClass).removeClass(selClass); + if (selected3.length === 0) return; + if (server) { + table.cells({page: 'current'}).every(function() { + var info = tweakCellIndex(this); + if (findIndex([info.row, info.col], selected3) > -1) + $(this.node()).addClass(selClass); + }); + } else { + selected3.map(function(ij) { + $(table.cell(ij[0] - 1, ij[1]).node()).addClass(selClass); + }); + } + }; + table.on('click.dt', 'tbody td', function() { + var $this = $(this), info = tweakCellIndex(table.cell(this)); + if ($this.hasClass(selClass)) { + $this.removeClass(selClass); + selected3.splice(findIndex([info.row, info.col], selected3), 1); + } else { + if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); + $this.addClass(selClass); + selected3 = selMode === 'single' ? [[info.row, info.col]] : + unique(selected3.concat([[info.row, info.col]])); + } + selectCells(false); // must call this to update selected3 based on selectable3 + changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); + }); + selectCells(false); // in case users have specified pre-selected columns + changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); + + if (server) table.on('draw.dt', function(e) { selectCells(false); }); + methods.selectCells = function(selected, ignoreSelectable) { + selected3 = selected ? 
selected : []; + selectCells(ignoreSelectable); + changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); + } + } + } + + // expose some table info to Shiny + var updateTableInfo = function(e, settings) { + // TODO: is anyone interested in the page info? + // changeInput('page_info', table.page.info()); + var updateRowInfo = function(id, modifier) { + var idx; + if (server) { + idx = modifier.page === 'current' ? DT_rows_current : DT_rows_all; + } else { + var rows = table.rows($.extend({ + search: 'applied', + page: 'all' + }, modifier)); + idx = addOne(rows.indexes().toArray()); + } + changeInput('rows' + '_' + id, idx); + }; + updateRowInfo('current', {page: 'current'}); + updateRowInfo('all', {}); + } + table.on('draw.dt', updateTableInfo); + updateTableInfo(); + + // state info + table.on('draw.dt column-visibility.dt', function() { + changeInput('state', table.state()); + }); + changeInput('state', table.state()); + + // search info + var updateSearchInfo = function() { + changeInput('search', table.search()); + if (filterRow) changeInput('search_columns', filterRow.toArray().map(function(td) { + return $(td).find('input').first().val(); + })); + } + table.on('draw.dt', updateSearchInfo); + updateSearchInfo(); + + var cellInfo = function(thiz) { + var info = tweakCellIndex(table.cell(thiz)); + info.value = table.cell(thiz).data(); + return info; + } + // the current cell clicked on + table.on('click.dt', 'tbody td', function() { + changeInput('cell_clicked', cellInfo(this), null, null, {priority: 'event'}); + }) + changeInput('cell_clicked', {}); + + // do not trigger table selection when clicking on links unless they have classes + table.on('click.dt', 'tbody td a', function(e) { + if (this.className === '') e.stopPropagation(); + }); + + methods.addRow = function(data, rowname, resetPaging) { + var n = table.columns().indexes().length, d = n - data.length; + if (d === 1) { + data = rowname.concat(data) + } else if (d !== 0) { + 
console.log(data); + console.log(table.columns().indexes()); + throw 'New data must be of the same length as current data (' + n + ')'; + }; + table.row.add(data).draw(resetPaging); + } + + methods.updateSearch = function(keywords) { + if (keywords.global !== null) + $(table.table().container()).find('input[type=search]').first() + .val(keywords.global).trigger('input'); + var columns = keywords.columns; + if (!filterRow || columns === null) return; + filterRow.toArray().map(function(td, i) { + var v = typeof columns === 'string' ? columns : columns[i]; + if (typeof v === 'undefined') { + console.log('The search keyword for column ' + i + ' is undefined') + return; + } + $(td).find('input').first().val(v); + searchColumn(i, v); + }); + table.draw(); + } + + methods.hideCols = function(hide, reset) { + if (reset) table.columns().visible(true, false); + table.columns(hide).visible(false); + } + + methods.showCols = function(show, reset) { + if (reset) table.columns().visible(false, false); + table.columns(show).visible(true); + } + + methods.colReorder = function(order, origOrder) { + table.colReorder.order(order, origOrder); + } + + methods.selectPage = function(page) { + if (table.page.info().pages < page || page < 1) { + throw 'Selected page is out of range'; + }; + table.page(page - 1).draw(false); + } + + methods.reloadData = function(resetPaging, clearSelection) { + // empty selections first if necessary + if (methods.selectRows && inArray('row', clearSelection)) methods.selectRows([]); + if (methods.selectColumns && inArray('column', clearSelection)) methods.selectColumns([]); + if (methods.selectCells && inArray('cell', clearSelection)) methods.selectCells([]); + table.ajax.reload(null, resetPaging); + } + + // update table filters (set new limits of sliders) + methods.updateFilters = function(newProps) { + // loop through each filter in the filter row + filterRow.each(function(i, td) { + var k = i; + if (filterRow.length > newProps.length) { + if (i === 0) 
return; // first column is row names + k = i - 1; + } + // Update the filters to reflect the updated data. + // Allow "falsy" (e.g. NULL) to signify a no-op. + if (newProps[k]) { + setFilterProps(td, newProps[k]); + } + }); + }; + + table.shinyMethods = methods; + }, + resize: function(el, width, height, instance) { + if (instance.data) this.renderValue(el, instance.data, instance); + + // dynamically adjust height if fillContainer = TRUE + if (instance.fillContainer) + this.fillAvailableHeight(el, height); + + this.adjustWidth(el); + }, + + // dynamically set the scroll body to fill available height + // (used with fillContainer = TRUE) + fillAvailableHeight: function(el, availableHeight) { + + // see how much of the table is occupied by header/footer elements + // and use that to compute a target scroll body height + var dtWrapper = $(el).find('div.dataTables_wrapper'); + var dtScrollBody = $(el).find($('div.dataTables_scrollBody')); + var framingHeight = dtWrapper.innerHeight() - dtScrollBody.innerHeight(); + var scrollBodyHeight = availableHeight - framingHeight; + + // we need to set `max-height` to none as datatables library now sets this + // to a fixed height, disabling the ability to resize to fill the window, + // as it will be set to a fixed 100px under such circumstances, e.g., RStudio IDE, + // or FlexDashboard + // see https://github.com/rstudio/DT/issues/951#issuecomment-1026464509 + dtScrollBody.css('max-height', 'none'); + // set the height + dtScrollBody.height(scrollBodyHeight + 'px'); + }, + + // adjust the width of columns; remove the hard-coded widths on table and the + // scroll header when scrollX/Y are enabled + adjustWidth: function(el) { + var $el = $(el), table = $el.data('datatable'); + if (table) table.columns.adjust(); + $el.find('.dataTables_scrollHeadInner').css('width', '') + .children('table').css('margin-left', ''); + } +}); + + if (!HTMLWidgets.shinyMode) return; + + Shiny.addCustomMessageHandler('datatable-calls', 
function(data) { + var id = data.id; + var el = document.getElementById(id); + var table = el ? $(el).data('datatable') : null; + if (!table) { + console.log("Couldn't find table with id " + id); + return; + } + + var methods = table.shinyMethods, call = data.call; + if (methods[call.method]) { + methods[call.method].apply(table, call.args); + } else { + console.log("Unknown method " + call.method); + } + }); + +})(); diff --git a/_freeze/site_libs/datatables-css-0.0.0/datatables-crosstalk.css b/_freeze/site_libs/datatables-css-0.0.0/datatables-crosstalk.css new file mode 100644 index 00000000..bd1159c8 --- /dev/null +++ b/_freeze/site_libs/datatables-css-0.0.0/datatables-crosstalk.css @@ -0,0 +1,32 @@ +.dt-crosstalk-fade { + opacity: 0.2; +} + +html body div.DTS div.dataTables_scrollBody { + background: none; +} + + +/* +Fix https://github.com/rstudio/DT/issues/563 +If the `table.display` is set to "block" (e.g., pkgdown), the browser will display +datatable objects strangely. The search panel and the page buttons will still be +in full-width but the table body will be "compact" and shorter. +In therory, having this attributes will affect `dom="t"` +with `display: block` users. But in reality, there should be no one. +We may remove the below lines in the future if the upstream agree to have this there. +See https://github.com/DataTables/DataTablesSrc/issues/160 +*/ + +table.dataTable { + display: table; +} + + +/* +When DTOutput(fill = TRUE), it receives a .html-fill-item class (via htmltools::bindFillRole()), which effectively amounts to `flex: 1 1 auto`. That's mostly fine, but the case where `fillContainer=TRUE`+`height:auto`+`flex-basis:auto` and the container (e.g., a bslib::card()) doesn't have a defined height is a bit problematic since the table wants to fit the parent but the parent wants to fit the table, which results pretty small table height (maybe because there is a minimum height somewhere?). 
It seems better in this case to impose a 400px height default for the table, which we can do by setting `flex-basis` to 400px (the table is still allowed to grow/shrink when the container has an opinionated height). +*/ + +.html-fill-container > .html-fill-item.datatables { + flex-basis: 400px; +} diff --git a/content/find/all/index_files/dt-core/css/jquery.dataTables.extra.css b/_freeze/site_libs/dt-core-1.12.1/css/jquery.dataTables.extra.css similarity index 100% rename from content/find/all/index_files/dt-core/css/jquery.dataTables.extra.css rename to _freeze/site_libs/dt-core-1.12.1/css/jquery.dataTables.extra.css diff --git a/content/find/all/index_files/dt-core/css/jquery.dataTables.min.css b/_freeze/site_libs/dt-core-1.12.1/css/jquery.dataTables.min.css similarity index 100% rename from content/find/all/index_files/dt-core/css/jquery.dataTables.min.css rename to _freeze/site_libs/dt-core-1.12.1/css/jquery.dataTables.min.css diff --git a/content/find/all/index_files/dt-core/js/jquery.dataTables.min.js b/_freeze/site_libs/dt-core-1.12.1/js/jquery.dataTables.min.js similarity index 100% rename from content/find/all/index_files/dt-core/js/jquery.dataTables.min.js rename to _freeze/site_libs/dt-core-1.12.1/js/jquery.dataTables.min.js diff --git a/_freeze/site_libs/htmlwidgets-1.6.2/htmlwidgets.js b/_freeze/site_libs/htmlwidgets-1.6.2/htmlwidgets.js new file mode 100644 index 00000000..1067d029 --- /dev/null +++ b/_freeze/site_libs/htmlwidgets-1.6.2/htmlwidgets.js @@ -0,0 +1,901 @@ +(function() { + // If window.HTMLWidgets is already defined, then use it; otherwise create a + // new object. This allows preceding code to set options that affect the + // initialization process (though none currently exist). + window.HTMLWidgets = window.HTMLWidgets || {}; + + // See if we're running in a viewer pane. If not, we're in a web browser. 
+ var viewerMode = window.HTMLWidgets.viewerMode = + /\bviewer_pane=1\b/.test(window.location); + + // See if we're running in Shiny mode. If not, it's a static document. + // Note that static widgets can appear in both Shiny and static modes, but + // obviously, Shiny widgets can only appear in Shiny apps/documents. + var shinyMode = window.HTMLWidgets.shinyMode = + typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings; + + // We can't count on jQuery being available, so we implement our own + // version if necessary. + function querySelectorAll(scope, selector) { + if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) { + return scope.find(selector); + } + if (scope.querySelectorAll) { + return scope.querySelectorAll(selector); + } + } + + function asArray(value) { + if (value === null) + return []; + if ($.isArray(value)) + return value; + return [value]; + } + + // Implement jQuery's extend + function extend(target /*, ... */) { + if (arguments.length == 1) { + return target; + } + for (var i = 1; i < arguments.length; i++) { + var source = arguments[i]; + for (var prop in source) { + if (source.hasOwnProperty(prop)) { + target[prop] = source[prop]; + } + } + } + return target; + } + + // IE8 doesn't support Array.forEach. + function forEach(values, callback, thisArg) { + if (values.forEach) { + values.forEach(callback, thisArg); + } else { + for (var i = 0; i < values.length; i++) { + callback.call(thisArg, values[i], i, values); + } + } + } + + // Replaces the specified method with the return value of funcSource. + // + // Note that funcSource should not BE the new method, it should be a function + // that RETURNS the new method. funcSource receives a single argument that is + // the overridden method, it can be called from the new method. The overridden + // method can be called like a regular function, it has the target permanently + // bound to it so "this" will work correctly. 
+ function overrideMethod(target, methodName, funcSource) { + var superFunc = target[methodName] || function() {}; + var superFuncBound = function() { + return superFunc.apply(target, arguments); + }; + target[methodName] = funcSource(superFuncBound); + } + + // Add a method to delegator that, when invoked, calls + // delegatee.methodName. If there is no such method on + // the delegatee, but there was one on delegator before + // delegateMethod was called, then the original version + // is invoked instead. + // For example: + // + // var a = { + // method1: function() { console.log('a1'); } + // method2: function() { console.log('a2'); } + // }; + // var b = { + // method1: function() { console.log('b1'); } + // }; + // delegateMethod(a, b, "method1"); + // delegateMethod(a, b, "method2"); + // a.method1(); + // a.method2(); + // + // The output would be "b1", "a2". + function delegateMethod(delegator, delegatee, methodName) { + var inherited = delegator[methodName]; + delegator[methodName] = function() { + var target = delegatee; + var method = delegatee[methodName]; + + // The method doesn't exist on the delegatee. Instead, + // call the method on the delegator, if it exists. 
+ if (!method) { + target = delegator; + method = inherited; + } + + if (method) { + return method.apply(target, arguments); + } + }; + } + + // Implement a vague facsimilie of jQuery's data method + function elementData(el, name, value) { + if (arguments.length == 2) { + return el["htmlwidget_data_" + name]; + } else if (arguments.length == 3) { + el["htmlwidget_data_" + name] = value; + return el; + } else { + throw new Error("Wrong number of arguments for elementData: " + + arguments.length); + } + } + + // http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex + function escapeRegExp(str) { + return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); + } + + function hasClass(el, className) { + var re = new RegExp("\\b" + escapeRegExp(className) + "\\b"); + return re.test(el.className); + } + + // elements - array (or array-like object) of HTML elements + // className - class name to test for + // include - if true, only return elements with given className; + // if false, only return elements *without* given className + function filterByClass(elements, className, include) { + var results = []; + for (var i = 0; i < elements.length; i++) { + if (hasClass(elements[i], className) == include) + results.push(elements[i]); + } + return results; + } + + function on(obj, eventName, func) { + if (obj.addEventListener) { + obj.addEventListener(eventName, func, false); + } else if (obj.attachEvent) { + obj.attachEvent(eventName, func); + } + } + + function off(obj, eventName, func) { + if (obj.removeEventListener) + obj.removeEventListener(eventName, func, false); + else if (obj.detachEvent) { + obj.detachEvent(eventName, func); + } + } + + // Translate array of values to top/right/bottom/left, as usual with + // the "padding" CSS property + // https://developer.mozilla.org/en-US/docs/Web/CSS/padding + function unpackPadding(value) { + if (typeof(value) === "number") + value = [value]; + if (value.length === 1) { + return {top: 
value[0], right: value[0], bottom: value[0], left: value[0]}; + } + if (value.length === 2) { + return {top: value[0], right: value[1], bottom: value[0], left: value[1]}; + } + if (value.length === 3) { + return {top: value[0], right: value[1], bottom: value[2], left: value[1]}; + } + if (value.length === 4) { + return {top: value[0], right: value[1], bottom: value[2], left: value[3]}; + } + } + + // Convert an unpacked padding object to a CSS value + function paddingToCss(paddingObj) { + return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px"; + } + + // Makes a number suitable for CSS + function px(x) { + if (typeof(x) === "number") + return x + "px"; + else + return x; + } + + // Retrieves runtime widget sizing information for an element. + // The return value is either null, or an object with fill, padding, + // defaultWidth, defaultHeight fields. + function sizingPolicy(el) { + var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']"); + if (!sizingEl) + return null; + var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}"); + if (viewerMode) { + return sp.viewer; + } else { + return sp.browser; + } + } + + // @param tasks Array of strings (or falsy value, in which case no-op). + // Each element must be a valid JavaScript expression that yields a + // function. Or, can be an array of objects with "code" and "data" + // properties; in this case, the "code" property should be a string + // of JS that's an expr that yields a function, and "data" should be + // an object that will be added as an additional argument when that + // function is called. + // @param target The object that will be "this" for each function + // execution. + // @param args Array of arguments to be passed to the functions. (The + // same arguments will be passed to all functions.) 
+ function evalAndRun(tasks, target, args) { + if (tasks) { + forEach(tasks, function(task) { + var theseArgs = args; + if (typeof(task) === "object") { + theseArgs = theseArgs.concat([task.data]); + task = task.code; + } + var taskFunc = tryEval(task); + if (typeof(taskFunc) !== "function") { + throw new Error("Task must be a function! Source:\n" + task); + } + taskFunc.apply(target, theseArgs); + }); + } + } + + // Attempt eval() both with and without enclosing in parentheses. + // Note that enclosing coerces a function declaration into + // an expression that eval() can parse + // (otherwise, a SyntaxError is thrown) + function tryEval(code) { + var result = null; + try { + result = eval("(" + code + ")"); + } catch(error) { + if (!(error instanceof SyntaxError)) { + throw error; + } + try { + result = eval(code); + } catch(e) { + if (e instanceof SyntaxError) { + throw error; + } else { + throw e; + } + } + } + return result; + } + + function initSizing(el) { + var sizing = sizingPolicy(el); + if (!sizing) + return; + + var cel = document.getElementById("htmlwidget_container"); + if (!cel) + return; + + if (typeof(sizing.padding) !== "undefined") { + document.body.style.margin = "0"; + document.body.style.padding = paddingToCss(unpackPadding(sizing.padding)); + } + + if (sizing.fill) { + document.body.style.overflow = "hidden"; + document.body.style.width = "100%"; + document.body.style.height = "100%"; + document.documentElement.style.width = "100%"; + document.documentElement.style.height = "100%"; + cel.style.position = "absolute"; + var pad = unpackPadding(sizing.padding); + cel.style.top = pad.top + "px"; + cel.style.right = pad.right + "px"; + cel.style.bottom = pad.bottom + "px"; + cel.style.left = pad.left + "px"; + el.style.width = "100%"; + el.style.height = "100%"; + + return { + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } + }; + + } else { + el.style.width 
= px(sizing.width); + el.style.height = px(sizing.height); + + return { + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } + }; + } + } + + // Default implementations for methods + var defaults = { + find: function(scope) { + return querySelectorAll(scope, "." + this.name); + }, + renderError: function(el, err) { + var $el = $(el); + + this.clearError(el); + + // Add all these error classes, as Shiny does + var errClass = "shiny-output-error"; + if (err.type !== null) { + // use the classes of the error condition as CSS class names + errClass = errClass + " " + $.map(asArray(err.type), function(type) { + return errClass + "-" + type; + }).join(" "); + } + errClass = errClass + " htmlwidgets-error"; + + // Is el inline or block? If inline or inline-block, just display:none it + // and add an inline error. + var display = $el.css("display"); + $el.data("restore-display-mode", display); + + if (display === "inline" || display === "inline-block") { + $el.hide(); + if (err.message !== "") { + var errorSpan = $("").addClass(errClass); + errorSpan.text(err.message); + $el.after(errorSpan); + } + } else if (display === "block") { + // If block, add an error just after the el, set visibility:none on the + // el, and position the error to be on top of the el. + // Mark it with a unique ID and CSS class so we can remove it later. + $el.css("visibility", "hidden"); + if (err.message !== "") { + var errorDiv = $("
").addClass(errClass).css("position", "absolute") + .css("top", el.offsetTop) + .css("left", el.offsetLeft) + // setting width can push out the page size, forcing otherwise + // unnecessary scrollbars to appear and making it impossible for + // the element to shrink; so use max-width instead + .css("maxWidth", el.offsetWidth) + .css("height", el.offsetHeight); + errorDiv.text(err.message); + $el.after(errorDiv); + + // Really dumb way to keep the size/position of the error in sync with + // the parent element as the window is resized or whatever. + var intId = setInterval(function() { + if (!errorDiv[0].parentElement) { + clearInterval(intId); + return; + } + errorDiv + .css("top", el.offsetTop) + .css("left", el.offsetLeft) + .css("maxWidth", el.offsetWidth) + .css("height", el.offsetHeight); + }, 500); + } + } + }, + clearError: function(el) { + var $el = $(el); + var display = $el.data("restore-display-mode"); + $el.data("restore-display-mode", null); + + if (display === "inline" || display === "inline-block") { + if (display) + $el.css("display", display); + $(el.nextSibling).filter(".htmlwidgets-error").remove(); + } else if (display === "block"){ + $el.css("visibility", "inherit"); + $(el.nextSibling).filter(".htmlwidgets-error").remove(); + } + }, + sizing: {} + }; + + // Called by widget bindings to register a new type of widget. The definition + // object can contain the following properties: + // - name (required) - A string indicating the binding name, which will be + // used by default as the CSS classname to look for. + // - initialize (optional) - A function(el) that will be called once per + // widget element; if a value is returned, it will be passed as the third + // value to renderValue. + // - renderValue (required) - A function(el, data, initValue) that will be + // called with data. Static contexts will cause this to be called once per + // element; Shiny apps will cause this to be called multiple times per + // element, as the data changes. 
+ window.HTMLWidgets.widget = function(definition) { + if (!definition.name) { + throw new Error("Widget must have a name"); + } + if (!definition.type) { + throw new Error("Widget must have a type"); + } + // Currently we only support output widgets + if (definition.type !== "output") { + throw new Error("Unrecognized widget type '" + definition.type + "'"); + } + // TODO: Verify that .name is a valid CSS classname + + // Support new-style instance-bound definitions. Old-style class-bound + // definitions have one widget "object" per widget per type/class of + // widget; the renderValue and resize methods on such widget objects + // take el and instance arguments, because the widget object can't + // store them. New-style instance-bound definitions have one widget + // object per widget instance; the definition that's passed in doesn't + // provide renderValue or resize methods at all, just the single method + // factory(el, width, height) + // which returns an object that has renderValue(x) and resize(w, h). + // This enables a far more natural programming style for the widget + // author, who can store per-instance state using either OO-style + // instance fields or functional-style closure variables (I guess this + // is in contrast to what can only be called C-style pseudo-OO which is + // what we required before). + if (definition.factory) { + definition = createLegacyDefinitionAdapter(definition); + } + + if (!definition.renderValue) { + throw new Error("Widget must have a renderValue function"); + } + + // For static rendering (non-Shiny), use a simple widget registration + // scheme. We also use this scheme for Shiny apps/documents that also + // contain static widgets. + window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; + // Merge defaults into the definition; don't mutate the original definition. 
+ var staticBinding = extend({}, defaults, definition); + overrideMethod(staticBinding, "find", function(superfunc) { + return function(scope) { + var results = superfunc(scope); + // Filter out Shiny outputs, we only want the static kind + return filterByClass(results, "html-widget-output", false); + }; + }); + window.HTMLWidgets.widgets.push(staticBinding); + + if (shinyMode) { + // Shiny is running. Register the definition with an output binding. + // The definition itself will not be the output binding, instead + // we will make an output binding object that delegates to the + // definition. This is because we foolishly used the same method + // name (renderValue) for htmlwidgets definition and Shiny bindings + // but they actually have quite different semantics (the Shiny + // bindings receive data that includes lots of metadata that it + // strips off before calling htmlwidgets renderValue). We can't + // just ignore the difference because in some widgets it's helpful + // to call this.renderValue() from inside of resize(), and if + // we're not delegating, then that call will go to the Shiny + // version instead of the htmlwidgets version. + + // Merge defaults with definition, without mutating either. + var bindingDef = extend({}, defaults, definition); + + // This object will be our actual Shiny binding. + var shinyBinding = new Shiny.OutputBinding(); + + // With a few exceptions, we'll want to simply use the bindingDef's + // version of methods if they are available, otherwise fall back to + // Shiny's defaults. NOTE: If Shiny's output bindings gain additional + // methods in the future, and we want them to be overrideable by + // HTMLWidget binding definitions, then we'll need to add them to this + // list. 
+ delegateMethod(shinyBinding, bindingDef, "getId"); + delegateMethod(shinyBinding, bindingDef, "onValueChange"); + delegateMethod(shinyBinding, bindingDef, "onValueError"); + delegateMethod(shinyBinding, bindingDef, "renderError"); + delegateMethod(shinyBinding, bindingDef, "clearError"); + delegateMethod(shinyBinding, bindingDef, "showProgress"); + + // The find, renderValue, and resize are handled differently, because we + // want to actually decorate the behavior of the bindingDef methods. + + shinyBinding.find = function(scope) { + var results = bindingDef.find(scope); + + // Only return elements that are Shiny outputs, not static ones + var dynamicResults = results.filter(".html-widget-output"); + + // It's possible that whatever caused Shiny to think there might be + // new dynamic outputs, also caused there to be new static outputs. + // Since there might be lots of different htmlwidgets bindings, we + // schedule execution for later--no need to staticRender multiple + // times. + if (results.length !== dynamicResults.length) + scheduleStaticRender(); + + return dynamicResults; + }; + + // Wrap renderValue to handle initialization, which unfortunately isn't + // supported natively by Shiny at the time of this writing. 
+ + shinyBinding.renderValue = function(el, data) { + Shiny.renderDependencies(data.deps); + // Resolve strings marked as javascript literals to objects + if (!(data.evals instanceof Array)) data.evals = [data.evals]; + for (var i = 0; data.evals && i < data.evals.length; i++) { + window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); + } + if (!bindingDef.renderOnNullValue) { + if (data.x === null) { + el.style.visibility = "hidden"; + return; + } else { + el.style.visibility = "inherit"; + } + } + if (!elementData(el, "initialized")) { + initSizing(el); + + elementData(el, "initialized", true); + if (bindingDef.initialize) { + var rect = el.getBoundingClientRect(); + var result = bindingDef.initialize(el, rect.width, rect.height); + elementData(el, "init_result", result); + } + } + bindingDef.renderValue(el, data.x, elementData(el, "init_result")); + evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); + }; + + // Only override resize if bindingDef implements it + if (bindingDef.resize) { + shinyBinding.resize = function(el, width, height) { + // Shiny can call resize before initialize/renderValue have been + // called, which doesn't make sense for widgets. 
+ if (elementData(el, "initialized")) { + bindingDef.resize(el, width, height, elementData(el, "init_result")); + } + }; + } + + Shiny.outputBindings.register(shinyBinding, bindingDef.name); + } + }; + + var scheduleStaticRenderTimerId = null; + function scheduleStaticRender() { + if (!scheduleStaticRenderTimerId) { + scheduleStaticRenderTimerId = setTimeout(function() { + scheduleStaticRenderTimerId = null; + window.HTMLWidgets.staticRender(); + }, 1); + } + } + + // Render static widgets after the document finishes loading + // Statically render all elements that are of this widget's class + window.HTMLWidgets.staticRender = function() { + var bindings = window.HTMLWidgets.widgets || []; + forEach(bindings, function(binding) { + var matches = binding.find(document.documentElement); + forEach(matches, function(el) { + var sizeObj = initSizing(el, binding); + + var getSize = function(el) { + if (sizeObj) { + return {w: sizeObj.getWidth(), h: sizeObj.getHeight()} + } else { + var rect = el.getBoundingClientRect(); + return {w: rect.width, h: rect.height} + } + }; + + if (hasClass(el, "html-widget-static-bound")) + return; + el.className = el.className + " html-widget-static-bound"; + + var initResult; + if (binding.initialize) { + var size = getSize(el); + initResult = binding.initialize(el, size.w, size.h); + elementData(el, "init_result", initResult); + } + + if (binding.resize) { + var lastSize = getSize(el); + var resizeHandler = function(e) { + var size = getSize(el); + if (size.w === 0 && size.h === 0) + return; + if (size.w === lastSize.w && size.h === lastSize.h) + return; + lastSize = size; + binding.resize(el, size.w, size.h, initResult); + }; + + on(window, "resize", resizeHandler); + + // This is needed for cases where we're running in a Shiny + // app, but the widget itself is not a Shiny output, but + // rather a simple static widget. 
One example of this is + // an rmarkdown document that has runtime:shiny and widget + // that isn't in a render function. Shiny only knows to + // call resize handlers for Shiny outputs, not for static + // widgets, so we do it ourselves. + if (window.jQuery) { + window.jQuery(document).on( + "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", + resizeHandler + ); + window.jQuery(document).on( + "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", + resizeHandler + ); + } + + // This is needed for the specific case of ioslides, which + // flips slides between display:none and display:block. + // Ideally we would not have to have ioslide-specific code + // here, but rather have ioslides raise a generic event, + // but the rmarkdown package just went to CRAN so the + // window to getting that fixed may be long. + if (window.addEventListener) { + // It's OK to limit this to window.addEventListener + // browsers because ioslides itself only supports + // such browsers. 
+ on(document, "slideenter", resizeHandler); + on(document, "slideleave", resizeHandler); + } + } + + var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); + if (scriptData) { + var data = JSON.parse(scriptData.textContent || scriptData.text); + // Resolve strings marked as javascript literals to objects + if (!(data.evals instanceof Array)) data.evals = [data.evals]; + for (var k = 0; data.evals && k < data.evals.length; k++) { + window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); + } + binding.renderValue(el, data.x, initResult); + evalAndRun(data.jsHooks.render, initResult, [el, data.x]); + } + }); + }); + + invokePostRenderHandlers(); + } + + + function has_jQuery3() { + if (!window.jQuery) { + return false; + } + var $version = window.jQuery.fn.jquery; + var $major_version = parseInt($version.split(".")[0]); + return $major_version >= 3; + } + + /* + / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's + / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now + / really means $(setTimeout(fn)). + / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous + / + / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny + / one tick later than it did before, which means staticRender() is + / called renderValue() earlier than (advanced) widget authors might be expecting. + / https://github.com/rstudio/shiny/issues/2630 + / + / For a concrete example, leaflet has some methods (e.g., updateBounds) + / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
+ / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to + / delay execution of those methods (until Shiny methods are ready) + / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 + / + / Ideally widget authors wouldn't need to use this setTimeout() hack that + / leaflet uses to call Shiny methods on a staticRender(). In the long run, + / the logic initShiny should be broken up so that method registration happens + / right away, but binding happens later. + */ + function maybeStaticRenderLater() { + if (shinyMode && has_jQuery3()) { + window.jQuery(window.HTMLWidgets.staticRender); + } else { + window.HTMLWidgets.staticRender(); + } + } + + if (document.addEventListener) { + document.addEventListener("DOMContentLoaded", function() { + document.removeEventListener("DOMContentLoaded", arguments.callee, false); + maybeStaticRenderLater(); + }, false); + } else if (document.attachEvent) { + document.attachEvent("onreadystatechange", function() { + if (document.readyState === "complete") { + document.detachEvent("onreadystatechange", arguments.callee); + maybeStaticRenderLater(); + } + }); + } + + + window.HTMLWidgets.getAttachmentUrl = function(depname, key) { + // If no key, default to the first item + if (typeof(key) === "undefined") + key = 1; + + var link = document.getElementById(depname + "-" + key + "-attachment"); + if (!link) { + throw new Error("Attachment " + depname + "/" + key + " not found in document"); + } + return link.getAttribute("href"); + }; + + window.HTMLWidgets.dataframeToD3 = function(df) { + var names = []; + var length; + for (var name in df) { + if (df.hasOwnProperty(name)) + names.push(name); + if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { + throw new Error("All fields must be arrays"); + } else if (typeof(length) !== "undefined" && length !== df[name].length) { + throw new Error("All fields must be arrays of the same length"); + } + length = 
df[name].length; + } + var results = []; + var item; + for (var row = 0; row < length; row++) { + item = {}; + for (var col = 0; col < names.length; col++) { + item[names[col]] = df[names[col]][row]; + } + results.push(item); + } + return results; + }; + + window.HTMLWidgets.transposeArray2D = function(array) { + if (array.length === 0) return array; + var newArray = array[0].map(function(col, i) { + return array.map(function(row) { + return row[i] + }) + }); + return newArray; + }; + // Split value at splitChar, but allow splitChar to be escaped + // using escapeChar. Any other characters escaped by escapeChar + // will be included as usual (including escapeChar itself). + function splitWithEscape(value, splitChar, escapeChar) { + var results = []; + var escapeMode = false; + var currentResult = ""; + for (var pos = 0; pos < value.length; pos++) { + if (!escapeMode) { + if (value[pos] === splitChar) { + results.push(currentResult); + currentResult = ""; + } else if (value[pos] === escapeChar) { + escapeMode = true; + } else { + currentResult += value[pos]; + } + } else { + currentResult += value[pos]; + escapeMode = false; + } + } + if (currentResult !== "") { + results.push(currentResult); + } + return results; + } + // Function authored by Yihui/JJ Allaire + window.HTMLWidgets.evaluateStringMember = function(o, member) { + var parts = splitWithEscape(member, '.', '\\'); + for (var i = 0, l = parts.length; i < l; i++) { + var part = parts[i]; + // part may be a character or 'numeric' member name + if (o !== null && typeof o === "object" && part in o) { + if (i == (l - 1)) { // if we are at the end of the line then evalulate + if (typeof o[part] === "string") + o[part] = tryEval(o[part]); + } else { // otherwise continue to next embedded object + o = o[part]; + } + } + } + }; + + // Retrieve the HTMLWidget instance (i.e. the return value of an + // HTMLWidget binding's initialize() or factory() function) + // associated with an element, or null if none. 
+ window.HTMLWidgets.getInstance = function(el) { + return elementData(el, "init_result"); + }; + + // Finds the first element in the scope that matches the selector, + // and returns the HTMLWidget instance (i.e. the return value of + // an HTMLWidget binding's initialize() or factory() function) + // associated with that element, if any. If no element matches the + // selector, or the first matching element has no HTMLWidget + // instance associated with it, then null is returned. + // + // The scope argument is optional, and defaults to window.document. + window.HTMLWidgets.find = function(scope, selector) { + if (arguments.length == 1) { + selector = scope; + scope = document; + } + + var el = scope.querySelector(selector); + if (el === null) { + return null; + } else { + return window.HTMLWidgets.getInstance(el); + } + }; + + // Finds all elements in the scope that match the selector, and + // returns the HTMLWidget instances (i.e. the return values of + // an HTMLWidget binding's initialize() or factory() function) + // associated with the elements, in an array. If elements that + // match the selector don't have an associated HTMLWidget + // instance, the returned array will contain nulls. + // + // The scope argument is optional, and defaults to window.document. + window.HTMLWidgets.findAll = function(scope, selector) { + if (arguments.length == 1) { + selector = scope; + scope = document; + } + + var nodes = scope.querySelectorAll(selector); + var results = []; + for (var i = 0; i < nodes.length; i++) { + results.push(window.HTMLWidgets.getInstance(nodes[i])); + } + return results; + }; + + var postRenderHandlers = []; + function invokePostRenderHandlers() { + while (postRenderHandlers.length) { + var handler = postRenderHandlers.shift(); + if (handler) { + handler(); + } + } + } + + // Register the given callback function to be invoked after the + // next time static widgets are rendered. 
+ window.HTMLWidgets.addPostRenderHandler = function(callback) { + postRenderHandlers.push(callback); + }; + + // Takes a new-style instance-bound definition, and returns an + // old-style class-bound definition. This saves us from having + // to rewrite all the logic in this file to accomodate both + // types of definitions. + function createLegacyDefinitionAdapter(defn) { + var result = { + name: defn.name, + type: defn.type, + initialize: function(el, width, height) { + return defn.factory(el, width, height); + }, + renderValue: function(el, x, instance) { + return instance.renderValue(x); + }, + resize: function(el, width, height, instance) { + return instance.resize(width, height); + } + }; + + if (defn.find) + result.find = defn.find; + if (defn.renderError) + result.renderError = defn.renderError; + if (defn.clearError) + result.clearError = defn.clearError; + + return result; + } +})(); diff --git a/content/find/all/index_files/jquery/jquery-3.6.0.js b/_freeze/site_libs/jquery-3.6.0/jquery-3.6.0.js similarity index 100% rename from content/find/all/index_files/jquery/jquery-3.6.0.js rename to _freeze/site_libs/jquery-3.6.0/jquery-3.6.0.js diff --git a/content/find/all/index_files/jquery/jquery-3.6.0.min.js b/_freeze/site_libs/jquery-3.6.0/jquery-3.6.0.min.js similarity index 100% rename from content/find/all/index_files/jquery/jquery-3.6.0.min.js rename to _freeze/site_libs/jquery-3.6.0/jquery-3.6.0.min.js diff --git a/content/find/all/index_files/jquery/jquery-3.6.0.min.map b/_freeze/site_libs/jquery-3.6.0/jquery-3.6.0.min.map similarity index 100% rename from content/find/all/index_files/jquery/jquery-3.6.0.min.map rename to _freeze/site_libs/jquery-3.6.0/jquery-3.6.0.min.map diff --git a/content/learn/develop/broom/index_files/kePrint/kePrint.js b/_freeze/site_libs/kePrint-0.0.1/kePrint.js similarity index 100% rename from content/learn/develop/broom/index_files/kePrint/kePrint.js rename to _freeze/site_libs/kePrint-0.0.1/kePrint.js diff --git 
a/content/learn/develop/broom/index_files/lightable/lightable.css b/_freeze/site_libs/lightable-0.0.1/lightable.css similarity index 100% rename from content/learn/develop/broom/index_files/lightable/lightable.css rename to _freeze/site_libs/lightable-0.0.1/lightable.css diff --git a/content/find/all/index_files/nouislider/jquery.nouislider.min.css b/_freeze/site_libs/nouislider-7.0.10/jquery.nouislider.min.css similarity index 100% rename from content/find/all/index_files/nouislider/jquery.nouislider.min.css rename to _freeze/site_libs/nouislider-7.0.10/jquery.nouislider.min.css diff --git a/content/find/all/index_files/nouislider/jquery.nouislider.min.js b/_freeze/site_libs/nouislider-7.0.10/jquery.nouislider.min.js similarity index 100% rename from content/find/all/index_files/nouislider/jquery.nouislider.min.js rename to _freeze/site_libs/nouislider-7.0.10/jquery.nouislider.min.js diff --git a/_freeze/site_libs/quarto-listing/list.min.js b/_freeze/site_libs/quarto-listing/list.min.js new file mode 100644 index 00000000..81318815 --- /dev/null +++ b/_freeze/site_libs/quarto-listing/list.min.js @@ -0,0 +1,2 @@ +var List;List=function(){var t={"./src/add-async.js":function(t){t.exports=function(t){return function e(r,n,s){var i=r.splice(0,50);s=(s=s||[]).concat(t.add(i)),r.length>0?setTimeout((function(){e(r,n,s)}),1):(t.update(),n(s))}}},"./src/filter.js":function(t){t.exports=function(t){return t.handlers.filterStart=t.handlers.filterStart||[],t.handlers.filterComplete=t.handlers.filterComplete||[],function(e){if(t.trigger("filterStart"),t.i=1,t.reset.filter(),void 0===e)t.filtered=!1;else{t.filtered=!0;for(var r=t.items,n=0,s=r.length;nv.page,a=new g(t[s],void 0,n),v.items.push(a),r.push(a)}return v.update(),r}m(t.slice(0),e)}},this.show=function(t,e){return this.i=t,this.page=e,v.update(),v},this.remove=function(t,e,r){for(var n=0,s=0,i=v.items.length;s-1&&r.splice(n,1),v},this.trigger=function(t){for(var e=v.handlers[t].length;e--;)v.handlers[t][e](v);return 
v},this.reset={filter:function(){for(var t=v.items,e=t.length;e--;)t[e].filtered=!1;return v},search:function(){for(var t=v.items,e=t.length;e--;)t[e].found=!1;return v}},this.update=function(){var t=v.items,e=t.length;v.visibleItems=[],v.matchingItems=[],v.templater.clear();for(var r=0;r=v.i&&v.visibleItems.lengthe},innerWindow:function(t,e,r){return t>=e-r&&t<=e+r},dotted:function(t,e,r,n,s,i,a){return this.dottedLeft(t,e,r,n,s,i)||this.dottedRight(t,e,r,n,s,i,a)},dottedLeft:function(t,e,r,n,s,i){return e==r+1&&!this.innerWindow(e,s,i)&&!this.right(e,n)},dottedRight:function(t,e,r,n,s,i,a){return!t.items[a-1].values().dotted&&(e==n&&!this.innerWindow(e,s,i)&&!this.right(e,n))}};return function(e){var n=new i(t.listContainer.id,{listClass:e.paginationClass||"pagination",item:e.item||"
  • ",valueNames:["page","dotted"],searchClass:"pagination-search-that-is-not-supposed-to-exist",sortClass:"pagination-sort-that-is-not-supposed-to-exist"});s.bind(n.listContainer,"click",(function(e){var r=e.target||e.srcElement,n=t.utils.getAttribute(r,"data-page"),s=t.utils.getAttribute(r,"data-i");s&&t.show((s-1)*n+1,n)})),t.on("updated",(function(){r(n,e)})),r(n,e)}}},"./src/parse.js":function(t,e,r){t.exports=function(t){var e=r("./src/item.js")(t),n=function(r,n){for(var s=0,i=r.length;s0?setTimeout((function(){e(r,s)}),1):(t.update(),t.trigger("parseComplete"))};return t.handlers.parseComplete=t.handlers.parseComplete||[],function(){var e=function(t){for(var e=t.childNodes,r=[],n=0,s=e.length;n]/g.exec(t)){var e=document.createElement("tbody");return e.innerHTML=t,e.firstElementChild}if(-1!==t.indexOf("<")){var r=document.createElement("div");return r.innerHTML=t,r.firstElementChild}}},a=function(e,r,n){var s=void 0,i=function(e){for(var r=0,n=t.valueNames.length;r=1;)t.list.removeChild(t.list.firstChild)},function(){var r;if("function"!=typeof t.item){if(!(r="string"==typeof t.item?-1===t.item.indexOf("<")?document.getElementById(t.item):i(t.item):s()))throw new Error("The list needs to have at least one item on init otherwise you'll have to add a template.");r=n(r,t.valueNames),e=function(){return r.cloneNode(!0)}}else e=function(e){var r=t.item(e);return i(r)}}()};t.exports=function(t){return new e(t)}},"./src/utils/classes.js":function(t,e,r){var n=r("./src/utils/index-of.js"),s=/\s+/;Object.prototype.toString;function i(t){if(!t||!t.nodeType)throw new Error("A DOM element reference is required");this.el=t,this.list=t.classList}t.exports=function(t){return new i(t)},i.prototype.add=function(t){if(this.list)return this.list.add(t),this;var e=this.array();return~n(e,t)||e.push(t),this.el.className=e.join(" "),this},i.prototype.remove=function(t){if(this.list)return this.list.remove(t),this;var 
e=this.array(),r=n(e,t);return~r&&e.splice(r,1),this.el.className=e.join(" "),this},i.prototype.toggle=function(t,e){return this.list?(void 0!==e?e!==this.list.toggle(t,e)&&this.list.toggle(t):this.list.toggle(t),this):(void 0!==e?e?this.add(t):this.remove(t):this.has(t)?this.remove(t):this.add(t),this)},i.prototype.array=function(){var t=(this.el.getAttribute("class")||"").replace(/^\s+|\s+$/g,"").split(s);return""===t[0]&&t.shift(),t},i.prototype.has=i.prototype.contains=function(t){return this.list?this.list.contains(t):!!~n(this.array(),t)}},"./src/utils/events.js":function(t,e,r){var n=window.addEventListener?"addEventListener":"attachEvent",s=window.removeEventListener?"removeEventListener":"detachEvent",i="addEventListener"!==n?"on":"",a=r("./src/utils/to-array.js");e.bind=function(t,e,r,s){for(var o=0,l=(t=a(t)).length;o32)return!1;var a=n,o=function(){var t,r={};for(t=0;t=p;b--){var j=o[t.charAt(b-1)];if(C[b]=0===m?(C[b+1]<<1|1)&j:(C[b+1]<<1|1)&j|(v[b+1]|v[b])<<1|1|v[b+1],C[b]&d){var x=l(m,b-1);if(x<=u){if(u=x,!((c=b-1)>a))break;p=Math.max(1,2*a-c)}}}if(l(m+1,a)>u)break;v=C}return!(c<0)}},"./src/utils/get-attribute.js":function(t){t.exports=function(t,e){var r=t.getAttribute&&t.getAttribute(e)||null;if(!r)for(var n=t.attributes,s=n.length,i=0;i=48&&t<=57}function i(t,e){for(var i=(t+="").length,a=(e+="").length,o=0,l=0;o=i&&l=a?-1:l>=a&&o=i?1:i-a}i.caseInsensitive=i.i=function(t,e){return i((""+t).toLowerCase(),(""+e).toLowerCase())},Object.defineProperties(i,{alphabet:{get:function(){return e},set:function(t){r=[];var s=0;if(e=t)for(;s { + if (categoriesLoaded) { + activateCategory(category); + setCategoryHash(category); + } +}; + +window["quarto-listing-loaded"] = () => { + // Process any existing hash + const hash = getHash(); + + if (hash) { + // If there is a category, switch to that + if (hash.category) { + activateCategory(hash.category); + } + // Paginate a specific listing + const listingIds = Object.keys(window["quarto-listings"]); + for (const 
listingId of listingIds) { + const page = hash[getListingPageKey(listingId)]; + if (page) { + showPage(listingId, page); + } + } + } + + const listingIds = Object.keys(window["quarto-listings"]); + for (const listingId of listingIds) { + // The actual list + const list = window["quarto-listings"][listingId]; + + // Update the handlers for pagination events + refreshPaginationHandlers(listingId); + + // Render any visible items that need it + renderVisibleProgressiveImages(list); + + // Whenever the list is updated, we also need to + // attach handlers to the new pagination elements + // and refresh any newly visible items. + list.on("updated", function () { + renderVisibleProgressiveImages(list); + setTimeout(() => refreshPaginationHandlers(listingId)); + + // Show or hide the no matching message + toggleNoMatchingMessage(list); + }); + } +}; + +window.document.addEventListener("DOMContentLoaded", function (_event) { + // Attach click handlers to categories + const categoryEls = window.document.querySelectorAll( + ".quarto-listing-category .category" + ); + + for (const categoryEl of categoryEls) { + const category = categoryEl.getAttribute("data-category"); + categoryEl.onclick = () => { + activateCategory(category); + setCategoryHash(category); + }; + } + + // Attach a click handler to the category title + // (there should be only one, but since it is a class name, handle N) + const categoryTitleEls = window.document.querySelectorAll( + ".quarto-listing-category-title" + ); + for (const categoryTitleEl of categoryTitleEls) { + categoryTitleEl.onclick = () => { + activateCategory(""); + setCategoryHash(""); + }; + } + + categoriesLoaded = true; +}); + +function toggleNoMatchingMessage(list) { + const selector = `#${list.listContainer.id} .listing-no-matching`; + const noMatchingEl = window.document.querySelector(selector); + if (noMatchingEl) { + if (list.visibleItems.length === 0) { + noMatchingEl.classList.remove("d-none"); + } else { + if 
(!noMatchingEl.classList.contains("d-none")) { + noMatchingEl.classList.add("d-none"); + } + } + } +} + +function setCategoryHash(category) { + setHash({ category }); +} + +function setPageHash(listingId, page) { + const currentHash = getHash() || {}; + currentHash[getListingPageKey(listingId)] = page; + setHash(currentHash); +} + +function getListingPageKey(listingId) { + return `${listingId}-page`; +} + +function refreshPaginationHandlers(listingId) { + const listingEl = window.document.getElementById(listingId); + const paginationEls = listingEl.querySelectorAll( + ".pagination li.page-item:not(.disabled) .page.page-link" + ); + for (const paginationEl of paginationEls) { + paginationEl.onclick = (sender) => { + setPageHash(listingId, sender.target.getAttribute("data-i")); + showPage(listingId, sender.target.getAttribute("data-i")); + return false; + }; + } +} + +function renderVisibleProgressiveImages(list) { + // Run through the visible items and render any progressive images + for (const item of list.visibleItems) { + const itemEl = item.elm; + if (itemEl) { + const progressiveImgs = itemEl.querySelectorAll( + `img[${kProgressiveAttr}]` + ); + for (const progressiveImg of progressiveImgs) { + const srcValue = progressiveImg.getAttribute(kProgressiveAttr); + if (srcValue) { + progressiveImg.setAttribute("src", srcValue); + } + progressiveImg.removeAttribute(kProgressiveAttr); + } + } + } +} + +function getHash() { + // Hashes are of the form + // #name:value|name1:value1|name2:value2 + const currentUrl = new URL(window.location); + const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; + return parseHash(hashRaw); +} + +const kAnd = "&"; +const kEquals = "="; + +function parseHash(hash) { + if (!hash) { + return undefined; + } + const hasValuesStrs = hash.split(kAnd); + const hashValues = hasValuesStrs + .map((hashValueStr) => { + const vals = hashValueStr.split(kEquals); + if (vals.length === 2) { + return { name: vals[0], value: vals[1] }; + } else { + return undefined; + } + }) + .filter((value) => { + return value !== undefined; + }); + + const hashObj = {}; + hashValues.forEach((hashValue) => { + hashObj[hashValue.name] = decodeURIComponent(hashValue.value); + }); + return hashObj; +} + +function makeHash(obj) { + return Object.keys(obj) + .map((key) => { + return `${key}${kEquals}${obj[key]}`; + }) + .join(kAnd); +} + +function setHash(obj) { + const hash = makeHash(obj); + window.history.pushState(null, null, `#${hash}`); +} + +function showPage(listingId, page) { + const list = window["quarto-listings"][listingId]; + if (list) { + list.show((page - 1) * list.page + 1, list.page); + } +} + +function activateCategory(category) { + // Deactivate existing categories + const activeEls = window.document.querySelectorAll( + ".quarto-listing-category .category.active" + ); + for (const activeEl of activeEls) { + activeEl.classList.remove("active"); + } + + // Activate this category + const categoryEl = window.document.querySelector( + `.quarto-listing-category .category[data-category='${category}'` + ); + if (categoryEl) { + categoryEl.classList.add("active"); + } + + // Filter the listings to this category + filterListingCategory(category); +} + +function filterListingCategory(category) { + const listingIds = Object.keys(window["quarto-listings"]); + for (const listingId of listingIds) { + const list = window["quarto-listings"][listingId]; + if (list) { + if (category === "") { + // resets the filter + list.filter(); + } else { + // filter to this category + list.filter(function (item) { + const itemValues = 
item.values(); + if (itemValues.categories !== null) { + const categories = itemValues.categories.split(","); + return categories.includes(category); + } else { + return false; + } + }); + } + } + } +} diff --git a/content/find/all/index_files/selectize/selectize.bootstrap3.css b/_freeze/site_libs/selectize-0.12.0/selectize.bootstrap3.css similarity index 100% rename from content/find/all/index_files/selectize/selectize.bootstrap3.css rename to _freeze/site_libs/selectize-0.12.0/selectize.bootstrap3.css diff --git a/content/find/all/index_files/selectize/selectize.min.js b/_freeze/site_libs/selectize-0.12.0/selectize.min.js similarity index 100% rename from content/find/all/index_files/selectize/selectize.min.js rename to _freeze/site_libs/selectize-0.12.0/selectize.min.js diff --git a/_freeze/start/case-study/index/execute-results/html.json b/_freeze/start/case-study/index/execute-results/html.json new file mode 100644 index 00000000..bfbefb4f --- /dev/null +++ b/_freeze/start/case-study/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "1cb3fe1e15d9cca566a4620baa74a343", + "result": { + "markdown": "---\ntitle: \"A predictive modeling case study\"\nweight: 5\ncategories:\n - model fitting\n - tuning\n - parsnip\n - recipes\n - rsample\n - workflows\n - tune\ndescription: | \n Develop, from beginning to end, a predictive model using best practices.\ntoc-location: body\ntoc-depth: 2\ntoc-title: \"\"\ncss: ../styles.css\ninclude-after-body: ../repo-actions-delete.html\n---\n\n\n\n\n\n\n## Introduction {#intro}\n\nEach of the four previous [*Get Started*](/start/) articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you'll need to start working with models. 
In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_daceac73654e71fde9b75108eb311a24'}\n::: {.cell-output-display}\n![](img/hotel.jpg){fig-align='center' width=90%}\n:::\n:::\n\n\nTo use code in this article, you will need to install the following packages: glmnet, ranger, readr, tidymodels, and vip.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-4_f7db2ba43d4675b69291ec75546df849'}\n\n```{.r .cell-code}\nlibrary(tidymodels) \n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(vip) # for variable importance plots\n```\n:::\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n\n## The Hotel Bookings Data {#data}\n\nLet's use hotel bookings data from [Antonio, Almeida, and Nunes (2019)](https://doi.org/10.1016/j.dib.2018.11.126) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a [`#TidyTuesday`](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11) dataset with a [data dictionary](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11#data-dictionary) you may want to look over to learn more about the variables. 
We'll use a slightly [edited version of the dataset](https://gist.github.com/topepo/05a74916c343e57a71c51d6bc32a21ce) for this case study.\n\nTo start, let's read our hotel data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (\"\"):\n\n\n::: {.cell layout-align=\"center\" hash='cache/hotel-import_74cc1fcf8fe97947ad7255887dfdf6e8'}\n\n```{.r .cell-code}\nlibrary(tidymodels)\nlibrary(readr)\n\nhotels <- \n read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>%\n mutate(across(where(is.character), as.factor))\n\ndim(hotels)\n#> [1] 50000 23\n```\n:::\n\n\nIn the original paper, the [authors](https://doi.org/10.1016/j.dib.2018.11.126) caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. 
To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we'll be analyzing *hotel stays* only.\n\n\n::: {.cell layout-align=\"center\" hash='cache/glimpse-hotels_fefde56d9a0d74646d115d87420ceaf9'}\n\n```{.r .cell-code}\nglimpse(hotels)\n#> Rows: 50,000\n#> Columns: 23\n#> $ hotel City_Hotel, City_Hotel, Resort_Hotel, R…\n#> $ lead_time 217, 2, 95, 143, 136, 67, 47, 56, 80, 6…\n#> $ stays_in_weekend_nights 1, 0, 2, 2, 1, 2, 0, 0, 0, 2, 1, 0, 1, …\n#> $ stays_in_week_nights 3, 1, 5, 6, 4, 2, 2, 3, 4, 2, 2, 1, 2, …\n#> $ adults 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 1, 2, …\n#> $ children none, none, none, none, none, none, chi…\n#> $ meal BB, BB, BB, HB, HB, SC, BB, BB, BB, BB,…\n#> $ country DEU, PRT, GBR, ROU, PRT, GBR, ESP, ESP,…\n#> $ market_segment Offline_TA/TO, Direct, Online_TA, Onlin…\n#> $ distribution_channel TA/TO, Direct, TA/TO, TA/TO, Direct, TA…\n#> $ is_repeated_guest 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ previous_cancellations 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ previous_bookings_not_canceled 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ reserved_room_type A, D, A, A, F, A, C, B, D, A, A, D, A, …\n#> $ assigned_room_type A, K, A, A, F, A, C, A, D, A, D, D, A, …\n#> $ booking_changes 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ deposit_type No_Deposit, No_Deposit, No_Deposit, No_…\n#> $ days_in_waiting_list 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ customer_type Transient-Party, Transient, Transient, …\n#> $ average_daily_rate 80.75, 170.00, 8.00, 81.00, 157.60, 49.…\n#> $ required_car_parking_spaces none, none, none, none, none, none, non…\n#> $ total_of_special_requests 1, 3, 2, 1, 4, 1, 1, 1, 1, 1, 0, 1, 0, …\n#> $ arrival_date 2016-09-01, 2017-08-25, 2016-11-19, 20…\n```\n:::\n\n\nWe will build a model to predict which actual hotel stays included children and/or babies, and which did not. 
Our outcome variable `children` is a factor variable with two levels:\n\n\n::: {.cell layout-align=\"center\" hash='cache/count-children_65252be0ed6f7cb8babd3646ddcf18a6'}\n\n```{.r .cell-code}\nhotels %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> children n prop\n#> \n#> 1 children 4038 0.0808\n#> 2 none 45962 0.919\n```\n:::\n\n\nWe can see that children were only in 8.1% of the reservations. This type of class imbalance can often wreak havoc on an analysis. While there are several methods for combating this issue using [recipes](/find/recipes/) (search for steps to `upsample` or `downsample`) or other more specialized packages like [themis](https://themis.tidymodels.org/), the analyses shown below analyze the data as-is.\n\n## Data Splitting & Resampling {#data-split}\n\nFor a data splitting strategy, let's reserve 25% of the stays to the test set. As in our [*Evaluate your model with resampling*](/start/resampling/#data-split) article, we know our outcome variable `children` is pretty imbalanced so we'll use a stratified random sample:\n\n\n::: {.cell layout-align=\"center\" hash='cache/tr-te-split_f60f5f3cacfad918e4134edd82789d10'}\n\n```{.r .cell-code}\nset.seed(123)\nsplits <- initial_split(hotels, strata = children)\n\nhotel_other <- training(splits)\nhotel_test <- testing(splits)\n\n# training set proportions by children\nhotel_other %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> children n prop\n#> \n#> 1 children 3027 0.0807\n#> 2 none 34473 0.919\n\n# test set proportions by children\nhotel_test %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> children n prop\n#> \n#> 1 children 1011 0.0809\n#> 2 none 11489 0.919\n```\n:::\n\n\nIn our articles so far, we've relied on 10-fold cross-validation as the primary resampling method using [`rsample::vfold_cv()`](https://rsample.tidymodels.org/reference/vfold_cv.html). 
This has created 10 different resamples of the training set (which we further split into *analysis* and *assessment* sets), producing 10 different performance metrics that we then aggregated.\n\nFor this case study, rather than using multiple iterations of resampling, let's create a single resample called a *validation set*. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the 37,500 stays that were not used for testing, which we called `hotel_other`. This split creates two new datasets:\n\n- the set held out for the purpose of measuring performance, called the *validation set*, and\n\n- the remaining data used to fit the model, called the *training set*.\n\n\n::: {.cell layout-align=\"center\" hash='cache/validation-fig_13eaf681711257e3a9d95d45ceb9a421'}\n::: {.cell-output-display}\n![](img/validation-split.svg){fig-align='center' width=50%}\n:::\n:::\n\n\nWe'll use the `validation_split()` function to allocate 20% of the `hotel_other` stays to the *validation set* and 30,000 stays to the *training set*. This means that our model performance metrics will be computed on a single set of 7,500 hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling.\n\n\n::: {.cell layout-align=\"center\" hash='cache/validation-set_4a3b1ed08c67fafb1cb73bfef87a19aa'}\n\n```{.r .cell-code}\nset.seed(234)\nval_set <- validation_split(hotel_other, \n strata = children, \n prop = 0.80)\nval_set\n#> # Validation Set Split (0.8/0.2) using stratification \n#> # A tibble: 1 × 2\n#> splits id \n#> \n#> 1 validation\n```\n:::\n\n\nThis function, like `initial_split()`, has the same `strata` argument, which uses stratified sampling to create the resample. 
This means that we'll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original `hotel_other` proportions.\n\n## A first model: penalized logistic regression {#first-model}\n\nSince our outcome variable `children` is categorical, logistic regression would be a good first model to start. Let's use a model that can perform feature selection during training. The [glmnet](https://cran.r-project.org/web/packages/glmnet/index.html) R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a *penalty* on the process so that less relevant predictors are driven towards a value of zero. One of the glmnet penalization methods, called the [lasso method](https://en.wikipedia.org/wiki/Lasso_(statistics)), can actually set the predictor slopes to zero if a large enough penalty is used.\n\n### Build the model\n\nTo specify a penalized logistic regression model that uses a feature selection penalty, let's use the parsnip package with the [glmnet engine](/find/parsnip/):\n\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-model_a96ebf59d90ba0221e9cef26eb80aa7b'}\n\n```{.r .cell-code}\nlr_mod <- \n logistic_reg(penalty = tune(), mixture = 1) %>% \n set_engine(\"glmnet\")\n```\n:::\n\n\nWe'll set the `penalty` argument to `tune()` as a placeholder for now. This is a model hyperparameter that we will [tune](/start/tuning/) to find the best value for making predictions with our data. Setting `mixture` to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model.\n\n### Create the recipe\n\nLet's create a [recipe](/start/recipes/) to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. 
We have already introduced a [number of useful recipe steps](/start/recipes/#features) for creating features from dates:\n\n- `step_date()` creates predictors for the year, month, and day of the week.\n\n- `step_holiday()` generates a set of indicator variables for specific holidays. Although we don't know where these two hotels are located, we do know that the countries for origin for most stays are based in Europe.\n\n- `step_rm()` removes variables; here we'll use it to remove the original date variable since we no longer want it in the model.\n\nAdditionally, all categorical predictors (e.g., `distribution_channel`, `hotel`, ...) should be converted to dummy variables, and all numeric predictors need to be centered and scaled.\n\n- `step_dummy()` converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data.\n\n- `step_zv()` removes indicator variables that only contain a single unique value (e.g. all zeros). 
This is important because, for penalized models, the predictors should be centered and scaled.\n\n- `step_normalize()` centers and scales numeric variables.\n\nPutting all these steps together into a recipe for a penalized logistic regression model, we have:\n\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-features_1754f74bb66129b1a296558fe696a89a'}\n\n```{.r .cell-code}\nholidays <- c(\"AllSouls\", \"AshWednesday\", \"ChristmasEve\", \"Easter\", \n \"ChristmasDay\", \"GoodFriday\", \"NewYearsDay\", \"PalmSunday\")\n\nlr_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date, holidays = holidays) %>% \n step_rm(arrival_date) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors()) %>% \n step_normalize(all_predictors())\n```\n:::\n\n\n### Create the workflow\n\nAs we introduced in [*Preprocess your data with recipes*](/start/recipes/#fit-workflow), let's bundle the model and recipe into a single `workflow()` object to make management of the R objects easier:\n\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-workflow_ec3f7a10278bda31b9c010911df74524'}\n\n```{.r .cell-code}\nlr_workflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(lr_recipe)\n```\n:::\n\n\n### Create the grid for tuning\n\nBefore we fit this model, we need to set up a grid of `penalty` values to tune. In our [*Tune model parameters*](/start/tuning/) article, we used [`dials::grid_regular()`](start/tuning/#tune-grid) to create an expanded grid based on a combination of two hyperparameters. 
Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values:\n\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-grid_76318cc70d9ef36bd335b85845041a0b'}\n\n```{.r .cell-code}\nlr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))\n\nlr_reg_grid %>% top_n(-5) # lowest penalty values\n#> Selecting by penalty\n#> # A tibble: 5 × 1\n#> penalty\n#> \n#> 1 0.0001 \n#> 2 0.000127\n#> 3 0.000161\n#> 4 0.000204\n#> 5 0.000259\nlr_reg_grid %>% top_n(5) # highest penalty values\n#> Selecting by penalty\n#> # A tibble: 5 × 1\n#> penalty\n#> \n#> 1 0.0386\n#> 2 0.0489\n#> 3 0.0621\n#> 4 0.0788\n#> 5 0.1\n```\n:::\n\n\n### Train and tune the model\n\nLet's use `tune::tune_grid()` to train these 30 penalized logistic regression models. We'll also save the validation set predictions (via the call to `control_grid()`) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate---the proportion of stays including children--- is very low for these data).\n\n\n::: {.cell layout-align=\"center\"}\n\n```{.r .cell-code}\nlr_res <- \n lr_workflow %>% \n tune_grid(val_set,\n grid = lr_reg_grid,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n```\n:::\n\n\nIt might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values:\n\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-results_c50e09f68b13dd92547a88be40103c50'}\n\n```{.r .cell-code}\nlr_plot <- \n lr_res %>% \n collect_metrics() %>% \n ggplot(aes(x = penalty, y = mean)) + \n geom_point() + \n geom_line() + \n ylab(\"Area under the ROC Curve\") +\n scale_x_log10(labels = scales::label_number())\n\nlr_plot \n```\n\n::: 
{.cell-output-display}\n![](figs/logistic-results-1.svg){fig-align='center' width=576}\n:::\n:::\n\n\nThis plots shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. This happens because a large enough penalty will remove *all* predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class).\n\nOur model performance seems to plateau at the smaller penalty values, so going by the `roc_auc` metric alone could lead us to multiple options for the \"best\" value for this hyperparameter:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-17_fc673758b9803b4ffb6aa88ec06b869f'}\n\n```{.r .cell-code}\ntop_models <-\n lr_res %>% \n show_best(\"roc_auc\", n = 15) %>% \n arrange(penalty) \ntop_models\n#> # A tibble: 15 × 7\n#> penalty .metric .estimator mean n std_err .config \n#> \n#> 1 0.000127 roc_auc binary 0.872 1 NA Preprocessor1_Model02\n#> 2 0.000161 roc_auc binary 0.872 1 NA Preprocessor1_Model03\n#> 3 0.000204 roc_auc binary 0.873 1 NA Preprocessor1_Model04\n#> 4 0.000259 roc_auc binary 0.873 1 NA Preprocessor1_Model05\n#> 5 0.000329 roc_auc binary 0.874 1 NA Preprocessor1_Model06\n#> 6 0.000418 roc_auc binary 0.874 1 NA Preprocessor1_Model07\n#> 7 0.000530 roc_auc binary 0.875 1 NA Preprocessor1_Model08\n#> 8 0.000672 roc_auc binary 0.875 1 NA Preprocessor1_Model09\n#> 9 0.000853 roc_auc binary 0.876 1 NA Preprocessor1_Model10\n#> 10 0.00108 roc_auc binary 0.876 1 NA Preprocessor1_Model11\n#> 11 0.00137 roc_auc binary 0.876 1 NA Preprocessor1_Model12\n#> 12 0.00174 roc_auc binary 0.876 1 NA Preprocessor1_Model13\n#> 13 0.00221 roc_auc binary 0.876 1 NA Preprocessor1_Model14\n#> 14 0.00281 
roc_auc binary 0.875 1 NA Preprocessor1_Model15\n#> 15 0.00356 roc_auc binary 0.873 1 NA Preprocessor1_Model16\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='cache/top-cand-mods_f38baa6162be8c16e9649554fb10ca56'}\n\n:::\n\n\nEvery candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used `select_best()`, it would return candidate model 11 with a penalty value of 0.00137, shown with the dotted line below.\n\n\n::: {.cell layout-align=\"center\" hash='cache/lr-plot-lines_2ba82e58bf011149e6c30a059a0b6045'}\n::: {.cell-output-display}\n![](figs/lr-plot-lines-1.svg){fig-align='center' width=576}\n:::\n:::\n\n\nHowever, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. For example, candidate model 12 with a penalty value of 0.00174 has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. 
If performance is about the same, we'd prefer to choose a higher penalty value.\n\nLet's select this value and visualize the validation set ROC curve:\n\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-best_536639b58aee05b2dc64901c039c81c4'}\n\n```{.r .cell-code}\nlr_best <- \n lr_res %>% \n collect_metrics() %>% \n arrange(penalty) %>% \n slice(12)\nlr_best\n#> # A tibble: 1 × 7\n#> penalty .metric .estimator mean n std_err .config \n#> \n#> 1 0.00137 roc_auc binary 0.876 1 NA Preprocessor1_Model12\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='cache/logistic-roc-curve_4353a2ff045f53998a16d55d50d392e8'}\n\n```{.r .cell-code}\nlr_auc <- \n lr_res %>% \n collect_predictions(parameters = lr_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Logistic Regression\")\n\nautoplot(lr_auc)\n```\n\n::: {.cell-output-display}\n![](figs/logistic-roc-curve-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method.\n\n## A second model: tree-based ensemble {#second-model}\n\nAn effective and low-maintenance modeling technique is a *random forest*. This model was also used in our [*Evaluate your model with resampling*](/start/resampling/) article. Compared to logistic regression, a random forest model is more flexible. A random forest is an *ensemble model* typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. 
Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.).\n\n### Build the model and improve training time\n\nAlthough the default hyperparameters for random forests tend to give reasonable results, we'll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do [parallel processing](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) for you, and allows users to use multiple cores or separate machines to fit models.\n\nBut, here we are using a single validation set, so parallelization isn't an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do:\n\n\n::: {.cell layout-align=\"center\" hash='cache/num-cores_d8782f1f1a90527c8c8613f61864599a'}\n\n```{.r .cell-code}\ncores <- parallel::detectCores()\ncores\n#> [1] 10\n```\n:::\n\n\nWe have 10 cores to work with. We can pass this information to the ranger engine when we set up our parsnip `rand_forest()` model. 
To enable parallel processing, we can pass engine-specific arguments like `num.threads` to ranger when we set the engine:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-model_77a4ff80f861d79482f76885edba8a97'}\n\n```{.r .cell-code}\nrf_mod <- \n rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores) %>% \n set_mode(\"classification\")\n```\n:::\n\n\nThis works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you --- we typically do not recommend relying on the modeling engine (like we did here) to do this.\n\nIn this model, we used `tune()` as a placeholder for the `mtry` and `min_n` argument values, because these are our two hyperparameters that we will [tune](/start/tuning/).\n\n### Create the recipe and workflow\n\nUnlike penalized logistic regression models, random forest models do not require [dummy](https://bookdown.org/max/FES/categorical-trees.html) or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our `arrival_date` variable. 
As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data.\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-features_14c8aea1fbec48c0000c6e8618b6caac'}\n\n```{.r .cell-code}\nrf_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date) %>% \n step_rm(arrival_date) \n```\n:::\n\n\nAdding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-workflow_756ce147a56c9a9fe820caec47dcc750'}\n\n```{.r .cell-code}\nrf_workflow <- \n workflow() %>% \n add_model(rf_mod) %>% \n add_recipe(rf_recipe)\n```\n:::\n\n\n### Train and tune the model\n\nWhen we set up our parsnip model, we chose two hyperparameters for tuning:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-26_2c75b4563a180c5ca2926ecd22b51440'}\n\n```{.r .cell-code}\nrf_mod\n#> Random Forest Model Specification (classification)\n#> \n#> Main Arguments:\n#> mtry = tune()\n#> trees = 1000\n#> min_n = tune()\n#> \n#> Engine-Specific Arguments:\n#> num.threads = cores\n#> \n#> Computational engine: ranger\n\n# show what will be tuned\nextract_parameter_set_dials(rf_mod)\n#> Collection of 2 parameters for tuning\n#> \n#> identifier type object\n#> mtry mtry nparam[?]\n#> min_n min_n nparam[+]\n#> \n#> Model parameters needing finalization:\n#> # Randomly Selected Predictors ('mtry')\n#> \n#> See `?dials::finalize` or `?dials::update.parameters` for more information.\n```\n:::\n\n\nThe `mtry` hyperparameter sets the number of predictor variables that each node in the decision tree \"sees\" and can learn about, so it can range from 1 to the total number of features present; when `mtry` = all possible features, the model is the same as bagging decision trees. 
The `min_n` hyperparameter sets the minimum `n` to split at any node.\n\nWe will use a space-filling design to tune, with 25 candidate models:\n\n\n::: {.cell layout-align=\"center\"}\n\n```{.r .cell-code}\nset.seed(345)\nrf_res <- \n rf_workflow %>% \n tune_grid(val_set,\n grid = 25,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n#> i Creating pre-processing data to finalize unknown parameter: mtry\n```\n:::\n\n\nThe message printed above *\"Creating pre-processing data to finalize unknown parameter: mtry\"* is related to the size of the data set. Since `mtry` depends on the number of predictors in the data set, `tune_grid()` determines the upper bound for `mtry` once it receives the data.\n\nHere are our top 5 random forest models, out of the 25 candidates:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-28_6e67c852131ceddb43d3e962fae7170c'}\n\n```{.r .cell-code}\nrf_res %>% \n show_best(metric = \"roc_auc\")\n#> # A tibble: 5 × 8\n#> mtry min_n .metric .estimator mean n std_err .config \n#> \n#> 1 8 7 roc_auc binary 0.926 1 NA Preprocessor1_Model13\n#> 2 12 7 roc_auc binary 0.926 1 NA Preprocessor1_Model01\n#> 3 13 4 roc_auc binary 0.925 1 NA Preprocessor1_Model05\n#> 4 9 12 roc_auc binary 0.924 1 NA Preprocessor1_Model19\n#> 5 6 18 roc_auc binary 0.924 1 NA Preprocessor1_Model24\n```\n:::\n\n\nRight away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of 0.876.\n\nPlotting the results of the tuning process highlights that both `mtry` (number of predictors at each node) and `min_n` (minimum number of data points required to keep splitting) should be fairly small to optimize performance. 
However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values --- all but one of the ROC AUC values are greater than 0.90.\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-results_d7b9a58a8c52195f5fb2cf1f291a0edf'}\n\n```{.r .cell-code}\nautoplot(rf_res)\n```\n\n::: {.cell-output-display}\n![](figs/rf-results-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nLet's select the best model according to the ROC AUC metric. Our final tuning parameter values are:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-best_d4f2f0a02aebd94d6144c9e69c27decb'}\n\n```{.r .cell-code}\nrf_best <- \n rf_res %>% \n select_best(metric = \"roc_auc\")\nrf_best\n#> # A tibble: 1 × 3\n#> mtry min_n .config \n#> \n#> 1 8 7 Preprocessor1_Model13\n```\n:::\n\n\nTo calculate the data needed to plot the ROC curve, we use `collect_predictions()`. This is only possible after tuning with `control_grid(save_pred = TRUE)`. In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-31_cab5591e0019917181e2ae7296fea2de'}\n\n```{.r .cell-code}\nrf_res %>% \n collect_predictions()\n#> # A tibble: 187,500 × 8\n#> id .pred_children .pred_none .row mtry min_n children .config \n#> \n#> 1 validation 0.152 0.848 13 12 7 none Preprocessor…\n#> 2 validation 0.0302 0.970 20 12 7 none Preprocessor…\n#> 3 validation 0.513 0.487 22 12 7 children Preprocessor…\n#> 4 validation 0.0103 0.990 23 12 7 none Preprocessor…\n#> 5 validation 0.0111 0.989 31 12 7 none Preprocessor…\n#> 6 validation 0 1 38 12 7 none Preprocessor…\n#> 7 validation 0 1 39 12 7 none Preprocessor…\n#> 8 validation 0.00325 0.997 50 12 7 none Preprocessor…\n#> 9 validation 0.0241 0.976 54 12 7 none Preprocessor…\n#> 10 validation 0.0441 0.956 57 12 7 children Preprocessor…\n#> # ℹ 187,490 more rows\n```\n:::\n\n\nTo filter the predictions 
for only our best random forest model, we can use the `parameters` argument and pass it our tibble with the best hyperparameter values from tuning, which we called `rf_best`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-roc_82e46ab48227fdb75b3701700019fa82'}\n\n```{.r .cell-code}\nrf_auc <- \n rf_res %>% \n collect_predictions(parameters = rf_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Random Forest\")\n```\n:::\n\n\nNow, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-lr-roc-curve_4dd7892058b1fa9f2249484f1e97d958'}\n\n```{.r .cell-code}\nbind_rows(rf_auc, lr_auc) %>% \n ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + \n geom_path(lwd = 1.5, alpha = 0.8) +\n geom_abline(lty = 3) + \n coord_equal() + \n scale_color_viridis_d(option = \"plasma\", end = .6)\n```\n\n::: {.cell-output-display}\n![](figs/rf-lr-roc-curve-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe random forest is uniformly better across event probability thresholds.\n\n## The last fit {#last-fit}\n\nOur goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set.\n\nWe'll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: `importance = \"impurity\"`. 
This will provide *variable importance* scores for this last model, which gives some insight into which predictors drive model performance.\n\n\n::: {.cell layout-align=\"center\"}\n\n```{.r .cell-code}\n# the last model\nlast_rf_mod <- \n rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores, importance = \"impurity\") %>% \n set_mode(\"classification\")\n\n# the last workflow\nlast_rf_workflow <- \n rf_workflow %>% \n update_model(last_rf_mod)\n\n# the last fit\nset.seed(345)\nlast_rf_fit <- \n last_rf_workflow %>% \n last_fit(splits)\n\nlast_rf_fit\n#> # Resampling results\n#> # Manual resampling \n#> # A tibble: 1 × 6\n#> splits id .metrics .notes .predictions .workflow \n#> \n#> 1 train/test sp… \n```\n:::\n\n\nThis fitted workflow contains *everything*, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance?\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-35_ac7dc8121a115fba4e4d230461049376'}\n\n```{.r .cell-code}\nlast_rf_fit %>% \n collect_metrics()\n#> # A tibble: 2 × 4\n#> .metric .estimator .estimate .config \n#> \n#> 1 accuracy binary 0.946 Preprocessor1_Model1\n#> 2 roc_auc binary 0.923 Preprocessor1_Model1\n```\n:::\n\n\nThis ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data.\n\nWe can access those variable importance scores via the `.workflow` column. 
We can [extract out the fit](https://tune.tidymodels.org/reference/extract-tune.html) from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-importance_bb789f3fc826121c61561fe2b64d0fd7'}\n\n```{.r .cell-code}\nlast_rf_fit %>% \n extract_fit_parsnip() %>% \n vip(num_features = 20)\n```\n\n::: {.cell-output-display}\n![](figs/rf-importance-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned.\n\nLet's generate our last ROC curve to visualize. Since the event we are predicting is the first level in the `children` factor (\"children\"), we provide `roc_curve()` with the [relevant class probability](https://yardstick.tidymodels.org/reference/roc_curve.html#relevant-level) `.pred_children`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/test-set-roc-curve_75d6e9b408cd8f9d578418f38f1ad9e8'}\n\n```{.r .cell-code}\nlast_rf_fit %>% \n collect_predictions() %>% \n roc_curve(children, .pred_children) %>% \n autoplot()\n```\n\n::: {.cell-output-display}\n![](figs/test-set-roc-curve-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nBased on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data.\n\n## Where to next? {#next}\n\nIf you've made it to the end of this series of [*Get Started*](/start/) articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. 
After you are comfortable with the basics we introduced in this series, you can [learn how to go farther](/learn/) with tidymodels in your modeling and machine learning projects.\n\nHere are some more ideas for where to go next:\n\n- Study up on statistics and modeling with our comprehensive [books](/books/).\n\n- Dig deeper into the [package documentation sites](/packages/) to find functions that meet your modeling needs. Use the [searchable tables](/find/) to explore what is possible.\n\n- Keep up with the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/).\n\n- Find ways to ask for [help](/help/) and [contribute to tidymodels](/contribute) to help others.\n\n\\###\n\n
    Happy modeling!
    \n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> glmnet * 4.1-7 2023-03-23 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> ranger * 0.15.1 2023-04-03 [1] CRAN (R 4.3.0)\n#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> vip * 0.3.2 2020-12-17 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } 
+} \ No newline at end of file diff --git a/_freeze/start/models/index/execute-results/html.json b/_freeze/start/models/index/execute-results/html.json new file mode 100644 index 00000000..a12abc6f --- /dev/null +++ b/_freeze/start/models/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "fcc6522cb336eaff84eb734fc480d5e0", + "result": { + "markdown": "---\ntitle: \"Build a model\"\nweight: 1\ncategories:\n - model fitting\n - parsnip\n - broom\ndescription: | \n Get started by learning how to specify and train a model using tidymodels.\ntoc-location: body\ntoc-depth: 2\ntoc-title: \"\"\ncss: ../styles.css\ninclude-after-body: ../repo-actions-delete.html\n---\n\n\n\n\n\n\n\n\n## Introduction {#intro}\n\nHow do you create a statistical model using tidymodels? In this article, we will walk you through the steps. We start with data for modeling, learn how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/), and understand why these functions are designed this way.\n\nTo use code in this article, you will need to install the following packages: broom.mixed, dotwhisker, readr, rstanarm, and tidymodels.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_85ae9ade00802be6b86e52368433af07'}\n\n```{.r .cell-code}\nlibrary(tidymodels) # for the parsnip package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(broom.mixed) # for converting bayesian models to tidy tibbles\nlibrary(dotwhisker) # for visualizing regression results\n```\n:::\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n\n\n## The Sea Urchins Data {#data}\n\nLet's use the data from [Constable (1993)](https://link.springer.com/article/10.1007/BF00349318) to explore how three different feeding regimes affect the size of sea urchins over time. 
The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed. \n\nTo start, let's read our urchins data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (\"\"):\n\n\n::: {.cell layout-align=\"center\" hash='cache/data_39e92d22ba6540af904d9844fd3cad57'}\n\n```{.r .cell-code}\nurchins <-\n # Data were assembled for a tutorial \n # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html\n read_csv(\"https://tidymodels.org/start/models/urchins.csv\") %>% \n # Change the names to be a little more verbose\n setNames(c(\"food_regime\", \"initial_volume\", \"width\")) %>% \n # Factors are very helpful for modeling, so we convert one column\n mutate(food_regime = factor(food_regime, levels = c(\"Initial\", \"Low\", \"High\")))\n#> Rows: 72 Columns: 3\n#> ── Column specification ──────────────────────────────────────────────\n#> Delimiter: \",\"\n#> chr (1): TREAT\n#> dbl (2): IV, SUTW\n#> \n#> ℹ Use `spec()` to retrieve the full column specification for this data.\n#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.\n```\n:::\n\n\nLet's take a quick look at the data:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_1e4ae78769856ab4fcf4afd423a5a206'}\n\n```{.r .cell-code}\nurchins\n#> # A tibble: 72 × 3\n#> food_regime initial_volume width\n#> \n#> 1 Initial 3.5 0.01 \n#> 2 Initial 5 0.02 \n#> 3 Initial 8 0.061\n#> 4 Initial 10 0.051\n#> 5 Initial 13 0.041\n#> 6 Initial 13 0.061\n#> 7 Initial 15 0.041\n#> 8 Initial 15 0.071\n#> 9 Initial 16 0.092\n#> 10 Initial 17 0.051\n#> # ℹ 62 more rows\n```\n:::\n\n\nThe urchins data is a [tibble](https://tibble.tidyverse.org/index.html). If you are new to tibbles, the best place to start is the [tibbles chapter](https://r4ds.had.co.nz/tibbles.html) in *R for Data Science*. 
For each of the 72 urchins, we know their:\n\n+ experimental feeding regime group (`food_regime`: either `Initial`, `Low`, or `High`),\n+ size in milliliters at the start of the experiment (`initial_volume`), and\n+ suture width at the end of the experiment (`width`).\n\nAs a first step in modeling, it's always a good idea to plot the data: \n\n\n::: {.cell layout-align=\"center\" hash='cache/urchin-plot_1603f2b800553a9fcdb01f08303e4c82'}\n\n```{.r .cell-code}\nggplot(urchins,\n aes(x = initial_volume, \n y = width, \n group = food_regime, \n col = food_regime)) + \n geom_point() + \n geom_smooth(method = lm, se = FALSE) +\n scale_color_viridis_d(option = \"plasma\", end = .7)\n#> `geom_smooth()` using formula = 'y ~ x'\n```\n\n::: {.cell-output-display}\n![](figs/urchin-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nWe can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition.\n\n## Build and fit a model {#build-model}\n\nA standard two-way analysis of variance ([ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm)) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let's build a model that allows for two-way interactions. Specifying an R formula with our variables in this way: \n\n\n::: {.cell layout-align=\"center\" hash='cache/two-way-int_a0f9010460df05a6b677180db1efa18e'}\n\n```{.r .cell-code}\nwidth ~ initial_volume * food_regime\n```\n:::\n\n\nallows our regression model depending on initial volume to have separate slopes and intercepts for each food regime. \n\nFor this kind of model, ordinary least squares is a good initial approach. 
With tidymodels, we start by specifying the _functional form_ of the model that we want using the [parsnip package](https://parsnip.tidymodels.org/). Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is [\"linear regression\"](https://parsnip.tidymodels.org/reference/linear_reg.html). We can declare this with: \n\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-tm_e2a755c4c6624fa5215375d5f0aef776'}\n\n```{.r .cell-code}\nlinear_reg()\n#> Linear Regression Model Specification (regression)\n#> \n#> Computational engine: lm\n```\n:::\n\n\nThat is pretty underwhelming since, on its own, it doesn't really do much. However, now that the type of model has been specified, we can think about a method for _fitting_ or training the model, the model **engine**. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for `linear_reg()` is `\"lm\"` for ordinary least squares, as you can see above. We could set a non-default option instead:\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-spec_cda99ca6749ca900647312148fe394e0'}\n\n```{.r .cell-code}\nlinear_reg() %>% \n set_engine(\"keras\")\n#> Linear Regression Model Specification (regression)\n#> \n#> Computational engine: keras\n```\n:::\n\n\nThe [documentation page for `linear_reg()`](https://parsnip.tidymodels.org/reference/linear_reg.html) lists all the possible engines. 
We'll save our model object using the default engine as `lm_mod`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-10_c9235c54519f3ab630742819ff3ae69a'}\n\n```{.r .cell-code}\nlm_mod <- linear_reg()\n```\n:::\n\n\nFrom here, the model can be estimated or trained using the [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) function:\n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-fit_ac90c3b9f0b2cea68ef81afa87f0593d'}\n\n```{.r .cell-code}\nlm_fit <- \n lm_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\nlm_fit\n#> parsnip model object\n#> \n#> \n#> Call:\n#> stats::lm(formula = width ~ initial_volume * food_regime, data = data)\n#> \n#> Coefficients:\n#> (Intercept) initial_volume \n#> 0.0331216 0.0015546 \n#> food_regimeLow food_regimeHigh \n#> 0.0197824 0.0214111 \n#> initial_volume:food_regimeLow initial_volume:food_regimeHigh \n#> -0.0012594 0.0005254\n```\n:::\n\n\nPerhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the `summary()` function for `lm` objects can provide that, it gives the results back in an unwieldy format. Many models have a `tidy()` method that provides the summary results in a more predictable and useful format (e.g. 
a data frame with standard column names): \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-table_f31fd5d2fb97daf14365207c56f7d982'}\n\n```{.r .cell-code}\ntidy(lm_fit)\n#> # A tibble: 6 × 5\n#> term estimate std.error statistic p.value\n#> \n#> 1 (Intercept) 0.0331 0.00962 3.44 0.00100 \n#> 2 initial_volume 0.00155 0.000398 3.91 0.000222\n#> 3 food_regimeLow 0.0198 0.0130 1.52 0.133 \n#> 4 food_regimeHigh 0.0214 0.0145 1.47 0.145 \n#> 5 initial_volume:food_regimeLow -0.00126 0.000510 -2.47 0.0162 \n#> 6 initial_volume:food_regimeHigh 0.000525 0.000702 0.748 0.457\n```\n:::\n\n\nThis kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package:\n\n\n::: {.cell layout-align=\"center\" hash='cache/dwplot_39f0dcc3141b08785643d2da2b33dda8'}\n\n```{.r .cell-code}\ntidy(lm_fit) %>% \n dwplot(dot_args = list(size = 2, color = \"black\"),\n whisker_args = list(color = \"black\"),\n vline = geom_vline(xintercept = 0, colour = \"grey50\", linetype = 2))\n```\n\n::: {.cell-output-display}\n![](figs/dwplot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\n\n## Use a model to predict {#predict-model}\n\nThis fitted object `lm_fit` has the `lm` model output built-in, which you can access with `lm_fit$fit`, but there are some benefits to using the fitted parsnip model object when it comes to predicting.\n\nSuppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. 
To create such a graph, we start with some new example data that we will make predictions for, to show in our graph:\n\n\n::: {.cell layout-align=\"center\" hash='cache/new-points_ff1664824d31fadb2ca327ca199e357a'}\n\n```{.r .cell-code}\nnew_points <- expand.grid(initial_volume = 20, \n food_regime = c(\"Initial\", \"Low\", \"High\"))\nnew_points\n#> initial_volume food_regime\n#> 1 20 Initial\n#> 2 20 Low\n#> 3 20 High\n```\n:::\n\n\nTo get our predicted results, we can use the `predict()` function to find the mean values at 20ml. \n\nIt is also important to communicate the variability, so we also need to find the predicted confidence intervals. If we had used `lm()` to fit the model directly, a few minutes of reading the [documentation page](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/predict.lm.html) for `predict.lm()` would explain how to do this. However, if we decide to use a different model to estimate urchin size (_spoiler:_ we will!), it is likely that a completely different syntax would be required. \n\nInstead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values. \n\nFirst, let's generate the mean body width values: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-pred-mean_d903c9188834966ac82e0773cb20c03c'}\n\n```{.r .cell-code}\nmean_pred <- predict(lm_fit, new_data = new_points)\nmean_pred\n#> # A tibble: 3 × 1\n#> .pred\n#> \n#> 1 0.0642\n#> 2 0.0588\n#> 3 0.0961\n```\n:::\n\n\nWhen making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. 
This makes it easy to combine the original data and the predictions in a usable format: \n\n\n::: {.cell layout-align=\"center\" hash='cache/lm-all-pred_de80c71006396b25f96613de87596cbd'}\n\n```{.r .cell-code}\nconf_int_pred <- predict(lm_fit, \n new_data = new_points, \n type = \"conf_int\")\nconf_int_pred\n#> # A tibble: 3 × 2\n#> .pred_lower .pred_upper\n#> \n#> 1 0.0555 0.0729\n#> 2 0.0499 0.0678\n#> 3 0.0870 0.105\n\n# Now combine: \nplot_data <- \n new_points %>% \n bind_cols(mean_pred) %>% \n bind_cols(conf_int_pred)\n\n# and plot:\nggplot(plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, \n ymax = .pred_upper),\n width = .2) + \n labs(y = \"urchin size\")\n```\n\n::: {.cell-output-display}\n![](figs/lm-all-pred-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\n## Model with a different engine {#new-engine}\n\nEveryone on your team is happy with that plot _except_ that one person who just read their first book on [Bayesian analysis](https://bayesian.org/what-is-bayesian-analysis/). They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a [_prior distribution_](https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7) needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). 
After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors _wide_ using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom).\n\nThe [documentation](https://mc-stan.org/rstanarm/articles/priors.html) on the rstanarm package shows us that the `stan_glm()` function can be used to estimate this model, and that the function arguments that need to be specified are called `prior` and `prior_intercept`. It turns out that `linear_reg()` has a [`stan` engine](https://parsnip.tidymodels.org/reference/linear_reg.html#details). Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to [`parsnip::set_engine()`](https://parsnip.tidymodels.org/reference/set_engine.html). After that, the same exact `fit()` call is used:\n\n\n::: {.cell layout-align=\"center\" hash='cache/go-stan_eddd72fd40bea602d679d53647cfec4f'}\n\n```{.r .cell-code}\n# set the prior distribution\nprior_dist <- rstanarm::student_t(df = 1)\n\nset.seed(123)\n\n# make the parsnip model\nbayes_mod <- \n linear_reg() %>% \n set_engine(\"stan\", \n prior_intercept = prior_dist, \n prior = prior_dist) \n\n# train the model\nbayes_fit <- \n bayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n\nprint(bayes_fit, digits = 5)\n#> parsnip model object\n#> \n#> stan_glm\n#> family: gaussian [identity]\n#> formula: width ~ initial_volume * food_regime\n#> observations: 72\n#> predictors: 6\n#> ------\n#> Median MAD_SD \n#> (Intercept) 0.03336 0.01003\n#> initial_volume 0.00156 0.00040\n#> food_regimeLow 0.01963 0.01308\n#> food_regimeHigh 0.02120 0.01421\n#> initial_volume:food_regimeLow -0.00126 0.00051\n#> initial_volume:food_regimeHigh 0.00054 0.00070\n#> \n#> Auxiliary parameter(s):\n#> Median MAD_SD \n#> sigma 0.02129 0.00188\n#> \n#> ------\n#> * For help interpreting the 
printed output see ?print.stanreg\n#> * For info on the priors used see ?prior_summary.stanreg\n```\n:::\n\n\nThis kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use `set.seed()` to ensure that the same (pseudo-)random numbers are generated each time we run this code. The number `123` isn't special or related to our data; it is just a \"seed\" used to choose random numbers.\n\nTo update the parameter table, the `tidy()` method is once again used: \n\n\n::: {.cell layout-align=\"center\" hash='cache/tidy-stan_bb45497a37b521633e968f3529225176'}\n\n```{.r .cell-code}\ntidy(bayes_fit, conf.int = TRUE)\n#> # A tibble: 6 × 5\n#> term estimate std.error conf.low conf.high\n#> \n#> 1 (Intercept) 0.0334 0.0100 0.0179 0.0493 \n#> 2 initial_volume 0.00156 0.000404 0.000876 0.00219 \n#> 3 food_regimeLow 0.0196 0.0131 -0.00271 0.0414 \n#> 4 food_regimeHigh 0.0212 0.0142 -0.00289 0.0455 \n#> 5 initial_volume:food_regimeLow -0.00126 0.000515 -0.00213 -0.000364\n#> 6 initial_volume:food_regimeHigh 0.000541 0.000696 -0.000669 0.00174\n```\n:::\n\n\nA goal of the tidymodels packages is that the **interfaces to common tasks are standardized** (as seen in the `tidy()` results above). 
The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax:\n\n\n::: {.cell layout-align=\"center\" hash='cache/stan-pred_ceeac20323f8d65322224aef7a6bbd72'}\n\n```{.r .cell-code}\nbayes_plot_data <- \n new_points %>% \n bind_cols(predict(bayes_fit, new_data = new_points)) %>% \n bind_cols(predict(bayes_fit, new_data = new_points, type = \"conf_int\"))\n\nggplot(bayes_plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + \n labs(y = \"urchin size\") + \n ggtitle(\"Bayesian model with t(1) prior distribution\")\n```\n\n::: {.cell-output-display}\n![](figs/stan-pred-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThis isn't very different from the non-Bayesian results (except in interpretation). \n\n::: {.callout-note}\nThe [parsnip](https://parsnip.tidymodels.org/) package can work with many model types, engines, and arguments. Check out [tidymodels.org/find/parsnip](/find/parsnip/) to see what is available. \n:::\n\n## Why does it work that way? {#why}\n\nThe extra step of defining the model using a function like `linear_reg()` might seem superfluous since a call to `lm()` is much more succinct. However, the problem with standard modeling functions is that they don't separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can't recycle those computations. \n\nAlso, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using a single function call). [Model tuning](/start/tuning/) with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if `linear_reg()` immediately fit the model. 
\n\nIf you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (`%>%`). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the _data_ as the first argument. For example: \n\n\n::: {.cell layout-align=\"center\" hash='cache/tidy-data_5fe8b739527b2f81ab4b0be04a791074'}\n\n```{.r .cell-code}\nurchins %>% \n group_by(food_regime) %>% \n summarize(med_vol = median(initial_volume))\n#> # A tibble: 3 × 2\n#> food_regime med_vol\n#> \n#> 1 Initial 20.5\n#> 2 Low 19.2\n#> 3 High 15\n```\n:::\n\n\nwhereas the modeling code uses the pipe to pass around the _model object_:\n\n\n::: {.cell layout-align=\"center\" hash='cache/tidy-model_1494c19f5cade983524eb65c04ab2513'}\n\n```{.r .cell-code}\nbayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n```\n:::\n\n\nThis may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-22_7497a76ac6f0cccf90bc271055a00601'}\n\n```{.r .cell-code}\nggplot(urchins,\n aes(initial_volume, width)) + # returns a ggplot object \n geom_jitter() + # same\n geom_smooth(method = lm, se = FALSE) + # same \n labs(x = \"Volume\", y = \"Width\") # etc\n```\n:::\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 
2023-03-11 [1] CRAN (R 4.3.0)\n#> broom.mixed * 0.2.9.4 2022-04-17 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dotwhisker * 0.7.4 2021-09-02 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> rstanarm * 2.21.4 2023-04-08 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/start/recipes/index/execute-results/html.json b/_freeze/start/recipes/index/execute-results/html.json new file mode 100644 index 00000000..b33da076 --- /dev/null +++ b/_freeze/start/recipes/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "118f50b736df8c15350c4248b4453b2d", + "result": { + "markdown": "---\ntitle: \"Preprocess your data with recipes\"\nweight: 2\ncategories:\n - pre-processing\n - recipes\n - parsnip\n - workflows\n - yardstick\n - broom\ndescription: | \n Prepare data for modeling with modular preprocessing steps.\ntoc-location: body\ntoc-depth: 2\ntoc-title: \"\"\ncss: 
../styles.css\ninclude-after-body: ../repo-actions-delete.html\n---\n\n\n\n\n\n\n## Introduction {#intro}\n\nIn our [*Build a Model*](/start/models/) article, we learned how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/). In this article, we'll explore another tidymodels package, [recipes](https://recipes.tidymodels.org/), which is designed to help you preprocess your data *before* training your model. Recipes are built as a series of preprocessing steps, such as:\n\n- converting qualitative predictors to indicator variables (also known as dummy variables),\n\n- transforming data to be on a different scale (e.g., taking the logarithm of a variable),\n\n- transforming whole groups of predictors together,\n\n- extracting key features from raw variables (e.g., getting the day of the week out of a date variable),\n\nand so on. If you are familiar with R's formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling.\n\nTo use code in this article, you will need to install the following packages: nycflights13, skimr, and tidymodels.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_33da1dce98ae4aaf16583129513e7576'}\n\n```{.r .cell-code}\nlibrary(tidymodels) # for the recipes package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(nycflights13) # for flight data\nlibrary(skimr) # for variable summaries\n```\n:::\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n\n## The New York City flight data {#data}\n\n\n::: {.cell layout-align=\"center\" hash='cache/flight-start_7be6e44d7c7fe4440aa1ab409842361b'}\n\n:::\n\n\nLet's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. 
This data set contains information on 325,819 flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-5_ba376fdd24bcdf21c5e9edd46ea08efc'}\n\n```{.r .cell-code}\nset.seed(123)\n\nflight_data <- \n flights %>% \n mutate(\n # Convert the arrival delay to a factor\n arr_delay = ifelse(arr_delay >= 30, \"late\", \"on_time\"),\n arr_delay = factor(arr_delay),\n # We will use the date (not date-time) in the recipe below\n date = lubridate::as_date(time_hour)\n ) %>% \n # Include the weather data\n inner_join(weather, by = c(\"origin\", \"time_hour\")) %>% \n # Only retain the specific columns we will use\n select(dep_time, flight, origin, dest, air_time, distance, \n carrier, date, arr_delay, time_hour) %>% \n # Exclude missing data\n na.omit() %>% \n # For creating models, it is better to have qualitative columns\n # encoded as factors (instead of character strings)\n mutate_if(is.character, as.factor)\n```\n:::\n\n\nWe can see that about 16% of the flights in this data set arrived more than 30 minutes late.\n\n\n::: {.cell layout-align=\"center\" hash='cache/count-delays_5849290b5c6a7df218e9cb949a0f0433'}\n\n```{.r .cell-code}\nflight_data %>% \n count(arr_delay) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> arr_delay n prop\n#> \n#> 1 late 52540 0.161\n#> 2 on_time 273279 0.839\n```\n:::\n\n\nBefore we start building up our recipe, let's take a quick look at a few specific variables that will be important for both preprocessing and modeling.\n\nFirst, notice that the variable we created called `arr_delay` is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor.\n\n\n::: {.cell layout-align=\"center\" hash='cache/glimpse-flights_9e7969e05e325dd83978b5118de71ddb'}\n\n```{.r .cell-code}\nglimpse(flight_data)\n#> Rows: 325,819\n#> Columns: 10\n#> $ dep_time 517, 533, 542, 
544, 554, 554, 555, 557, 557, 558, 558, 558, …\n#> $ flight 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71…\n#> $ origin EWR, LGA, JFK, JFK, LGA, EWR, EWR, LGA, JFK, LGA, JFK, JFK, …\n#> $ dest IAH, IAH, MIA, BQN, ATL, ORD, FLL, IAD, MCO, ORD, PBI, TPA, …\n#> $ air_time 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 3…\n#> $ distance 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028,…\n#> $ carrier UA, UA, AA, B6, DL, UA, B6, EV, B6, AA, B6, B6, UA, UA, AA, …\n#> $ date 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01,…\n#> $ arr_delay on_time, on_time, late, on_time, on_time, on_time, on_time, …\n#> $ time_hour 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:…\n```\n:::\n\n\nSecond, there are two variables that we don't want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are `flight`, a numeric value, and `time_hour`, a date-time value.\n\nThird, there are 104 flight destinations contained in `dest` and 16 distinct `carrier`s.\n\n\n::: {.cell layout-align=\"center\" hash='cache/skim-flights_3663c5c037362c6e9287904011f4027d'}\n\n```{.r .cell-code}\nflight_data %>% \n skimr::skim(dest, carrier) \n```\n\n::: {.cell-output-display}\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
    Data summary
    Name Piped data
    Number of rows 325819
    Number of columns 10
    _______________________
    Column type frequency:
    factor 2
    ________________________
    Group variables None
    \n\n\n**Variable type: factor**\n\n\n \n \n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
    skim_variable n_missing complete_rate ordered n_unique top_counts
    dest 0 1 FALSE 104 ATL: 16771, ORD: 16507, LAX: 15942, BOS: 14948
    carrier 0 1 FALSE 16 UA: 57489, B6: 53715, EV: 50868, DL: 47465
    \n:::\n:::\n\n\nBecause we'll be using a simple logistic regression model, the variables `dest` and `carrier` will be converted to [dummy variables](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html). However, some of these values do not occur very frequently and this could complicate our analysis. We'll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling.\n\n## Data splitting {#data-split}\n\nTo get started, let's split this single dataset into two: a *training* set and a *testing* set. We'll keep most of the rows in the original dataset (subset chosen randomly) in the *training* set. The training data will be used to *fit* the model, and the *testing* set will be used to measure model performance.\n\nTo do this, we can use the [rsample](https://rsample.tidymodels.org/) package to create an object that contains the information on *how* to split the data, and then two more rsample functions to create data frames for the training and testing sets:\n\n\n::: {.cell layout-align=\"center\" hash='cache/split_63f6881dbae8f8d1bcbe87ef8e48ecfc'}\n\n```{.r .cell-code}\n# Fix the random numbers by setting the seed \n# This enables the analysis to be reproducible when random numbers are used \nset.seed(222)\n# Put 3/4 of the data into the training set \ndata_split <- initial_split(flight_data, prop = 3/4)\n\n# Create data frames for the two sets:\ntrain_data <- training(data_split)\ntest_data <- testing(data_split)\n```\n:::\n\n\n## Create recipe and roles {#recipe}\n\nTo get started, let's create a recipe for a simple logistic regression model. 
Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model.\n\nLet's initiate a new recipe:\n\n\n::: {.cell layout-align=\"center\" hash='cache/initial-recipe_7233f1669fd288b82db7342770322065'}\n\n```{.r .cell-code}\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) \n```\n:::\n\n\nThe [`recipe()` function](https://recipes.tidymodels.org/reference/recipe.html) as we used it here has two arguments:\n\n- A **formula**. Any variable on the left-hand side of the tilde (`~`) is considered the model outcome (here, `arr_delay`). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (`.`) to indicate all other variables as predictors.\n\n- The **data**. A recipe is associated with the data set used to create the model. This will typically be the *training* set, so `data = train_data` here. Naming a data set doesn't actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc.\n\nNow we can add [roles](https://recipes.tidymodels.org/reference/roles.html) to this recipe. We can use the [`update_role()` function](https://recipes.tidymodels.org/reference/roles.html) to let recipes know that `flight` and `time_hour` are variables with a custom role that we called `\"ID\"` (a role can have any character value). 
Whereas our formula included all variables in the training set other than `arr_delay` as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors.\n\n\n::: {.cell layout-align=\"center\" hash='cache/recipe-roles_f2c8735ea1622ce477471ac7a4c9da15'}\n\n```{.r .cell-code}\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") \n```\n:::\n\n\nThis step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong.\n\nTo get the current set of variables and roles, use the `summary()` function:\n\n\n::: {.cell layout-align=\"center\" hash='cache/summary_c2d837bd91379fb42c1dd50b549de4df'}\n\n```{.r .cell-code}\nsummary(flights_rec)\n#> # A tibble: 10 × 4\n#> variable type role source \n#> \n#> 1 dep_time predictor original\n#> 2 flight ID original\n#> 3 origin predictor original\n#> 4 dest predictor original\n#> 5 air_time predictor original\n#> 6 distance predictor original\n#> 7 carrier predictor original\n#> 8 date predictor original\n#> 9 time_hour ID original\n#> 10 arr_delay outcome original\n```\n:::\n\n\n## Create features {#features}\n\nNow we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of **feature engineering** might go a long way to improving our model. How should the date be encoded into the model? 
The `date` column has an R `date` object so including that column \"as is\" will mean that the model will convert it to a numeric format equal to the number of days after a reference date:\n\n\n::: {.cell layout-align=\"center\" hash='cache/dates_60ff94620824ef3668623d9704514378'}\n\n```{.r .cell-code}\nflight_data %>% \n distinct(date) %>% \n mutate(numeric_date = as.numeric(date)) \n#> # A tibble: 364 × 2\n#> date numeric_date\n#> \n#> 1 2013-01-01 15706\n#> 2 2013-01-02 15707\n#> 3 2013-01-03 15708\n#> 4 2013-01-04 15709\n#> 5 2013-01-05 15710\n#> 6 2013-01-06 15711\n#> 7 2013-01-07 15712\n#> 8 2013-01-08 15713\n#> 9 2013-01-09 15714\n#> 10 2013-01-10 15715\n#> # ℹ 354 more rows\n```\n:::\n\n\nIt's possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. However, it might be better to add model terms *derived* from the date that have a better potential to be important to the model. 
For example, we could derive the following meaningful features from the single `date` variable:\n\n- the day of the week,\n\n- the month, and\n\n- whether or not the date corresponds to a holiday.\n\nLet's do all three of these by adding steps to our recipe:\n\n\n::: {.cell layout-align=\"center\" hash='cache/date-recipe_d2ec85245dd4ac821e783a3b81be807f'}\n\n```{.r .cell-code}\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE)\n```\n:::\n\n\nWhat do each of these steps do?\n\n- With [`step_date()`](https://recipes.tidymodels.org/reference/step_date.html), we created two new factor columns with the appropriate day of the week and the month.\n\n- With [`step_holiday()`](https://recipes.tidymodels.org/reference/step_holiday.html), we created a binary variable indicating whether the current date is a holiday or not. The argument value of `timeDate::listHolidays(\"US\")` uses the [timeDate package](https://cran.r-project.org/web/packages/timeDate/index.html) to list the 18 standard US holidays.\n\n- With `keep_original_cols = FALSE`, we remove the original `date` variable since we no longer want it in the model. Many recipe steps that create new variables have this argument.\n\nNext, we'll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. 
In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix).\n\nFor factors like `dest` and `origin`, [standard practice](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html) is to convert them into *dummy* or *indicator* variables to make them numeric. These are binary values for each level of the factor. For example, our `origin` variable has values of `\"EWR\"`, `\"JFK\"`, and `\"LGA\"`. The standard dummy variable encoding, shown below, will create *two* numeric columns of the data that are 1 when the originating airport is `\"JFK\"` or `\"LGA\"` and zero otherwise, respectively.\n\n\n\n\n::: {.cell layout-align=\"center\" hash='cache/dummy-table_6cd9a745b2ec8a08a34aadd0b204464f'}\n::: {.cell-output-display}\n`````{=html}\n\n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
    origin origin_JFK origin_LGA
    JFK 1 0
    EWR 0 0
    LGA 0 1
    \n\n`````\n:::\n:::\n\n\nBut, unlike the standard model formula methods in R, a recipe **does not** automatically create these dummy variables for you; you'll need to tell your recipe to add this step. This is for two reasons. First, many models do not require [numeric predictors](https://bookdown.org/max/FES/categorical-trees.html), so dummy variables may not always be preferred. Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. For those reasons, you need to explicitly tell recipes to create dummy variables using `step_dummy()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/dummy_49f08ce7a83b6de72ec52a1b272e62c4'}\n\n```{.r .cell-code}\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors())\n```\n:::\n\n\nHere, we did something different than before: instead of applying a step to an individual variable, we used [selectors](https://recipes.tidymodels.org/reference/selections.html) to apply this recipe step to several variables at once, `all_nominal_predictors()`. The [selector functions](https://recipes.tidymodels.org/reference/selections.html) can be combined to select intersections of variables.\n\nAt this stage in the recipe, this step selects the `origin`, `dest`, and `carrier` variables. It also includes two new variables, `date_dow` and `date_month`, that were created by the earlier `step_date()`.\n\nMore generally, the recipe selectors mean that you don't always have to apply steps to individual variables one at a time. 
Since a recipe knows the *variable type* and *role* of each column, they can also be selected (or dropped) using this information.\n\nWe need one final step to add to our recipe. Since `carrier` and `dest` have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don't exist in the training set. For example, there is one destination that is only in the test set:\n\n\n::: {.cell layout-align=\"center\" hash='cache/zv-cols_a8e419ee339885441896c728f0a8eb2a'}\n\n```{.r .cell-code}\ntest_data %>% \n distinct(dest) %>% \n anti_join(train_data)\n#> Joining with `by = join_by(dest)`\n#> # A tibble: 1 × 1\n#> dest \n#> \n#> 1 LEX\n```\n:::\n\n\nWhen the recipe is applied to the training set, a column is made for LEX because the factor levels come from `flight_data` (not the training set), but this column will contain all zeros. This is a \"zero-variance predictor\" that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. `step_zv()` will remove columns from the data when the training set data have a single value, so it is added to the recipe *after* `step_dummy()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/zv_6afa0de7c3f6740f875dcf509adc5d77'}\n\n```{.r .cell-code}\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors())\n```\n:::\n\n\nNow we've created a *specification* of what should be done with the data. How do we use the recipe we made?\n\n## Fit a model with a recipe {#fit-workflow}\n\nLet's use logistic regression to model the flight data. 
As we saw in [*Build a Model*](/start/models/), we start by [building a model specification](/start/models/#build-model) using the parsnip package:\n\n\n::: {.cell layout-align=\"center\" hash='cache/model_cf6cc6dc3690e0fea4d0c57d1d045e16'}\n\n```{.r .cell-code}\nlr_mod <- \n logistic_reg() %>% \n set_engine(\"glm\")\n```\n:::\n\n\nWe will want to use our recipe across several steps as we train and test our model. We will:\n\n1. **Process the recipe using the training set**: This involves any estimation or calculations based on the training set. For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal.\n\n2. **Apply the recipe to the training set**: We create the final predictor set on the training set.\n\n3. **Apply the recipe to the test set**: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set.\n\nTo simplify this process, we can use a *model workflow*, which pairs a model and recipe together. This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test *workflows*. 
We'll use the [workflows package](https://workflows.tidymodels.org/) from tidymodels to bundle our parsnip model (`lr_mod`) with our recipe (`flights_rec`).\n\n\n::: {.cell layout-align=\"center\" hash='cache/workflow_b76ceff88efdf6911b44754fe85d2dee'}\n\n```{.r .cell-code}\nflights_wflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(flights_rec)\n\nflights_wflow\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: logistic_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 4 Recipe Steps\n#> \n#> • step_date()\n#> • step_holiday()\n#> • step_dummy()\n#> • step_zv()\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Logistic Regression Model Specification (classification)\n#> \n#> Computational engine: glm\n```\n:::\n\n\nNow, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors:\n\n\n::: {.cell layout-align=\"center\" hash='cache/fit_c048a5aa49766b9ee2e4a4dc03222b11'}\n\n```{.r .cell-code}\nflights_fit <- \n flights_wflow %>% \n fit(data = train_data)\n```\n:::\n\n\nThis object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions `extract_fit_parsnip()` and `extract_recipe()`. For example, here we pull the fitted model object then use the `broom::tidy()` function to get a tidy tibble of model coefficients:\n\n\n::: {.cell layout-align=\"center\" hash='cache/fit-glance_89457f1224a78f974440d42b4d4b450e'}\n\n```{.r .cell-code}\nflights_fit %>% \n extract_fit_parsnip() %>% \n tidy()\n#> # A tibble: 158 × 5\n#> term estimate std.error statistic p.value\n#> \n#> 1 (Intercept) 7.26 2.73 2.66 7.75e- 3\n#> 2 dep_time -0.00166 0.0000141 -118. 
0 \n#> 3 air_time -0.0440 0.000563 -78.2 0 \n#> 4 distance 0.00508 0.00150 3.38 7.13e- 4\n#> 5 date_USChristmasDay 1.35 0.178 7.59 3.32e-14\n#> 6 date_USColumbusDay 0.721 0.170 4.23 2.33e- 5\n#> 7 date_USCPulaskisBirthday 0.804 0.139 5.78 7.38e- 9\n#> 8 date_USDecorationMemorialDay 0.582 0.117 4.96 7.22e- 7\n#> 9 date_USElectionDay 0.945 0.190 4.97 6.73e- 7\n#> 10 date_USGoodFriday 1.24 0.167 7.44 1.04e-13\n#> # ℹ 148 more rows\n```\n:::\n\n\n## Use a trained workflow to predict {#predict-workflow}\n\nOur goal was to predict whether a plane arrives more than 30 minutes late. We have just:\n\n1. Built the model (`lr_mod`),\n\n2. Created a preprocessing recipe (`flights_rec`),\n\n3. Bundled the model and recipe (`flights_wflow`), and\n\n4. Trained our workflow using a single call to `fit()`.\n\nThe next step is to use the trained workflow (`flights_fit`) to predict with the unseen test data, which we will do with a single call to `predict()`. The `predict()` method applies the recipe to the new data, then passes them to the fitted model.\n\n\n::: {.cell layout-align=\"center\" hash='cache/pred-class_6f0bcf4a99b4993171e481adf569d456'}\n\n```{.r .cell-code}\npredict(flights_fit, test_data)\n#> # A tibble: 81,455 × 1\n#> .pred_class\n#> \n#> 1 on_time \n#> 2 on_time \n#> 3 on_time \n#> 4 on_time \n#> 5 on_time \n#> 6 on_time \n#> 7 on_time \n#> 8 on_time \n#> 9 on_time \n#> 10 on_time \n#> # ℹ 81,445 more rows\n```\n:::\n\n\nBecause our outcome variable here is a factor, the output from `predict()` returns the predicted class: `late` versus `on_time`. But, let's say we want the predicted class probabilities for each flight instead. 
To return those, we can specify `type = \"prob\"` when we use `predict()` or use `augment()` with the model plus test data to save them together:\n\n\n::: {.cell layout-align=\"center\" hash='cache/test-pred_6df86f199df4958b9c2a47b74bfd8a3a'}\n\n```{.r .cell-code}\nflights_aug <- \n augment(flights_fit, test_data)\n\n# The data look like: \nflights_aug %>%\n select(arr_delay, time_hour, flight, .pred_class, .pred_on_time)\n#> # A tibble: 81,455 × 5\n#> arr_delay time_hour flight .pred_class .pred_on_time\n#> \n#> 1 on_time 2013-01-01 05:00:00 1545 on_time 0.945\n#> 2 on_time 2013-01-01 05:00:00 1714 on_time 0.949\n#> 3 on_time 2013-01-01 06:00:00 507 on_time 0.964\n#> 4 on_time 2013-01-01 06:00:00 5708 on_time 0.961\n#> 5 on_time 2013-01-01 06:00:00 71 on_time 0.962\n#> 6 on_time 2013-01-01 06:00:00 194 on_time 0.975\n#> 7 on_time 2013-01-01 06:00:00 1124 on_time 0.963\n#> 8 on_time 2013-01-01 05:00:00 1806 on_time 0.981\n#> 9 on_time 2013-01-01 06:00:00 1187 on_time 0.935\n#> 10 on_time 2013-01-01 06:00:00 4650 on_time 0.931\n#> # ℹ 81,445 more rows\n```\n:::\n\n\nNow that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of `.pred_on_time` are *p* \\> .50. But we also know that we have 81,455 rows total to predict. We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, `arr_delay`.\n\nLet's use the area under the [ROC curve](https://bookdown.org/max/FES/measuring-performance.html#class-metrics) as our metric, computed using `roc_curve()` and `roc_auc()` from the [yardstick package](https://yardstick.tidymodels.org/).\n\nTo generate a ROC curve, we need the predicted class probabilities for `late` and `on_time`, which we just calculated in the code chunk above. 
We can create the ROC curve with these values, using `roc_curve()` and then piping to the `autoplot()` method:\n\n\n::: {.cell layout-align=\"center\" hash='cache/roc-plot_2b4379a2561a283475b502d9d3be939e'}\n\n```{.r .cell-code}\nflights_aug %>% \n roc_curve(truth = arr_delay, .pred_late) %>% \n autoplot()\n```\n\n::: {.cell-output-display}\n![](figs/roc-plot-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nSimilarly, `roc_auc()` estimates the area under the curve:\n\n\n::: {.cell layout-align=\"center\" hash='cache/roc-auc_d976c75aa17ddfd1a4b37d632c808be9'}\n\n```{.r .cell-code}\nflights_aug %>% \n roc_auc(truth = arr_delay, .pred_late)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 roc_auc binary 0.764\n```\n:::\n\n\nNot too bad! We leave it to the reader to test out this workflow [*without*](https://workflows.tidymodels.org/reference/add_formula.html) this recipe. You can use `workflows::add_formula(arr_delay ~ .)` instead of `add_recipe()` (remember to remove the identification variables first!), and see whether our recipe improved our model's ability to predict late arrivals.\n\n\n\n\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> 
infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> nycflights13 * 1.0.2 2021-04-12 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> skimr * 2.1.5 2022-12-23 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/start/resampling/index/execute-results/html.json b/_freeze/start/resampling/index/execute-results/html.json new file mode 100644 index 00000000..d0d37098 --- /dev/null +++ b/_freeze/start/resampling/index/execute-results/html.json @@ -0,0 +1,18 @@ +{ + "hash": "abe9322acca96ed6e985133a51fffc67", + "result": { + "markdown": "---\ntitle: \"Evaluate your model with resampling\"\nweight: 3\ncategories:\n - resampling\n - rsample\n - parsnip\n - tune\n - workflows\n - yardstick\ndescription: | \n Measure model performance by generating different versions of the training data through resampling.\ntoc-location: body\ntoc-depth: 2\ntoc-title: \"\"\ncss: ../styles.css\ninclude-after-body: ../repo-actions-delete.html\n---\n\n\n\n\n\n\n## Introduction {#intro}\n\nSo far, we have [built a model](/start/models/) and [preprocessed data with a recipe](/start/recipes/). 
We also introduced [workflows](/start/recipes/#fit-workflow) as a way to bundle a [parsnip model](https://parsnip.tidymodels.org/) and [recipe](https://recipes.tidymodels.org/) together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on **resampling** statistics.\n\nTo use code in this article, you will need to install the following packages: modeldata, ranger, and tidymodels.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_0ce833621e5b0fc33fbb8d3346b34e5b'}\n\n```{.r .cell-code}\nlibrary(tidymodels) # for the rsample package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(modeldata) # for the cells data\n```\n:::\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n\n## The cell image data {#data}\n\nLet's use data from [Hill, LaPan, Li, and Haney (2007)](http://www.biomedcentral.com/1471-2105/8/340), available in the [modeldata package](https://cran.r-project.org/web/packages/modeldata/index.html), to predict cell image segmentation quality with resampling. To start, we load this data into R:\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-import_620d009694ed515939c5ab84c9919570'}\n\n```{.r .cell-code}\ndata(cells, package = \"modeldata\")\ncells\n#> # A tibble: 2,019 × 58\n#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3\n#> \n#> 1 Test PS 143. 185 15.7 4.95 9.55\n#> 2 Train PS 134. 819 31.9 207. 69.9 \n#> 3 Train WS 107. 431 28.0 116. 63.9 \n#> 4 Train PS 69.2 298 19.5 102. 28.2 \n#> 5 Test PS 2.89 285 24.3 112. 20.5 \n#> 6 Test WS 40.7 172 326. 654. 129. \n#> 7 Test WS 174. 177 260. 596. 124. \n#> 8 Test PS 180. 251 18.3 5.73 17.2 \n#> 9 Test WS 18.9 495 16.1 89.5 13.7 \n#> 10 Test WS 153. 
384 17.7 89.9 20.4 \n#> # ℹ 2,009 more rows\n#> # ℹ 51 more variables: avg_inten_ch_4 , convex_hull_area_ratio_ch_1 ,\n#> # convex_hull_perim_ratio_ch_1 , diff_inten_density_ch_1 ,\n#> # diff_inten_density_ch_3 , diff_inten_density_ch_4 ,\n#> # entropy_inten_ch_1 , entropy_inten_ch_3 ,\n#> # entropy_inten_ch_4 , eq_circ_diam_ch_1 ,\n#> # eq_ellipse_lwr_ch_1 , eq_ellipse_oblate_vol_ch_1 , …\n```\n:::\n\n\nWe have data for 2019 cells, with 58 variables. The main outcome variable of interest for us here is called `class`, which you can see is a factor. But before we jump into predicting the `class` variable, we need to understand it better. Below is a brief primer on cell image segmentation.\n\n### Predicting image segmentation quality\n\nSome biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined.\n\nFor example, in top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell.\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-fig_a9f7aa84806e5959719d9ae20bedc78c'}\n::: {.cell-output-display}\n![](img/cells.png){fig-align='center' width=70%}\n:::\n:::\n\n\nUsing these colors, the cells in an image can be *segmented* so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or \"oblongness\" of the cell.\n\nThe bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. 
However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion.\n\nA cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (`PS`) or well-segmented (`WS`). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented.\n\n### Back to the cells data\n\nThe `cells` data has `class` labels for 2019 cells --- each cell is labeled as either poorly segmented (`PS`) or well-segmented (`WS`). Each also has a total of 56 predictors based on automated image analysis measurements. For example, `avg_inten_ch_1` is the mean intensity of the data contained in the nucleus, `area_ch_1` is the total size of the cell, and so on (some predictors are fairly arcane in nature).\n\n\n::: {.cell layout-align=\"center\" hash='cache/cells-show_9e1acf9a69d4a881d71cf50e07e75aaa'}\n\n```{.r .cell-code}\ncells\n#> # A tibble: 2,019 × 58\n#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3\n#> \n#> 1 Test PS 143. 185 15.7 4.95 9.55\n#> 2 Train PS 134. 819 31.9 207. 69.9 \n#> 3 Train WS 107. 431 28.0 116. 63.9 \n#> 4 Train PS 69.2 298 19.5 102. 28.2 \n#> 5 Test PS 2.89 285 24.3 112. 20.5 \n#> 6 Test WS 40.7 172 326. 654. 129. \n#> 7 Test WS 174. 177 260. 596. 124. \n#> 8 Test PS 180. 251 18.3 5.73 17.2 \n#> 9 Test WS 18.9 495 16.1 89.5 13.7 \n#> 10 Test WS 153. 
384 17.7 89.9 20.4 \n#> # ℹ 2,009 more rows\n#> # ℹ 51 more variables: avg_inten_ch_4 , convex_hull_area_ratio_ch_1 ,\n#> # convex_hull_perim_ratio_ch_1 , diff_inten_density_ch_1 ,\n#> # diff_inten_density_ch_3 , diff_inten_density_ch_4 ,\n#> # entropy_inten_ch_1 , entropy_inten_ch_3 ,\n#> # entropy_inten_ch_4 , eq_circ_diam_ch_1 ,\n#> # eq_ellipse_lwr_ch_1 , eq_ellipse_oblate_vol_ch_1 , …\n```\n:::\n\n\nThe rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rates_6444e66c05a37946e8bbdd8e6033a975'}\n\n```{.r .cell-code}\ncells %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> class n prop\n#> \n#> 1 PS 1300 0.644\n#> 2 WS 719 0.356\n```\n:::\n\n\n## Data splitting {#data-split}\n\nIn our previous [*Preprocess your data with recipes*](/start/recipes/#data-split) article, we started by splitting our data. It is common when beginning a modeling project to [separate the data set](https://bookdown.org/max/FES/data-splitting.html) into two partitions:\n\n- The *training set* is used to estimate parameters, compare models and feature engineering techniques, tune models, etc.\n\n- The *test set* is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance.\n\nThere are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the [rsample](https://rsample.tidymodels.org/) package for this purpose.\n\nSince random sampling uses random numbers, it is important to set the random number seed. 
This ensures that the random numbers can be reproduced at a later time (if needed).\n\nThe function `rsample::initial_split()` takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column `case`. To demonstrate how to make a split, we'll remove this column before we make our own split:\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-split_ee190c5f50b7def1307d564ecbbfea38'}\n\n```{.r .cell-code}\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\n```\n:::\n\n\nHere we used the [`strata` argument](https://rsample.tidymodels.org/reference/initial_split.html), which conducts a stratified split. This ensures that, despite the imbalance we noticed in our `class` variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the `initial_split`, the `training()` and `testing()` functions return the actual data sets.\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-train-test_2199ba8e44f190d3fb282ecac72bd112'}\n\n```{.r .cell-code}\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n\nnrow(cell_train)\n#> [1] 1514\nnrow(cell_train)/nrow(cells)\n#> [1] 0.7498762\n\n# training set proportions by class\ncell_train %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> class n prop\n#> \n#> 1 PS 975 0.644\n#> 2 WS 539 0.356\n\n# test set proportions by class\ncell_test %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> class n prop\n#> \n#> 1 PS 325 0.644\n#> 2 WS 180 0.356\n```\n:::\n\n\nThe majority of the modeling work is then conducted on the training set data.\n\n## Modeling\n\n[Random forest models](https://en.wikipedia.org/wiki/Random_forest) are [ensembles](https://en.wikipedia.org/wiki/Ensemble_learning) of [decision 
trees](https://en.wikipedia.org/wiki/Decision_tree). A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. For categorical outcome variables like `class` in our `cells` data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample.\n\nOne of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won't create a recipe for the `cells` data.\n\nAt the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute.\n\nTo fit a random forest model on the training set, let's use the [parsnip](https://parsnip.tidymodels.org/) package with the [ranger](https://cran.r-project.org/package=ranger) engine. We first define the model that we want to create:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-def_d511a40526dda7ce4a5018970f3b10a7'}\n\n```{.r .cell-code}\nrf_mod <- \n rand_forest(trees = 1000) %>% \n set_engine(\"ranger\") %>% \n set_mode(\"classification\")\n```\n:::\n\n\nStarting with this parsnip model object, the `fit()` function can be used with a model formula. 
Since random forest models use random numbers, we again set the seed prior to computing:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-fit_fa56896bd1bf05f5ad08855bf1a84030'}\n\n```{.r .cell-code}\nset.seed(234)\nrf_fit <- \n rf_mod %>% \n fit(class ~ ., data = cell_train)\nrf_fit\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) \n#> \n#> Type: Probability estimation \n#> Number of trees: 1000 \n#> Sample size: 1514 \n#> Number of independent variables: 56 \n#> Mtry: 7 \n#> Target node size: 10 \n#> Variable importance mode: none \n#> Splitrule: gini \n#> OOB prediction error (Brier s.): 0.1189338\n```\n:::\n\n\nThis new `rf_fit` object is our fitted model, trained on our training data set.\n\n## Estimating performance {#performance}\n\nDuring a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are:\n\n- the area under the Receiver Operating Characteristic (ROC) curve, and\n\n- overall classification accuracy.\n\nThe ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. The hard class predictions tell us whether our model predicted `PS` or `WS` for each cell. But, behind those predictions, the model is actually estimating a probability. 
A simple 50% probability cutoff is used to categorize a cell as poorly segmented.\n\nThe [yardstick package](https://yardstick.tidymodels.org/) has functions for computing both of these measures called `roc_auc()` and `accuracy()`.\n\nAt first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let's see what happens if we try this. To evaluate performance based on the training set, we call the `predict()` method to get both types of predictions (i.e. probabilities and hard class predictions).\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-train-pred_c7a327376a9ce7f31b1f691abbc21119'}\n\n```{.r .cell-code}\nrf_training_pred <- \n predict(rf_fit, cell_train) %>% \n bind_cols(predict(rf_fit, cell_train, type = \"prob\")) %>% \n # Add the true outcome data back in\n bind_cols(cell_train %>% \n select(class))\n```\n:::\n\n\nUsing the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-train-perf_151ecd729bde2a913feb0ff1caa1980d'}\n\n```{.r .cell-code}\nrf_training_pred %>% # training set predictions\n roc_auc(truth = class, .pred_PS)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 roc_auc binary 1.00\nrf_training_pred %>% # training set predictions\n accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 accuracy binary 0.991\n```\n:::\n\n\nNow that we have this model with exceptional performance, we proceed to the test set. 
Unfortunately, we discover that, although our results aren't bad, they are certainly worse than what we initially thought based on predicting the training set:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rf-test_81af7572faea897acb5c503792e2a939'}\n\n```{.r .cell-code}\nrf_testing_pred <- \n predict(rf_fit, cell_test) %>% \n bind_cols(predict(rf_fit, cell_test, type = \"prob\")) %>% \n bind_cols(cell_test %>% select(class))\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='cache/rf-test-perf_2f092152e191eea26ef2df29532042f8'}\n\n```{.r .cell-code}\nrf_testing_pred %>% # test set predictions\n roc_auc(truth = class, .pred_PS)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 roc_auc binary 0.891\nrf_testing_pred %>% # test set predictions\n accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 accuracy binary 0.816\n```\n:::\n\n\n### What happened here?\n\nThere are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic:\n\n- Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results.\n\n- The training set does not have the capacity to be a good arbiter of performance. It is not an independent piece of information; predicting the training set can only reflect what the model already knows.\n\nTo understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the *second* test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test.\n\n## Resampling to the rescue {#resampling}\n\nResampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. 
They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the *training set*. This schematic from [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/resampling.html) illustrates data usage for resampling methods:\n\n\n::: {.cell layout-align=\"center\" hash='cache/resampling-fig_96516c0c742a824aaef9eba8b5781946'}\n::: {.cell-output-display}\n![](img/resampling.svg){fig-align='center' width=85%}\n:::\n:::\n\n\nIn the first level of this diagram, you see what happens when you use `rsample::initial_split()`, which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out.\n\nLet's use 10-fold cross-validation (CV) in this example. This method randomly allocates the 1514 cells in the training set to 10 groups of roughly equal size, called \"folds\". For the first iteration of resampling, the first fold of about 151 cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the *assessment set* in the tidymodels framework.\n\nThe other 90% of the data (about 1362 cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the *analysis set*. This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions.\n\nIn this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. 
While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics.\n\n\n\n\n\nThe final resampling estimates for the model are the **averages** of the performance statistics replicates. For example, suppose for our data the results were:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rs-table_3db1a319eedfb38180c7a3475d9a573e'}\n::: {.cell-output-display}\n`````{=html}\n\n \n \n \n \n \n \n \n \n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n
    resample accuracy roc_auc assessment size
    Fold01 0.8289474 0.8937128 152
    Fold02 0.7697368 0.8768989 152
    Fold03 0.8552632 0.9017666 152
    Fold04 0.8552632 0.8928076 152
    Fold05 0.7947020 0.8816342 151
    Fold06 0.8476821 0.9244306 151
    Fold07 0.8145695 0.8960339 151
    Fold08 0.8543046 0.9267677 151
    Fold09 0.8543046 0.9231392 151
    Fold10 0.8476821 0.9266917 151
    \n\n`````\n:::\n:::\n\n\nFrom these resampling statistics, the final estimate of performance for this random forest model would be 0.904 for the area under the ROC curve and 0.832 for accuracy.\n\nThese resampling statistics are an effective method for measuring model performance *without* predicting the training set directly as a whole.\n\n## Fit a model with resampling {#fit-resamples}\n\nTo generate these results, the first step is to create a resampling object using rsample. There are [several resampling methods](https://rsample.tidymodels.org/reference/index.html#section-resampling-methods) implemented in rsample; cross-validation folds can be created using `vfold_cv()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/folds_61f1f654b3ca89e4ddb3d02c2c008ec1'}\n\n```{.r .cell-code}\nset.seed(345)\nfolds <- vfold_cv(cell_train, v = 10)\nfolds\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 2\n#> splits id \n#> \n#> 1 Fold01\n#> 2 Fold02\n#> 3 Fold03\n#> 4 Fold04\n#> 5 Fold05\n#> 6 Fold06\n#> 7 Fold07\n#> 8 Fold08\n#> 9 Fold09\n#> 10 Fold10\n```\n:::\n\n\nThe list column for `splits` contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called `analysis()` and `assessment()`.\n\nHowever, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling:\n\n- Resample a model specification preprocessed with a formula or [recipe](/start/recipes/), or\n\n- Resample a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and formula/recipe.\n\nFor this example, let's use a `workflow()` that bundles together the random forest model and a formula, since we are not using a recipe. 
Whichever of these options you use, the syntax to `fit_resamples()` is very similar to `fit()`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/rs_812801b606c5af8cc535a5bc972924b7'}\n\n```{.r .cell-code}\nrf_wf <- \n workflow() %>%\n add_model(rf_mod) %>%\n add_formula(class ~ .)\n\nset.seed(456)\nrf_fit_rs <- \n rf_wf %>% \n fit_resamples(folds)\n```\n:::\n\n::: {.cell layout-align=\"center\" hash='cache/rs-show_31df0335cd3e307bc6390f667238140d'}\n\n```{.r .cell-code}\nrf_fit_rs\n#> # Resampling results\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 4\n#> splits id .metrics .notes \n#> \n#> 1 Fold01 \n#> 2 Fold02 \n#> 3 Fold03 \n#> 4 Fold04 \n#> 5 Fold05 \n#> 6 Fold06 \n#> 7 Fold07 \n#> 8 Fold08 \n#> 9 Fold09 \n#> 10 Fold10 \n```\n:::\n\n\nThe results are similar to the `folds` results with some extra columns. The column `.metrics` contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data:\n\n\n::: {.cell layout-align=\"center\" hash='cache/metrics_2d478aa9561f2bc931b1cc153e38760e'}\n\n```{.r .cell-code}\ncollect_metrics(rf_fit_rs)\n#> # A tibble: 2 × 6\n#> .metric .estimator mean n std_err .config \n#> \n#> 1 accuracy binary 0.832 10 0.00952 Preprocessor1_Model1\n#> 2 roc_auc binary 0.904 10 0.00610 Preprocessor1_Model1\n```\n:::\n\n\nThink about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. 
We have looked at this once already before we started using resampling, but let's remind ourselves of the results:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-23_e885c4fa1bc9037707151d13d1033a46'}\n\n```{.r .cell-code}\nrf_testing_pred %>% # test set predictions\n roc_auc(truth = class, .pred_PS)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 roc_auc binary 0.891\nrf_testing_pred %>% # test set predictions\n accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> \n#> 1 accuracy binary 0.816\n```\n:::\n\n\nThe performance metrics from the test set are much closer to the performance metrics computed using resampling than our first (\"bad idea\") attempt. Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model's performance.\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> ranger * 0.15.1 2023-04-03 
[1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": { + "include-in-header": [ + "\n\n" + ] + }, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_freeze/start/tuning/index/execute-results/html.json b/_freeze/start/tuning/index/execute-results/html.json new file mode 100644 index 00000000..dd1ed76c --- /dev/null +++ b/_freeze/start/tuning/index/execute-results/html.json @@ -0,0 +1,14 @@ +{ + "hash": "54fa77493aa861fb16502a2b8d864fe8", + "result": { + "markdown": "---\ntitle: \"Tune model parameters\"\nweight: 4\ncategories:\n - tuning\n - rsample\n - parsnip\n - tune\n - dials\n - workflows\n - yardstick\ndescription: | \n Estimate the best values for hyperparameters that cannot be learned directly during model training.\ntoc-location: body\ntoc-depth: 2\ntoc-title: \"\"\ncss: ../styles.css\ninclude-after-body: ../repo-actions-delete.html\n---\n\n\n\n\n\n\n## Introduction {#intro}\n\nSome model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called **hyperparameters**. 
Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this `mtry` in tidymodels) or the learning rate in a boosted tree model (we call this `learn_rate`). Instead of learning these kinds of hyperparameters during model training, we can *estimate* the best values for these values by training many models on resampled data sets and exploring how well all these models perform. This process is called **tuning**.\n\nTo use code in this article, you will need to install the following packages: rpart, rpart.plot, tidymodels, and vip.\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-3_4b4c65e2badee19d32824d0c20279d5e'}\n\n```{.r .cell-code}\nlibrary(tidymodels) # for the tune package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(rpart.plot) # for visualizing a decision tree\nlibrary(vip) # for variable importance plots\n```\n:::\n\n{{< test-drive url=\"https://rstudio.cloud/project/2674862\" >}}\n\n\n\n## The cell image data, revisited {#data}\n\nIn our previous [*Evaluate your model with resampling*](/start/resampling/) article, we introduced a data set of images of cells that were labeled by experts as well-segmented (`WS`) or poorly segmented (`PS`). We trained a [random forest model](/start/resampling/#modeling) to predict which images are segmented well vs. poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used [resampling](/start/resampling/#resampling) to estimate the performance of our model on this data.\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-import_620d009694ed515939c5ab84c9919570'}\n\n```{.r .cell-code}\ndata(cells, package = \"modeldata\")\ncells\n#> # A tibble: 2,019 × 58\n#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3\n#> \n#> 1 Test PS 143. 185 15.7 4.95 9.55\n#> 2 Train PS 134. 819 31.9 207. 69.9 \n#> 3 Train WS 107. 431 28.0 116. 
63.9 \n#> 4 Train PS 69.2 298 19.5 102. 28.2 \n#> 5 Test PS 2.89 285 24.3 112. 20.5 \n#> 6 Test WS 40.7 172 326. 654. 129. \n#> 7 Test WS 174. 177 260. 596. 124. \n#> 8 Test PS 180. 251 18.3 5.73 17.2 \n#> 9 Test WS 18.9 495 16.1 89.5 13.7 \n#> 10 Test WS 153. 384 17.7 89.9 20.4 \n#> # ℹ 2,009 more rows\n#> # ℹ 51 more variables: avg_inten_ch_4 , convex_hull_area_ratio_ch_1 ,\n#> # convex_hull_perim_ratio_ch_1 , diff_inten_density_ch_1 ,\n#> # diff_inten_density_ch_3 , diff_inten_density_ch_4 ,\n#> # entropy_inten_ch_1 , entropy_inten_ch_3 ,\n#> # entropy_inten_ch_4 , eq_circ_diam_ch_1 ,\n#> # eq_ellipse_lwr_ch_1 , eq_ellipse_oblate_vol_ch_1 , …\n```\n:::\n\n\n## Predicting image segmentation, but better {#why-tune}\n\nRandom forest models are a tree-based ensemble method, and typically perform well with [default hyperparameters](https://bradleyboehmke.github.io/HOML/random-forest.html#out-of-the-box-performance). However, the accuracy of some other tree-based models, such as [boosted tree models](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) or [decision tree models](https://en.wikipedia.org/wiki/Decision_tree), can be sensitive to the values of hyperparameters. In this article, we will train a **decision tree** model. There are several hyperparameters for decision tree models that can be tuned for better performance. Let's explore:\n\n- the complexity parameter (which we call `cost_complexity` in tidymodels) for the tree, and\n- the maximum `tree_depth`.\n\nTuning these hyperparameters can improve model performance because decision tree models are prone to [overfitting](https://bookdown.org/max/FES/important-concepts.html#overfitting). This happens because single tree models tend to fit the training data *too well* --- so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data.\n\nWe will tune the model hyperparameters to avoid overfitting. 
Tuning the value of `cost_complexity` helps by [pruning](https://bradleyboehmke.github.io/HOML/DT.html#pruning) back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem---an underfit tree. Tuning `tree_depth`, on the other hand, helps by [stopping](https://bradleyboehmke.github.io/HOML/DT.html#early-stopping) our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation.\n\nBefore we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As [before](/start/resampling/), we can use `strata = class` if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation.\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-split_8c1ca0d7be68dc025f6b0d565111c005'}\n\n```{.r .cell-code}\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n```\n:::\n\n\nWe use the training data for tuning the model.\n\n## Tuning hyperparameters {#tuning}\n\nLet's start with the parsnip package, using a [`decision_tree()`](https://parsnip.tidymodels.org/reference/decision_tree.html) model with the [rpart](https://cran.r-project.org/web/packages/rpart/index.html) engine. 
To tune the decision tree hyperparameters `cost_complexity` and `tree_depth`, we create a model specification that identifies which hyperparameters we plan to tune.\n\n\n::: {.cell layout-align=\"center\" hash='cache/tune-spec_e5d79e884708d6f25b9535caaddb3023'}\n\n```{.r .cell-code}\ntune_spec <- \n decision_tree(\n cost_complexity = tune(),\n tree_depth = tune()\n ) %>% \n set_engine(\"rpart\") %>% \n set_mode(\"classification\")\n\ntune_spec\n#> Decision Tree Model Specification (classification)\n#> \n#> Main Arguments:\n#> cost_complexity = tune()\n#> tree_depth = tune()\n#> \n#> Computational engine: rpart\n```\n:::\n\n\nThink of `tune()` here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will `tune()`.\n\nWe can't train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we *can* train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter:\n\n\n::: {.cell layout-align=\"center\" hash='cache/tree-grid_d67adfb503bc52dbebef73da6a919228'}\n\n```{.r .cell-code}\ntree_grid <- grid_regular(cost_complexity(),\n tree_depth(),\n levels = 5)\n```\n:::\n\n\nThe function [`grid_regular()`](https://dials.tidymodels.org/reference/grid_regular.html) is from the [dials](https://dials.tidymodels.org/) package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. 
Since we have two to tune, `grid_regular()` returns 5 $\\times$ 5 = 25 different possible tuning combinations to try in a tidy tibble format.\n\n\n::: {.cell layout-align=\"center\" hash='cache/tree-grid-tibble_4e757ce1804c78a04dcf463e25e24531'}\n\n```{.r .cell-code}\ntree_grid\n#> # A tibble: 25 × 2\n#> cost_complexity tree_depth\n#> \n#> 1 0.0000000001 1\n#> 2 0.0000000178 1\n#> 3 0.00000316 1\n#> 4 0.000562 1\n#> 5 0.1 1\n#> 6 0.0000000001 4\n#> 7 0.0000000178 4\n#> 8 0.00000316 4\n#> 9 0.000562 4\n#> 10 0.1 4\n#> # ℹ 15 more rows\n```\n:::\n\n\nHere, you can see all 5 values of `cost_complexity` ranging up to 0.1. These values get repeated for each of the 5 values of `tree_depth`:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-9_9a6d4c8ddf5141d7a783423b80946c0f'}\n\n```{.r .cell-code}\ntree_grid %>% \n count(tree_depth)\n#> # A tibble: 5 × 2\n#> tree_depth n\n#> \n#> 1 1 5\n#> 2 4 5\n#> 3 8 5\n#> 4 11 5\n#> 5 15 5\n```\n:::\n\n\nArmed with our grid filled with 25 candidate decision tree models, let's create [cross-validation folds](/start/resampling/) for tuning:\n\n\n::: {.cell layout-align=\"center\" hash='cache/cell-folds_53cd98ea6dad5a92958c3f0f00346611'}\n\n```{.r .cell-code}\nset.seed(234)\ncell_folds <- vfold_cv(cell_train)\n```\n:::\n\n\nTuning in tidymodels requires a resampled object created with the [rsample](https://rsample.tidymodels.org/) package.\n\n## Model tuning with a grid {#tune-grid}\n\nWe are ready to tune! Let's use [`tune_grid()`](https://tune.tidymodels.org/reference/tune_grid.html) to fit models at all the different values we chose for each tuned hyperparameter. 
There are several options for building the object for tuning:\n\n- Tune a model specification along with a recipe or model, or\n\n- Tune a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and a recipe or model preprocessor.\n\nHere we use a `workflow()` with a straightforward formula; if this model required more involved data preprocessing, we could use `add_recipe()` instead of `add_formula()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/tree-res_89ca0dc2ed9e3c2dd2e573d34a84a13d'}\n\n```{.r .cell-code}\nset.seed(345)\n\ntree_wf <- workflow() %>%\n add_model(tune_spec) %>%\n add_formula(class ~ .)\n\ntree_res <- \n tree_wf %>% \n tune_grid(\n resamples = cell_folds,\n grid = tree_grid\n )\n\ntree_res\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 4\n#> splits id .metrics .notes \n#> \n#> 1 Fold01 \n#> 2 Fold02 \n#> 3 Fold03 \n#> 4 Fold04 \n#> 5 Fold05 \n#> 6 Fold06 \n#> 7 Fold07 \n#> 8 Fold08 \n#> 9 Fold09 \n#> 10 Fold10 \n```\n:::\n\n\nOnce we have our tuning results, we can both explore them through visualization and then select the best result. The function `collect_metrics()` gives us a tidy tibble with all the results. 
We had 25 candidate models and two metrics, `accuracy` and `roc_auc`, and we get a row for each `.metric` and model.\n\n\n::: {.cell layout-align=\"center\" hash='cache/collect-trees_f80e0747e613667426443cad2059daf8'}\n\n```{.r .cell-code}\ntree_res %>% \n collect_metrics()\n#> # A tibble: 50 × 8\n#> cost_complexity tree_depth .metric .estimator mean n std_err .config \n#> \n#> 1 0.0000000001 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 2 0.0000000001 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 3 0.0000000178 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 4 0.0000000178 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 5 0.00000316 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 6 0.00000316 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 7 0.000562 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 8 0.000562 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 9 0.1 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 10 0.1 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> # ℹ 40 more rows\n```\n:::\n\n\nWe might get more out of plotting these results:\n\n\n::: {.cell layout-align=\"center\" hash='cache/best-tree_1a55e75c20efa9caa9d7cdf71f3a405f'}\n\n```{.r .cell-code}\ntree_res %>%\n collect_metrics() %>%\n mutate(tree_depth = factor(tree_depth)) %>%\n ggplot(aes(cost_complexity, mean, color = tree_depth)) +\n geom_line(size = 1.5, alpha = 0.6) +\n geom_point(size = 2) +\n facet_wrap(~ .metric, scales = \"free\", nrow = 2) +\n scale_x_log10(labels = scales::label_number()) +\n scale_color_viridis_d(option = \"plasma\", begin = .9, end = 0)\n#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.\n#> ℹ Please use `linewidth` instead.\n```\n\n::: {.cell-output-display}\n![](figs/best-tree-1.svg){fig-align='center' width=768}\n:::\n:::\n\n\nWe can see that our \"stubbiest\" tree, with a depth of 1, is the worst model according to both metrics and across all candidate values of `cost_complexity`. Our deepest tree, with a depth of 15, did better. 
However, the best tree seems to be between these values with a tree depth of 4. The [`show_best()`](https://tune.tidymodels.org/reference/show_best.html) function shows us the top 5 candidate models by default:\n\n\n::: {.cell layout-align=\"center\" hash='cache/show-best-tree_33388e34b2f988c64badd924a600d8ea'}\n\n```{.r .cell-code}\ntree_res %>%\n show_best(\"accuracy\")\n#> # A tibble: 5 × 8\n#> cost_complexity tree_depth .metric .estimator mean n std_err .config \n#> \n#> 1 0.0000000001 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 2 0.0000000178 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 3 0.00000316 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 4 0.000562 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 5 0.1 4 accuracy binary 0.786 10 0.0124 Preprocess…\n```\n:::\n\n\nWe can also use the [`select_best()`](https://tune.tidymodels.org/reference/show_best.html) function to pull out the single set of hyperparameter values for our best decision tree model:\n\n\n::: {.cell layout-align=\"center\" hash='cache/select-best-tree_6de0d6852f5e5913d2c8d3f1689db25f'}\n\n```{.r .cell-code}\nbest_tree <- tree_res %>%\n select_best(\"accuracy\")\n\nbest_tree\n#> # A tibble: 1 × 3\n#> cost_complexity tree_depth .config \n#> \n#> 1 0.0000000001 4 Preprocessor1_Model06\n```\n:::\n\n\nThese are the values for `tree_depth` and `cost_complexity` that maximize accuracy in this data set of cell images.\n\n## Finalizing our model {#final-model}\n\nWe can update (or \"finalize\") our workflow object `tree_wf` with the values from `select_best()`.\n\n\n::: {.cell layout-align=\"center\" hash='cache/final-wf_ac50f36798990c975198e3d3454362e3'}\n\n```{.r .cell-code}\nfinal_wf <- \n tree_wf %>% \n finalize_workflow(best_tree)\n\nfinal_wf\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Formula\n#> Model: decision_tree()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> class ~ .\n#> \n#> ── Model 
─────────────────────────────────────────────────────────────\n#> Decision Tree Model Specification (classification)\n#> \n#> Main Arguments:\n#> cost_complexity = 1e-10\n#> tree_depth = 4\n#> \n#> Computational engine: rpart\n```\n:::\n\n\nOur tuning is done!\n\n### The last fit\n\nFinally, let's fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. We can use the function [`last_fit()`](https://tune.tidymodels.org/reference/last_fit.html) with our finalized model; this function *fits* the finalized model on the full training data set and *evaluates* the finalized model on the testing data.\n\n\n::: {.cell layout-align=\"center\" hash='cache/last-fit_f43e4f1224d157f36174805864397d06'}\n\n```{.r .cell-code}\nfinal_fit <- \n final_wf %>%\n last_fit(cell_split) \n\nfinal_fit %>%\n collect_metrics()\n#> # A tibble: 2 × 4\n#> .metric .estimator .estimate .config \n#> \n#> 1 accuracy binary 0.802 Preprocessor1_Model1\n#> 2 roc_auc binary 0.840 Preprocessor1_Model1\n\nfinal_fit %>%\n collect_predictions() %>% \n roc_curve(class, .pred_PS) %>% \n autoplot()\n```\n\n::: {.cell-output-display}\n![](figs/last-fit-1.svg){fig-align='center' width=672}\n:::\n:::\n\n\nThe performance metrics from the test set indicate that we did not overfit during our tuning procedure.\n\nThe `final_fit` object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. 
You may want to extract this object, using [one of the `extract_` helper functions](https://tune.tidymodels.org/reference/extract-tune.html).\n\n\n::: {.cell layout-align=\"center\" hash='cache/last-fit-wf_e9628897749dc6b616930438ec04247f'}\n\n```{.r .cell-code}\nfinal_tree <- extract_workflow(final_fit)\nfinal_tree\n#> ══ Workflow [trained] ════════════════════════════════════════════════\n#> Preprocessor: Formula\n#> Model: decision_tree()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> class ~ .\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> n= 1514 \n#> \n#> node), split, n, loss, yval, (yprob)\n#> * denotes terminal node\n#> \n#> 1) root 1514 539 PS (0.64398943 0.35601057) \n#> 2) total_inten_ch_2< 41732.5 642 33 PS (0.94859813 0.05140187) \n#> 4) shape_p_2_a_ch_1>=1.251801 631 27 PS (0.95721078 0.04278922) *\n#> 5) shape_p_2_a_ch_1< 1.251801 11 5 WS (0.45454545 0.54545455) *\n#> 3) total_inten_ch_2>=41732.5 872 366 WS (0.41972477 0.58027523) \n#> 6) fiber_width_ch_1< 11.37318 406 160 PS (0.60591133 0.39408867) \n#> 12) avg_inten_ch_1< 145.4883 293 85 PS (0.70989761 0.29010239) *\n#> 13) avg_inten_ch_1>=145.4883 113 38 WS (0.33628319 0.66371681) \n#> 26) total_inten_ch_3>=57919.5 33 10 PS (0.69696970 0.30303030) *\n#> 27) total_inten_ch_3< 57919.5 80 15 WS (0.18750000 0.81250000) *\n#> 7) fiber_width_ch_1>=11.37318 466 120 WS (0.25751073 0.74248927) \n#> 14) eq_ellipse_oblate_vol_ch_1>=1673.942 30 8 PS (0.73333333 0.26666667) \n#> 28) var_inten_ch_3>=41.10858 20 2 PS (0.90000000 0.10000000) *\n#> 29) var_inten_ch_3< 41.10858 10 4 WS (0.40000000 0.60000000) *\n#> 15) eq_ellipse_oblate_vol_ch_1< 1673.942 436 98 WS (0.22477064 0.77522936) *\n```\n:::\n\n\nWe can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit.\n\n\n::: {.cell layout-align=\"center\" hash='cache/rpart-plot_0d99c5d26ec04a41d741cd3fd4721a6f'}\n\n```{.r 
.cell-code}\nfinal_tree %>%\n extract_fit_engine() %>%\n rpart.plot(roundint = FALSE)\n```\n\n::: {.cell-output-display}\n![](figs/rpart-plot-1.svg){fig-align='center' width=768}\n:::\n:::\n\n\nPerhaps we would also like to understand what variables are important in this final model. We can use the [vip](https://koalaverse.github.io/vip/) package to estimate variable importance [based on the model's structure](https://koalaverse.github.io/vip/reference/vi_model.html#details).\n\n\n::: {.cell layout-align=\"center\" hash='cache/vip_6dfe96ef8d26fe2d8082e117fef2f980'}\n\n```{.r .cell-code}\nlibrary(vip)\n\nfinal_tree %>% \n extract_fit_parsnip() %>% \n vip()\n```\n\n::: {.cell-output-display}\n![](figs/vip-1.svg){fig-align='center' width=576}\n:::\n:::\n\n\nThese are the automated image analysis measurements that are the most important in driving segmentation quality predictions.\n\nWe leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the [reference docs](/find/parsnip/#models), or use the `args()` function to see which parsnip object arguments are available:\n\n\n::: {.cell layout-align=\"center\" hash='cache/unnamed-chunk-21_16c85924d058e4cd725a03d37ec2c7a5'}\n\n```{.r .cell-code}\nargs(decision_tree)\n#> function (mode = \"unknown\", engine = \"rpart\", cost_complexity = NULL, \n#> tree_depth = NULL, min_n = NULL) \n#> NULL\n```\n:::\n\n\nYou could tune the other hyperparameter we didn't use here, `min_n`, which sets the minimum `n` to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this [searchable table](/find/parsnip/#model-args) to find the original argument for `min_n` in the rpart package ([hint](https://stat.ethz.ch/R-manual/R-devel/library/rpart/html/rpart.control.html)). 
See whether you can tune a different combination of hyperparameters and/or values to improve a tree's ability to predict cell segmentation quality.\n\n## Session information {#session-info}\n\n\n::: {.cell layout-align=\"center\" hash='cache/si_43a75b68dcc94565ba13180d7ad26a69'}\n\n```\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rpart * 4.1.19 2022-10-21 [2] CRAN (R 4.3.0)\n#> rpart.plot * 3.1.1 2022-05-21 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> vip * 0.3.2 2020-12-17 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────\n```\n:::\n", + "supporting": [], + "filters": [ + 
"rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 00000000..36634075 --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,107 @@ +project: + type: website + output-dir: docs + render: + - "*.qmd" + - "!STYLING.md" + +execute: + freeze: auto + +website: + title: "tidymodels" + navbar: + pinned: true + right: + - text: "Packages" + href: packages/index.qmd + - text: "Get Started" + href: start/index.qmd + - text: "Learn" + href: learn/index.qmd + - text: "Help" + href: help/index.qmd + - text: "Contribute" + href: contribute/index.qmd + - text: "About" + href: about/index.qmd + - text: "Find" + href: find/index.qmd + - icon: github + href: https://github.com/tidymodels/ + aria-label: GitHub + + sidebar: + - id: get-started + style: "floating" + contents: + - text: "GET STARTED" + href: start/index.qmd + - text: "Build a model" + href: start/models/index.qmd + - text: "Preprocess your data with recipes" + href: start/recipes/index.qmd + - text: "Evaluate your model with resamples" + href: start/resampling/index.qmd + - text: "Tune model parameters" + href: start/tuning/index.qmd + - text: "A predictive modeling case study" + href: start/case-study/index.qmd + - id: learn + collapse-level: 1 + contents: + - text: "Learn" + href: learn/index.qmd + - section: "Perform Statistical Analyses" + contents: + - learn/statistics/bootstrap/index.qmd + - learn/statistics/infer/index.qmd + - learn/statistics/k-means/index.qmd + - learn/statistics/tidy-analysis/index.qmd + - learn/statistics/xtabs/index.qmd + - section: "Create Robust Models" + contents: + - learn/models/coefficients/index.qmd + - learn/models/parsnip-nnet/index.qmd + - learn/models/parsnip-ranger-glmnet/index.qmd + - learn/models/pls/index.qmd + - learn/models/sub-sampling/index.qmd + - learn/models/time-series/index.qmd + - section: "Develop custom modeling 
tools" + contents: + - learn/develop/broom/index.qmd + - learn/develop/metrics/index.qmd + - learn/develop/models/index.qmd + - learn/develop/parameters/index.qmd + - learn/develop/recipes/index.qmd + - section: "Tune, compare, and work with your models" + contents: + - learn/work/bayes-opt/index.qmd + - learn/work/case-weights/index.qmd + - learn/work/nested-resampling/index.qmd + - learn/work/tune-svm/index.qmd + - learn/work/tune-text/index.qmd + + page-footer: + left: | + Proudly supported by + [![](https://www.rstudio.com/assets/img/posit-logo-fullcolor-TM.svg){fig-alt="Posit" width=65px}](https://posit.co) + right: | + + {{< fa chevron-up >}} + + repo-url: https://github.com/tidymodels/tidymodels_dot_org + repo-actions: [edit, issue] + +format: + html: + quarto-required: ">= 1.3.353" + toc: true + linestretch: 1.6 + grid: + body-width: 840px + +theme: + - cosmo + - styles.scss + - styles-frontpage.scss diff --git a/content/author/davis/_index.md b/about/davis/index.qmd similarity index 100% rename from content/author/davis/_index.md rename to about/davis/index.qmd diff --git a/about/index.qmd b/about/index.qmd new file mode 100644 index 00000000..274f3099 --- /dev/null +++ b/about/index.qmd @@ -0,0 +1,6 @@ +--- +title: Meet the team +toc: true +toc-depth: 0 +include-after-body: ../resources.html +--- diff --git a/content/author/max/_index.md b/about/max/index.qmd similarity index 100% rename from content/author/max/_index.md rename to about/max/index.qmd diff --git a/content/books/fes/cover.jpg b/books/fes/cover.jpg similarity index 100% rename from content/books/fes/cover.jpg rename to books/fes/cover.jpg diff --git a/content/books/fes/index.md b/books/fes/index.qmd similarity index 100% rename from content/books/fes/index.md rename to books/fes/index.qmd diff --git a/books/index.qmd b/books/index.qmd new file mode 100644 index 00000000..ac7be1b9 --- /dev/null +++ b/books/index.qmd @@ -0,0 +1,12 @@ +--- +title: Books +description: "Study up on statistics and 
modeling with our comprehensive books." +toc: false +listing: + - id: book-links + categories: unnumbered + type: default + page-size: 40 + contents: + - "*.qmd" +--- diff --git a/content/books/moderndive/cover.png b/books/moderndive/cover.png similarity index 100% rename from content/books/moderndive/cover.png rename to books/moderndive/cover.png diff --git a/content/books/moderndive/index.md b/books/moderndive/index.qmd similarity index 100% rename from content/books/moderndive/index.md rename to books/moderndive/index.qmd diff --git a/content/books/smltar/cover.png b/books/smltar/cover.png similarity index 100% rename from content/books/smltar/cover.png rename to books/smltar/cover.png diff --git a/content/books/smltar/index.md b/books/smltar/index.qmd similarity index 100% rename from content/books/smltar/index.md rename to books/smltar/index.qmd diff --git a/content/books/tidytext/cover.png b/books/tidytext/cover.png similarity index 100% rename from content/books/tidytext/cover.png rename to books/tidytext/cover.png diff --git a/content/books/tidytext/index.md b/books/tidytext/index.qmd similarity index 100% rename from content/books/tidytext/index.md rename to books/tidytext/index.qmd diff --git a/content/books/tmwr/cover.png b/books/tmwr/cover.png similarity index 100% rename from content/books/tmwr/cover.png rename to books/tmwr/cover.png diff --git a/content/books/tmwr/index.md b/books/tmwr/index.qmd similarity index 100% rename from content/books/tmwr/index.md rename to books/tmwr/index.qmd diff --git a/common.R b/common.R new file mode 100644 index 00000000..12420c08 --- /dev/null +++ b/common.R @@ -0,0 +1,40 @@ + +print(here::here()) + +knitr::opts_chunk$set( + digits = 3, + comment = "#>", + dev = 'svglite', + dev.args = list(bg = "transparent"), + fig.path = "figs/", + fig.align = "center", + collapse = TRUE, + cache = TRUE, + cache.path = "cache/" +) +options(width = 80, cli.width = 70) + +article_req_pkgs <- function(x, what = "To use code in this 
article, ") { + x <- sort(x) + x <- knitr::combine_words(x, and = " and ") + paste0( + what, + " you will need to install the following packages: ", + x, "." + ) +} +small_session <- function(pkgs = NULL) { + pkgs <- c(pkgs, "recipes", "parsnip", "tune", "workflows", "dials", "dplyr", + "broom", "ggplot2", "purrr", "rlang", "rsample", "tibble", "infer", + "yardstick", "tidymodels", "infer") + pkgs <- unique(pkgs) + library(sessioninfo) + library(dplyr) + sinfo <- sessioninfo::session_info() + cls <- class(sinfo$packages) + sinfo$packages <- + sinfo$packages %>% + dplyr::filter(package %in% pkgs) + class(sinfo$packages) <- cls + sinfo +} diff --git a/config.toml b/config.toml deleted file mode 100644 index bc25889c..00000000 --- a/config.toml +++ /dev/null @@ -1,49 +0,0 @@ -# Title of your site -title = "Tidymodels" - -# The URL of your site. -# End your URL with a `/` trailing slash, e.g. `https://example.com/`. -baseurl = "https://www.tidymodels.org/" - -# Name the theme folder in `themes/`. -theme = "hugo-tourmaline" - -# Enable analytics by entering your Google Analytics tracking ID -googleAnalytics = "UA-20375833-29" - -relativeurls = false -languageCode = "en-us" -description = "A collection of R packages for modeling and machine learning using tidyverse principles." -disqusShortname = "" -ignoreFiles = ["\\.Rmd$", "_cache$", "\\.Rmarkdown$", "\\.knit\\.md$", "\\.utf8\\.md$"] -# below is critical for non-authors to show up in article listing -preserveTaxonomyNames = true -# uncomment the next line to disable listing authors -# disableKinds = ["taxonomyTerm"] - -[permalinks] - articles = "news/:year/:month/:slug/" - -[taxonomies] - author = "author" - category = "categories" - tag = "tags" - -# Configure BlackFriday Markdown rendering. -# See: https://gohugo.io/getting-started/configuration/#configure-blackfriday -[blackfriday] - hrefTargetBlank = true # `true` opens external links in a new tab. 
See https://github.com/gohugoio/hugo/issues/2424 - angledQuotes = false - latexDashes = true - extensions = ["backslashLineBreak"] - -[markup] - defaultMarkdownHandler = "goldmark" - [markup.tableOfContents] - endLevel = 2 - ordered = false - startLevel = 2 - [markup.goldmark.renderer] - unsafe = true - [markup.highlight] - style = "pygments" diff --git a/config/_default/menus.toml b/config/_default/menus.toml deleted file mode 100644 index 3a9434cb..00000000 --- a/config/_default/menus.toml +++ /dev/null @@ -1,33 +0,0 @@ -# Navigation Bar Links -# The weight parameter defines the order that the links will appear in. - -[[main]] - name = "Packages" - url = "/packages/" - weight = 1 -[[main]] - name = "Get Started" - url = "/start/" - weight = 2 -[[main]] - name = "Learn" - url = "/learn/" - weight = 3 -[[main]] - name = "Help" - url = "/help/" - weight = 4 -[[main]] - name = "Contribute" - url = "/contribute/" - weight = 5 -[[main]] - name = " " - url = "/find/" - pre = "" - weight = 6 -[[main]] - pre = "" - url = "https://github.com/tidymodels/" - weight = 7 - diff --git a/config/_default/params.toml b/config/_default/params.toml deleted file mode 100644 index a7b96a4a..00000000 --- a/config/_default/params.toml +++ /dev/null @@ -1,77 +0,0 @@ -# Description for social sharing and search engines. -description = "A collection of R packages for modeling and machine learning using tidyverse principles." - -# Default image for social sharing and search engines. Place image in `static/images/` folder and specify image name here. 
-sharing_image = "feature_summary_large_image.jpg" -twitter_image = "feature_summary_large_image.jpg" - -# Link custom CSS and JS assets -# (relative to /static/css and /static/js respectively) -custom_css = ["tm.css"] -custom_js = ["tm.js"] - -# Define base hex url for index page (where the stickers link out to) -hex_base_url = ".tidymodels.org/" -hex_ext = "svg" - -# Define "news" as both articles and events -mainSections = ["articles", "events"] - -# options for highlight.js (version, additional languages, and theme) -disable_highlightjs = "true" -highlightjsVersion = "9.11.0" -highlightjsCDN = "//cdn.bootcss.com" -highlightjsLang = ["r", "yaml"] -highlightjsTheme = "github" - -MathJaxCDN = "//cdn.bootcss.com" -MathJaxVersion = "2.7.1" - -[footer] - github_url = "" - twitter_url = "" - support = "Proudly supported by" - -# leave blank in quotes for the default = true -# to turn off, set = false (no quotes) -[authors] - show_list = "" # on list page, default is true - show_single = "" # on single pages, default is true - show_latest = "" # on bio, default is true - label_latest = "" # on bio, default is "Latest" - -# leave blank in quotes for the default = true -# to turn off, set = false (no quotes) -[events] - link_archive = "" # default is true - -# customize your site colors -# these colors get applied by layouts/partials/custom-site.css -# then, the layouts/partials/head_includes.html uses that partial -# the partial head_includes.html is called in the partial header.html everywhere! 
-[colors] - text = "#404040" # off-black - # Links - link = "#CA225E" # main accent - link_hover = "#cc3168" #lighter - # Footer - footer_primary = "#CA225E40" - footer_text = "#CA225E" - # Backgrounds - background = "#ffffff" - home_section_odd = "#ffffff" - home_section_even = "#fcfcfc" - # Menu - menu_primary = "#ffffff" - menu_text = "black" - menu_text_active = "#CA225E" - menu_text_current = "#CA225E" - menu_decorate_current = "none" - menu_title = "#CA225E" # Tidymodels - menu_title_active = "#00000075" - menu_box_hover = "white" - menu_box_active = "#fcfcfc" - # Tables - header_background = "#1a162d80" - table_row_even = "#fcfcfc" - table_row_odd = "#1a162d10" diff --git a/content/author/_index.md b/content/author/_index.md deleted file mode 100644 index 7c94d5ae..00000000 --- a/content/author/_index.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -title: Meet the team ---- diff --git a/content/books/_index.md b/content/books/_index.md deleted file mode 100644 index fb6c9168..00000000 --- a/content/books/_index.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Books -description: "Study up on statistics and modeling with our comprehensive books." ---- diff --git a/content/contribute/index.md b/content/contribute/index.md deleted file mode 100644 index 3f0946f5..00000000 --- a/content/contribute/index.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: How to contribute to tidymodels ---- - -The ecosystem of tidymodels packages would not be possible without the contributions of the R community. No matter your current skills, it's possible to contribute back to tidymodels. Contributions are guided by our design goals. - -## Design goals - -The goals of tidymodels packages are to: - - * Encourage empirical validation and good statistical practice. - - * Smooth out heterogeneous interfaces. - - * Establish highly reusable infrastructure. - - * Enable a wider variety of methodologies. - - * Help package developers quickly build high quality model packages of their own. 
- -These goals are guided by our [principles for creating modeling packages](https://tidymodels.github.io/model-implementation-principles/). - -**What are different ways _you_ can contribute?** - -## Answer questions - -You can help others use and learn tidymodels by answering questions on the [RStudio community site](https://community.rstudio.com/tag/tidymodels), [Stack Overflow](https://stackoverflow.com/questions/tagged/tidymodels?sort=newest), and [Twitter](https://twitter.com/search?q=%23tidymodels&f=live). Many people asking for help with tidymodels don't know what a [reprex](https://www.tidyverse.org/help#reprex) is or how to craft one. Acknowledging an individual's problem, showing them how to build a reprex, and pointing them to helpful resources are all enormously beneficial, even if you don't immediately solve their problem. - -Remember that while you might have seen a problem a hundred times before, it's new to the person asking it. Be patient, polite, and empathic. - -## File issues - -If you've found a bug, first create a minimal [reprex](https://www.tidyverse.org/help#reprex). Spend some time working to make it as minimal as possible; the more time you spend doing this, the easier it is to fix the bug. When your reprex is ready, file it on the [GitHub repo](https://github.com/tidymodels/) of the appropriate package. - -The tidymodels team often focuses on one package at a time to reduce context switching and be more efficient. We may not address each issue right away, but we will use the reprex you create to understand your problem when it is time to focus on that package. - -## Contribute documentation - -Documentation is a high priority for tidymodels, and pull requests to correct or improve documentation are welcome. The most important thing to know is that tidymodels packages use [roxygen2](https://roxygen2.r-lib.org/); this means that documentation is found in the R code close to the source of each function. 
There are some special tags, but most tidymodels packages now use markdown in the documentation. This makes it particularly easy to get started! - - -## Contribute code - -If you are a more experienced R programmer, you may have the inclination, interest, and ability to contribute directly to package development. Before you submit a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal. - -In tidymodels packages, we use the [tidyverse style guide](https://style.tidyverse.org/) which will make sure that your new code and documentation matches the existing style. This makes the review process much smoother. - -The tidymodels packages are explicitly built to support the creation of other modeling packages, and we would love to hear about what you build yourself! Check out our learning resources for [developing custom modeling tools](/learn/develop/). - diff --git a/content/find/_index.md b/content/find/_index.md deleted file mode 100644 index dea412d6..00000000 --- a/content/find/_index.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: "Explore tidymodels" -description: "Explore searchable tables of all tidymodels packages and functions." ---- - -Below you'll find searchable tables to help you explore the tidymodels packages and functions. The tables also include links to the relevant reference page to help you navigate the package documentation. 
Use the following categories to guide you: - diff --git a/content/find/all/index.Rmd b/content/find/all/index.Rmd deleted file mode 100644 index 2f31dcd8..00000000 --- a/content/find/all/index.Rmd +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "Search all of tidymodels" -disable_jquery: true -weight: 1 ---- - -```{r ex_setup, include=FALSE} -knitr::opts_chunk$set( - message = FALSE, - digits = 3, - collapse = TRUE, - comment = "#>" - ) -options(digits = 3) -script <- here::here("static/code/get_pkgdown_urls.R") -source(script) -library(DT) -``` - - -Here are all the functions available across all of the [tidymodels packages](/packages/). Click on the link in the **topic** column to find the relevant reference documentation. - -
    - -```{r table-compute, include = FALSE} -pkgs <- c("applicable", "baguette", "broom", "brulee", "butcher", - "corrr", "dials", "discrim", - "embed", "finetune", "hardhat", "infer", "modeldata", "modeldb", - "multilevelmod", "parsnip", "plsmod", - "poissonreg", "probably", "recipes", "rsample", "rules", - "shinymodels", "spatialsample", "stacks", - "textrecipes", "themis", "tidypredict", "tune", "yardstick", - "usemodels", "workflows", "workflowsets") -pkgs <- tibble(pkg = pkgs, - base_url = paste0("https://", pkg, ".tidymodels.org/")) - -pkg_urls <- get_pkgdown_urls(pkgs) %>% - select(title, topic, package) -``` - - -```{r table-display, echo = FALSE} -pkg_urls %>% - mutate(package = as.factor(package)) %>% - datatable(rownames = FALSE, - class = 'cell-border stripe', - filter = 'top', - escape = FALSE, - options = list(pageLength = 5)) -``` - - - - - diff --git a/content/find/all/index.html b/content/find/all/index.html deleted file mode 100644 index 0e1b70b5..00000000 --- a/content/find/all/index.html +++ /dev/null @@ -1,25 +0,0 @@ ---- -title: "Search all of tidymodels" -disable_jquery: true -weight: 1 ---- - - - - - - - - - - - - - - - - -

    Here are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation.

    -


    -
    - diff --git a/content/find/all/index_files/datatables-binding/datatables.js b/content/find/all/index_files/datatables-binding/datatables.js deleted file mode 100644 index fcee8ce2..00000000 --- a/content/find/all/index_files/datatables-binding/datatables.js +++ /dev/null @@ -1,1512 +0,0 @@ -(function() { - -// some helper functions: using a global object DTWidget so that it can be used -// in JS() code, e.g. datatable(options = list(foo = JS('code'))); unlike R's -// dynamic scoping, when 'code' is eval()'ed, JavaScript does not know objects -// from the "parent frame", e.g. JS('DTWidget') will not work unless it was made -// a global object -var DTWidget = {}; - -// 123456666.7890 -> 123,456,666.7890 -var markInterval = function(d, digits, interval, mark, decMark, precision) { - x = precision ? d.toPrecision(digits) : d.toFixed(digits); - if (!/^-?[\d.]+$/.test(x)) return x; - var xv = x.split('.'); - if (xv.length > 2) return x; // should have at most one decimal point - xv[0] = xv[0].replace(new RegExp('\\B(?=(\\d{' + interval + '})+(?!\\d))', 'g'), mark); - return xv.join(decMark); -}; - -DTWidget.formatCurrency = function(data, currency, digits, interval, mark, decMark, before, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - var res = markInterval(d, digits, interval, mark, decMark); - res = before ? (/^-/.test(res) ? 
'-' + currency + res.replace(/^-/, '') : currency + res) : - res + currency; - return res; -}; - -DTWidget.formatString = function(data, prefix, suffix) { - var d = data; - if (d === null) return ''; - return prefix + d + suffix; -}; - -DTWidget.formatPercentage = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d * 100, digits, interval, mark, decMark) + '%'; -}; - -DTWidget.formatRound = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d, digits, interval, mark, decMark); -}; - -DTWidget.formatSignif = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d, digits, interval, mark, decMark, true); -}; - -DTWidget.formatDate = function(data, method, params) { - var d = data; - if (d === null) return ''; - // (new Date('2015-10-28')).toDateString() may return 2015-10-27 because the - // actual time created could be like 'Tue Oct 27 2015 19:00:00 GMT-0500 (CDT)', - // i.e. 
the date-only string is treated as UTC time instead of local time - if ((method === 'toDateString' || method === 'toLocaleDateString') && /^\d{4,}\D\d{2}\D\d{2}$/.test(d)) { - d = d.split(/\D/); - d = new Date(d[0], d[1] - 1, d[2]); - } else { - d = new Date(d); - } - return d[method].apply(d, params); -}; - -window.DTWidget = DTWidget; - -// A helper function to update the properties of existing filters -var setFilterProps = function(td, props) { - // Update enabled/disabled state - var $input = $(td).find('input').first(); - var searchable = $input.data('searchable'); - $input.prop('disabled', !searchable || props.disabled); - - // Based on the filter type, set its new values - var type = td.getAttribute('data-type'); - if (['factor', 'logical'].includes(type)) { - // Reformat the new dropdown options for use with selectize - var new_vals = props.params.options.map(function(item) { - return { text: item, value: item }; - }); - - // Find the selectize object - var dropdown = $(td).find('.selectized').eq(0)[0].selectize; - - // Note the current values - var old_vals = dropdown.getValue(); - - // Remove the existing values - dropdown.clearOptions(); - - // Add the new options - dropdown.addOption(new_vals); - - // Preserve the existing values - dropdown.setValue(old_vals); - - } else if (['number', 'integer', 'date', 'time'].includes(type)) { - // Apply internal scaling to new limits. Updating scale not yet implemented. 
- var slider = $(td).find('.noUi-target').eq(0); - var scale = Math.pow(10, Math.max(0, +slider.data('scale') || 0)); - var new_vals = [props.params.min * scale, props.params.max * scale]; - - // Note what the new limits will be just for this filter - var new_lims = new_vals.slice(); - - // Determine the current values and limits - var old_vals = slider.val().map(Number); - var old_lims = slider.noUiSlider('options').range; - old_lims = [old_lims.min, old_lims.max]; - - // Preserve the current values if filters have been applied; otherwise, apply no filtering - if (old_vals[0] != old_lims[0]) { - new_vals[0] = Math.max(old_vals[0], new_vals[0]); - } - - if (old_vals[1] != old_lims[1]) { - new_vals[1] = Math.min(old_vals[1], new_vals[1]); - } - - // Update the endpoints of the slider - slider.noUiSlider({ - start: new_vals, - range: {'min': new_lims[0], 'max': new_lims[1]} - }, true); - } -}; - -var transposeArray2D = function(a) { - return a.length === 0 ? a : HTMLWidgets.transposeArray2D(a); -}; - -var crosstalkPluginsInstalled = false; - -function maybeInstallCrosstalkPlugins() { - if (crosstalkPluginsInstalled) - return; - crosstalkPluginsInstalled = true; - - $.fn.dataTable.ext.afnFiltering.push( - function(oSettings, aData, iDataIndex) { - var ctfilter = oSettings.nTable.ctfilter; - if (ctfilter && !ctfilter[iDataIndex]) - return false; - - var ctselect = oSettings.nTable.ctselect; - if (ctselect && !ctselect[iDataIndex]) - return false; - - return true; - } - ); -} - -HTMLWidgets.widget({ - name: "datatables", - type: "output", - renderOnNullValue: true, - initialize: function(el, width, height) { - // in order that the type=number inputs return a number - $.valHooks.number = { - get: function(el) { - var value = parseFloat(el.value); - return isNaN(value) ? 
"" : value; - } - }; - $(el).html(' '); - return { - data: null, - ctfilterHandle: new crosstalk.FilterHandle(), - ctfilterSubscription: null, - ctselectHandle: new crosstalk.SelectionHandle(), - ctselectSubscription: null - }; - }, - renderValue: function(el, data, instance) { - if (el.offsetWidth === 0 || el.offsetHeight === 0) { - instance.data = data; - return; - } - instance.data = null; - var $el = $(el); - $el.empty(); - - if (data === null) { - $el.append(' '); - // clear previous Shiny inputs (if any) - for (var i in instance.clearInputs) instance.clearInputs[i](); - instance.clearInputs = {}; - return; - } - - var crosstalkOptions = data.crosstalkOptions; - if (!crosstalkOptions) crosstalkOptions = { - 'key': null, 'group': null - }; - if (crosstalkOptions.group) { - maybeInstallCrosstalkPlugins(); - instance.ctfilterHandle.setGroup(crosstalkOptions.group); - instance.ctselectHandle.setGroup(crosstalkOptions.group); - } - - // if we are in the viewer then we always want to fillContainer and - // and autoHideNavigation (unless the user has explicitly set these) - if (window.HTMLWidgets.viewerMode) { - if (!data.hasOwnProperty("fillContainer")) - data.fillContainer = true; - if (!data.hasOwnProperty("autoHideNavigation")) - data.autoHideNavigation = true; - } - - // propagate fillContainer to instance (so we have it in resize) - instance.fillContainer = data.fillContainer; - - var cells = data.data; - - if (cells instanceof Array) cells = transposeArray2D(cells); - - $el.append(data.container); - var $table = $el.find('table'); - if (data.class) $table.addClass(data.class); - if (data.caption) $table.prepend(data.caption); - - if (!data.selection) data.selection = { - mode: 'none', selected: null, target: 'row', selectable: null - }; - if (HTMLWidgets.shinyMode && data.selection.mode !== 'none' && - data.selection.target === 'row+column') { - if ($table.children('tfoot').length === 0) { - $table.append($('')); - $table.find('thead 
tr').clone().appendTo($table.find('tfoot')); - } - } - - // column filters - var filterRow; - switch (data.filter) { - case 'top': - $table.children('thead').append(data.filterHTML); - filterRow = $table.find('thead tr:last td'); - break; - case 'bottom': - if ($table.children('tfoot').length === 0) { - $table.append($('')); - } - $table.children('tfoot').prepend(data.filterHTML); - filterRow = $table.find('tfoot tr:first td'); - break; - } - - var options = { searchDelay: 1000 }; - if (cells !== null) $.extend(options, { - data: cells - }); - - // options for fillContainer - var bootstrapActive = typeof($.fn.popover) != 'undefined'; - if (instance.fillContainer) { - - // force scrollX/scrollY and turn off autoWidth - options.scrollX = true; - options.scrollY = "100px"; // can be any value, we'll adjust below - - // if we aren't paginating then move around the info/filter controls - // to save space at the bottom and rephrase the info callback - if (data.options.paging === false) { - - // we know how to do this cleanly for bootstrap, not so much - // for other themes/layouts - if (bootstrapActive) { - options.dom = "<'row'<'col-sm-4'i><'col-sm-8'f>>" + - "<'row'<'col-sm-12'tr>>"; - } - - options.fnInfoCallback = function(oSettings, iStart, iEnd, - iMax, iTotal, sPre) { - return Number(iTotal).toLocaleString() + " records"; - }; - } - } - - // auto hide navigation if requested - // Note, this only works on client-side processing mode as on server-side, - // cells (data.data) is null; In addition, we require the pageLength option - // being provided explicitly to enable this. Despite we may be able to deduce - // the default value of pageLength, it may complicate things so we'd rather - // put this responsiblity to users and warn them on the R side. - if (data.autoHideNavigation === true && data.options.paging !== false) { - // strip all nav if length >= cells - if ((cells instanceof Array) && data.options.pageLength >= cells.length) - options.dom = bootstrapActive ? 
"<'row'<'col-sm-12'tr>>" : "t"; - // alternatively lean things out for flexdashboard mobile portrait - else if (bootstrapActive && window.FlexDashboard && window.FlexDashboard.isMobilePhone()) - options.dom = "<'row'<'col-sm-12'f>>" + - "<'row'<'col-sm-12'tr>>" + - "<'row'<'col-sm-12'p>>"; - } - - $.extend(true, options, data.options || {}); - - var searchCols = options.searchCols; - if (searchCols) { - searchCols = searchCols.map(function(x) { - return x === null ? '' : x.search; - }); - // FIXME: this means I don't respect the escapeRegex setting - delete options.searchCols; - } - - // server-side processing? - var server = options.serverSide === true; - - // use the dataSrc function to pre-process JSON data returned from R - var DT_rows_all = [], DT_rows_current = []; - if (server && HTMLWidgets.shinyMode && typeof options.ajax === 'object' && - /^session\/[\da-z]+\/dataobj/.test(options.ajax.url) && !options.ajax.dataSrc) { - options.ajax.dataSrc = function(json) { - DT_rows_all = $.makeArray(json.DT_rows_all); - DT_rows_current = $.makeArray(json.DT_rows_current); - var data = json.data; - if (!colReorderEnabled()) return data; - var table = $table.DataTable(), order = table.colReorder.order(), flag = true, i, j, row; - for (i = 0; i < order.length; ++i) if (order[i] !== i) flag = false; - if (flag) return data; - for (i = 0; i < data.length; ++i) { - row = data[i].slice(); - for (j = 0; j < order.length; ++j) data[i][j] = row[order[j]]; - } - return data; - }; - } - - var thiz = this; - if (instance.fillContainer) $table.on('init.dt', function(e) { - thiz.fillAvailableHeight(el, $(el).innerHeight()); - }); - // If the page contains serveral datatables and one of which enables colReorder, - // the table.colReorder.order() function will exist but throws error when called. - // So it seems like the only way to know if colReorder is enabled or not is to - // check the options. 
- var colReorderEnabled = function() { return "colReorder" in options; }; - var table = $table.DataTable(options); - $el.data('datatable', table); - - // Unregister previous Crosstalk event subscriptions, if they exist - if (instance.ctfilterSubscription) { - instance.ctfilterHandle.off("change", instance.ctfilterSubscription); - instance.ctfilterSubscription = null; - } - if (instance.ctselectSubscription) { - instance.ctselectHandle.off("change", instance.ctselectSubscription); - instance.ctselectSubscription = null; - } - - if (!crosstalkOptions.group) { - $table[0].ctfilter = null; - $table[0].ctselect = null; - } else { - var key = crosstalkOptions.key; - function keysToMatches(keys) { - if (!keys) { - return null; - } else { - var selectedKeys = {}; - for (var i = 0; i < keys.length; i++) { - selectedKeys[keys[i]] = true; - } - var matches = {}; - for (var j = 0; j < key.length; j++) { - if (selectedKeys[key[j]]) - matches[j] = true; - } - return matches; - } - } - - function applyCrosstalkFilter(e) { - $table[0].ctfilter = keysToMatches(e.value); - table.draw(); - } - instance.ctfilterSubscription = instance.ctfilterHandle.on("change", applyCrosstalkFilter); - applyCrosstalkFilter({value: instance.ctfilterHandle.filteredKeys}); - - function applyCrosstalkSelection(e) { - if (e.sender !== instance.ctselectHandle) { - table - .rows('.' 
+ selClass, {search: 'applied'}) - .nodes() - .to$() - .removeClass(selClass); - if (selectedRows) - changeInput('rows_selected', selectedRows(), void 0, true); - } - - if (e.sender !== instance.ctselectHandle && e.value && e.value.length) { - var matches = keysToMatches(e.value); - - // persistent selection with plotly (& leaflet) - var ctOpts = crosstalk.var("plotlyCrosstalkOpts").get() || {}; - if (ctOpts.persistent === true) { - var matches = $.extend(matches, $table[0].ctselect); - } - - $table[0].ctselect = matches; - table.draw(); - } else { - if ($table[0].ctselect) { - $table[0].ctselect = null; - table.draw(); - } - } - } - instance.ctselectSubscription = instance.ctselectHandle.on("change", applyCrosstalkSelection); - // TODO: This next line doesn't seem to work when renderDataTable is used - applyCrosstalkSelection({value: instance.ctselectHandle.value}); - } - - var inArray = function(val, array) { - return $.inArray(val, $.makeArray(array)) > -1; - }; - - // search the i-th column - var searchColumn = function(i, value) { - var regex = false, ci = true; - if (options.search) { - regex = options.search.regex, - ci = options.search.caseInsensitive !== false; - } - return table.column(i).search(value, regex, !regex, ci); - }; - - if (data.filter !== 'none') { - - filterRow.each(function(i, td) { - - var $td = $(td), type = $td.data('type'), filter; - var $input = $td.children('div').first().children('input'); - var disabled = $input.prop('disabled'); - var searchable = table.settings()[0].aoColumns[i].bSearchable; - $input.prop('disabled', !searchable || disabled); - $input.data('searchable', searchable); // for updating later - $input.on('input blur', function() { - $input.next('span').toggle(Boolean($input.val())); - }); - // Bootstrap sets pointer-events to none and we won't be able to click - // the clear button - $input.next('span').css('pointer-events', 'auto').hide().click(function() { - 
$(this).hide().prev('input').val('').trigger('input').focus(); - }); - var searchCol; // search string for this column - if (searchCols && searchCols[i]) { - searchCol = searchCols[i]; - $input.val(searchCol).trigger('input'); - } - var $x = $td.children('div').last(); - - // remove the overflow: hidden attribute of the scrollHead - // (otherwise the scrolling table body obscures the filters) - // The workaround and the discussion from - // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 - // Otherwise the filter selection will not be anchored to the values - // when the columns number is many and scrollX is enabled. - var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); - var cssOverflowHead = scrollHead.css('overflow'); - var scrollBody = $(el).find('.dataTables_scrollBody'); - var cssOverflowBody = scrollBody.css('overflow'); - var scrollTable = $(el).find('.dataTables_scroll'); - var cssOverflowTable = scrollTable.css('overflow'); - if (cssOverflowHead === 'hidden') { - $x.on('show hide', function(e) { - if (e.type === 'show') { - scrollHead.css('overflow', 'visible'); - scrollBody.css('overflow', 'visible'); - scrollTable.css('overflow-x', 'scroll'); - } else { - scrollHead.css('overflow', cssOverflowHead); - scrollBody.css('overflow', cssOverflowBody); - scrollTable.css('overflow-x', cssOverflowTable); - } - }); - $x.css('z-index', 25); - } - - if (inArray(type, ['factor', 'logical'])) { - $input.on({ - click: function() { - $input.parent().hide(); $x.show().trigger('show'); filter[0].selectize.focus(); - }, - input: function() { - if ($input.val() === '') filter[0].selectize.setValue([]); - } - }); - var $input2 = $x.children('select'); - filter = $input2.selectize({ - options: $input2.data('options').map(function(v, i) { - return ({text: v, value: v}); - }), - plugins: ['remove_button'], - hideSelected: true, - onChange: function(value) { - if (value === null) value = []; // compatibility with jQuery 3.0 - 
$input.val(value.length ? JSON.stringify(value) : ''); - if (value.length) $input.trigger('input'); - $input.attr('title', $input.val()); - if (server) { - table.column(i).search(value.length ? JSON.stringify(value) : '').draw(); - return; - } - // turn off filter if nothing selected - $td.data('filter', value.length > 0); - table.draw(); // redraw table, and filters will be applied - } - }); - if (searchCol) filter[0].selectize.setValue(JSON.parse(searchCol)); - filter[0].selectize.on('blur', function() { - $x.hide().trigger('hide'); $input.parent().show(); $input.trigger('blur'); - }); - filter.next('div').css('margin-bottom', 'auto'); - } else if (type === 'character') { - var fun = function() { - searchColumn(i, $input.val()).draw(); - }; - if (server) { - fun = $.fn.dataTable.util.throttle(fun, options.searchDelay); - } - $input.on('input', fun); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - var $x0 = $x; - $x = $x0.children('div').first(); - $x0.css({ - 'background-color': '#fff', - 'border': '1px #ddd solid', - 'border-radius': '4px', - 'padding': data.vertical ? '35px 20px': '20px 20px 10px 20px' - }); - var $spans = $x0.children('span').css({ - 'margin-top': data.vertical ? 
'0' : '10px', - 'white-space': 'nowrap' - }); - var $span1 = $spans.first(), $span2 = $spans.last(); - var r1 = +$x.data('min'), r2 = +$x.data('max'); - // when the numbers are too small or have many decimal places, the - // slider may have numeric precision problems (#150) - var scale = Math.pow(10, Math.max(0, +$x.data('scale') || 0)); - r1 = Math.round(r1 * scale); r2 = Math.round(r2 * scale); - var scaleBack = function(x, scale) { - if (scale === 1) return x; - var d = Math.round(Math.log(scale) / Math.log(10)); - // to avoid problems like 3.423/100 -> 0.034230000000000003 - return (x / scale).toFixed(d); - }; - var slider_min = function() { - return filter.noUiSlider('options').range.min; - }; - var slider_max = function() { - return filter.noUiSlider('options').range.max; - }; - $input.on({ - focus: function() { - $x0.show().trigger('show'); - // first, make sure the slider div leaves at least 20px between - // the two (slider value) span's - $x0.width(Math.max(160, $span1.outerWidth() + $span2.outerWidth() + 20)); - // then, if the input is really wide or slider is vertical, - // make the slider the same width as the input - if ($x0.outerWidth() < $input.outerWidth() || data.vertical) { - $x0.outerWidth($input.outerWidth()); - } - // make sure the slider div does not reach beyond the right margin - if ($(window).width() < $x0.offset().left + $x0.width()) { - $x0.offset({ - 'left': $input.offset().left + $input.outerWidth() - $x0.outerWidth() - }); - } - }, - blur: function() { - $x0.hide().trigger('hide'); - }, - input: function() { - if ($input.val() === '') filter.val([slider_min(), slider_max()]); - }, - change: function() { - var v = $input.val().replace(/\s/g, ''); - if (v === '') return; - v = v.split('...'); - if (v.length !== 2) { - $input.parent().addClass('has-error'); - return; - } - if (v[0] === '') v[0] = slider_min(); - if (v[1] === '') v[1] = slider_max(); - $input.parent().removeClass('has-error'); - // treat date as UTC time at midnight - 
var strTime = function(x) { - var s = type === 'date' ? 'T00:00:00Z' : ''; - var t = new Date(x + s).getTime(); - // add 10 minutes to date since it does not hurt the date, and - // it helps avoid the tricky floating point arithmetic problems, - // e.g. sometimes the date may be a few milliseconds earlier - // than the midnight due to precision problems in noUiSlider - return type === 'date' ? t + 3600000 : t; - }; - if (inArray(type, ['date', 'time'])) { - v[0] = strTime(v[0]); - v[1] = strTime(v[1]); - } - if (v[0] != slider_min()) v[0] *= scale; - if (v[1] != slider_max()) v[1] *= scale; - filter.val(v); - } - }); - var formatDate = function(d, isoFmt) { - d = scaleBack(d, scale); - if (type === 'number') return d; - if (type === 'integer') return parseInt(d); - var x = new Date(+d); - var fmt = ('filterDateFmt' in data) ? data.filterDateFmt[i] : undefined; - if (fmt !== undefined && isoFmt === false) return x[fmt.method].apply(x, fmt.params); - if (type === 'date') { - var pad0 = function(x) { - return ('0' + x).substr(-2, 2); - }; - return x.getUTCFullYear() + '-' + pad0(1 + x.getUTCMonth()) - + '-' + pad0(x.getUTCDate()); - } else { - return x.toISOString(); - } - }; - var opts = type === 'date' ? { step: 60 * 60 * 1000 } : - type === 'integer' ? { step: 1 } : {}; - - opts.orientation = data.vertical ? 'vertical': 'horizontal'; - opts.direction = data.vertical ? 
'rtl': 'ltr'; - - filter = $x.noUiSlider($.extend({ - start: [r1, r2], - range: {min: r1, max: r2}, - connect: true - }, opts)); - if (scale > 1) (function() { - var t1 = r1, t2 = r2; - var val = filter.val(); - while (val[0] > r1 || val[1] < r2) { - if (val[0] > r1) { - t1 -= val[0] - r1; - } - if (val[1] < r2) { - t2 += r2 - val[1]; - } - filter = $x.noUiSlider($.extend({ - start: [t1, t2], - range: {min: t1, max: t2}, - connect: true - }, opts), true); - val = filter.val(); - } - r1 = t1; r2 = t2; - })(); - var updateSliderText = function(v1, v2) { - $span1.text(formatDate(v1, false)); $span2.text(formatDate(v2, false)); - }; - updateSliderText(r1, r2); - var updateSlider = function(e) { - var val = filter.val(); - // turn off filter if in full range - $td.data('filter', val[0] > slider_min() || val[1] < slider_max()); - var v1 = formatDate(val[0]), v2 = formatDate(val[1]), ival; - if ($td.data('filter')) { - ival = v1 + ' ... ' + v2; - $input.attr('title', ival).val(ival).trigger('input'); - } else { - $input.attr('title', '').val(''); - } - updateSliderText(val[0], val[1]); - if (e.type === 'slide') return; // no searching when sliding only - if (server) { - table.column(i).search($td.data('filter') ? ival : '').draw(); - return; - } - table.draw(); - }; - filter.on({ - set: updateSlider, - slide: updateSlider - }); - } - - // server-side processing will be handled by R (or whatever server - // language you use); the following code is only needed for client-side - // processing - if (server) { - // if a search string has been pre-set, search now - if (searchCol) searchColumn(i, searchCol).draw(); - return; - } - - var customFilter = function(settings, data, dataIndex) { - // there is no way to attach a search function to a specific table, - // and we need to make sure a global search function is not applied to - // all tables (i.e. 
a range filter in a previous table should not be - // applied to the current table); we use the settings object to - // determine if we want to perform searching on the current table, - // since settings.sTableId will be different to different tables - if (table.settings()[0] !== settings) return true; - // no filter on this column or no need to filter this column - if (typeof filter === 'undefined' || !$td.data('filter')) return true; - - var r = filter.val(), v, r0, r1; - var i_data = function(i) { - if (!colReorderEnabled()) return i; - var order = table.colReorder.order(), k; - for (k = 0; k < order.length; ++k) if (order[k] === i) return k; - return i; // in theory it will never be here... - } - v = data[i_data(i)]; - if (type === 'number' || type === 'integer') { - v = parseFloat(v); - // how to handle NaN? currently exclude these rows - if (isNaN(v)) return(false); - r0 = parseFloat(scaleBack(r[0], scale)) - r1 = parseFloat(scaleBack(r[1], scale)); - if (v >= r0 && v <= r1) return true; - } else if (type === 'date' || type === 'time') { - v = new Date(v); - r0 = new Date(r[0] / scale); r1 = new Date(r[1] / scale); - if (v >= r0 && v <= r1) return true; - } else if (type === 'factor') { - if (r.length === 0 || inArray(v, r)) return true; - } else if (type === 'logical') { - if (r.length === 0) return true; - if (inArray(v === '' ? 
'na' : v, r)) return true; - } - return false; - }; - - $.fn.dataTable.ext.search.push(customFilter); - - // search for the preset search strings if it is non-empty - if (searchCol) { - if (inArray(type, ['factor', 'logical'])) { - filter[0].selectize.setValue(JSON.parse(searchCol)); - } else if (type === 'character') { - $input.trigger('input'); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - $input.trigger('change'); - } - } - - }); - - } - - // highlight search keywords - var highlight = function() { - var body = $(table.table().body()); - // removing the old highlighting first - body.unhighlight(); - - // don't highlight the "not found" row, so we get the rows using the api - if (table.rows({ filter: 'applied' }).data().length === 0) return; - // highlight global search keywords - body.highlight($.trim(table.search()).split(/\s+/)); - // then highlight keywords from individual column filters - if (filterRow) filterRow.each(function(i, td) { - var $td = $(td), type = $td.data('type'); - if (type !== 'character') return; - var $input = $td.children('div').first().children('input'); - var column = table.column(i).nodes().to$(), - val = $.trim($input.val()); - if (type !== 'character' || val === '') return; - column.highlight(val.split(/\s+/)); - }); - }; - - if (options.searchHighlight) { - table - .on('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth', highlight) - .on('destroy', function() { - // remove event handler - table.off('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth'); - }); - - // Set the option for escaping regex characters in our search string. This will be used - // for all future matching. 
- jQuery.fn.highlight.options.escapeRegex = (!options.search || !options.search.regex); - - // initial highlight for state saved conditions and initial states - highlight(); - } - - // run the callback function on the table instance - if (typeof data.callback === 'function') data.callback(table); - - // double click to edit the cell, row, column, or all cells - if (data.editable) table.on('dblclick.dt', 'tbody td', function(e) { - // only bring up the editor when the cell itself is dbclicked, and ignore - // other dbclick events bubbled up (e.g. from the ) - if (e.target !== this) return; - var target = [], immediate = false; - switch (data.editable.target) { - case 'cell': - target = [this]; - immediate = true; // edit will take effect immediately - break; - case 'row': - target = table.cells(table.cell(this).index().row, '*').nodes(); - break; - case 'column': - target = table.cells('*', table.cell(this).index().column).nodes(); - break; - case 'all': - target = table.cells().nodes(); - break; - default: - throw 'The editable parameter must be "cell", "row", "column", or "all"'; - } - var disableCols = data.editable.disable ? 
data.editable.disable.columns : null; - var numericCols = data.editable.numeric; - var areaCols = data.editable.area; - for (var i = 0; i < target.length; i++) { - (function(cell, current) { - var $cell = $(cell), html = $cell.html(); - var _cell = table.cell(cell), value = _cell.data(), index = _cell.index().column; - var $input; - if (inArray(index, numericCols)) { - $input = $(''); - } else if (inArray(index, areaCols)) { - $input = $(''); - } else { - $input = $(''); - } - if (!immediate) { - $cell.data('input', $input).data('html', html); - $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); - } - $input.val(value); - if (inArray(index, disableCols)) { - $input.attr('readonly', '').css('filter', 'invert(25%)'); - } - $cell.empty().append($input); - if (cell === current) $input.focus(); - $input.css('width', '100%'); - - if (immediate) $input.on('blur', function(e) { - var valueNew = $input.val(); - if (valueNew != value) { - _cell.data(valueNew); - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', [cellInfo(cell)], 'DT.cellInfo', null, {priority: 'event'}); - } - // for server-side processing, users have to call replaceData() to update the table - if (!server) table.draw(false); - } else { - $cell.html(html); - } - }).on('keyup', function(e) { - // hit Escape to cancel editing - if (e.keyCode === 27) $input.trigger('blur'); - }); - - // bulk edit (row, column, or all) - if (!immediate) $input.on('keyup', function(e) { - var removeInput = function($cell, restore) { - $cell.data('input').remove(); - if (restore) $cell.html($cell.data('html')); - } - if (e.keyCode === 27) { - for (var i = 0; i < target.length; i++) { - removeInput($(target[i]), true); - } - } else if (e.keyCode === 13 && e.ctrlKey) { - // Ctrl + Enter - var cell, $cell, _cell, cellData = []; - for (var i = 0; i < target.length; i++) { - cell = target[i]; $cell = $(cell); _cell = table.cell(cell); - _cell.data($cell.data('input').val()); - HTMLWidgets.shinyMode && 
cellData.push(cellInfo(cell)); - removeInput($cell, false); - } - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', cellData, 'DT.cellInfo', null, {priority: "event"}); - } - if (!server) table.draw(false); - } - }); - })(target[i], this); - } - }); - - // interaction with shiny - if (!HTMLWidgets.shinyMode && !crosstalkOptions.group) return; - - var methods = {}; - var shinyData = {}; - - methods.updateCaption = function(caption) { - if (!caption) return; - $table.children('caption').replaceWith(caption); - } - - // register clear functions to remove input values when the table is removed - instance.clearInputs = {}; - - var changeInput = function(id, value, type, noCrosstalk, opts) { - var event = id; - id = el.id + '_' + id; - if (type) id = id + ':' + type; - // do not update if the new value is the same as old value - if (event !== 'cell_edit' && !/_clicked$/.test(event) && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) - return; - shinyData[id] = JSON.stringify(value); - if (HTMLWidgets.shinyMode && Shiny.setInputValue) { - Shiny.setInputValue(id, value, opts); - if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { - Shiny.setInputValue(id, null); - } - } - - // HACK - if (event === "rows_selected" && !noCrosstalk) { - if (crosstalkOptions.group) { - var keys = crosstalkOptions.key; - var selectedKeys = null; - if (value) { - selectedKeys = []; - for (var i = 0; i < value.length; i++) { - // The value array's contents use 1-based row numbers, so we must - // convert to 0-based before indexing into the keys array. 
- selectedKeys.push(keys[value[i] - 1]); - } - } - instance.ctselectHandle.set(selectedKeys); - } - } - }; - - var addOne = function(x) { - return x.map(function(i) { return 1 + i; }); - }; - - var unique = function(x) { - var ux = []; - $.each(x, function(i, el){ - if ($.inArray(el, ux) === -1) ux.push(el); - }); - return ux; - } - - // change the row index of a cell - var tweakCellIndex = function(cell) { - var info = cell.index(); - // some cell may not be valid. e.g, #759 - // when using the RowGroup extension, datatables will - // generate the row label and the cells are not part of - // the data thus contain no row/col info - if (info === undefined) - return {row: null, col: null}; - if (server) { - info.row = DT_rows_current[info.row]; - } else { - info.row += 1; - } - return {row: info.row, col: info.column}; - } - - var cleanSelectedValues = function() { - changeInput('rows_selected', []); - changeInput('columns_selected', []); - changeInput('cells_selected', transposeArray2D([]), 'shiny.matrix'); - } - // #828 we should clean the selection on the server-side when the table reloads - cleanSelectedValues(); - - // a flag to indicates if select extension is initialized or not - var flagSelectExt = table.settings()[0]._select !== undefined; - // the Select extension should only be used in the client mode and - // when the selection.mode is set to none - if (data.selection.mode === 'none' && !server && flagSelectExt) { - var updateRowsSelected = function() { - var rows = table.rows({selected: true}); - var selected = []; - $.each(rows.indexes().toArray(), function(i, v) { - selected.push(v + 1); - }); - changeInput('rows_selected', selected); - } - var updateColsSelected = function() { - var columns = table.columns({selected: true}); - changeInput('columns_selected', columns.indexes().toArray()); - } - var updateCellsSelected = function() { - var cells = table.cells({selected: true}); - var selected = []; - cells.every(function() { - var row = 
this.index().row; - var col = this.index().column; - selected = selected.concat([[row + 1, col]]); - }); - changeInput('cells_selected', transposeArray2D(selected), 'shiny.matrix'); - } - table.on('select deselect', function(e, dt, type, indexes) { - updateRowsSelected(); - updateColsSelected(); - updateCellsSelected(); - }) - } - - var selMode = data.selection.mode, selTarget = data.selection.target; - var selDisable = data.selection.selectable === false; - if (inArray(selMode, ['single', 'multiple'])) { - var selClass = inArray(data.style, ['bootstrap', 'bootstrap4']) ? 'active' : 'selected'; - // selected1: row indices; selected2: column indices - var initSel = function(x) { - if (x === null || typeof x === 'boolean' || selTarget === 'cell') { - return {rows: [], cols: []}; - } else if (selTarget === 'row') { - return {rows: $.makeArray(x), cols: []}; - } else if (selTarget === 'column') { - return {rows: [], cols: $.makeArray(x)}; - } else if (selTarget === 'row+column') { - return {rows: $.makeArray(x.rows), cols: $.makeArray(x.cols)}; - } - } - var selected = data.selection.selected; - var selected1 = initSel(selected).rows, selected2 = initSel(selected).cols; - // selectable should contain either all positive or all non-positive values, not both - // positive values indicate "selectable" while non-positive values means "nonselectable" - // the assertion is performed on R side. (only column indicides could be zero which indicates - // the row name) - var selectable = data.selection.selectable; - var selectable1 = initSel(selectable).rows, selectable2 = initSel(selectable).cols; - - // After users reorder the rows or filter the table, we cannot use the table index - // directly. Instead, we need this function to find out the rows between the two clicks. - // If user filter the table again between the start click and the end click, the behavior - // would be undefined, but it should not be a problem. 
- var shiftSelRowsIndex = function(start, end) { - var indexes = server ? DT_rows_all : table.rows({ search: 'applied' }).indexes().toArray(); - start = indexes.indexOf(start); end = indexes.indexOf(end); - // if start is larger than end, we need to swap - if (start > end) { - var tmp = end; end = start; start = tmp; - } - return indexes.slice(start, end + 1); - } - - var serverRowIndex = function(clientRowIndex) { - return server ? DT_rows_current[clientRowIndex] : clientRowIndex + 1; - } - - // row, column, or cell selection - var lastClickedRow; - if (inArray(selTarget, ['row', 'row+column'])) { - // Get the current selected rows. It will also - // update the selected1's value based on the current row selection state - // Note we can't put this function inside selectRows() directly, - // the reason is method.selectRows() will override selected1's value but this - // function will add rows to selected1 (keep the existing selection), which is - // inconsistent with column and cell selection. - var selectedRows = function() { - var rows = table.rows('.' + selClass); - var idx = rows.indexes().toArray(); - if (!server) { - selected1 = addOne(idx); - return selected1; - } - idx = idx.map(function(i) { - return DT_rows_current[i]; - }); - selected1 = selMode === 'multiple' ? 
unique(selected1.concat(idx)) : idx; - return selected1; - } - // Change selected1's value based on selectable1, then refresh the row state - var onlyKeepSelectableRows = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected1 = []; - return; - } - if (selectable1.length === 0) return; - var nonselectable = selectable1[0] <= 0; - if (nonselectable) { - // should make selectable1 positive - selected1 = $(selected1).not(selectable1.map(function(i) { return -i; })).get(); - } else { - selected1 = $(selected1).filter(selectable1).get(); - } - } - // Change selected1's value based on selectable1, then - // refresh the row selection state according to values in selected1 - var selectRows = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableRows(); - table.$('tr.' + selClass).removeClass(selClass); - if (selected1.length === 0) return; - if (server) { - table.rows({page: 'current'}).every(function() { - if (inArray(DT_rows_current[this.index()], selected1)) { - $(this.node()).addClass(selClass); - } - }); - } else { - var selected0 = selected1.map(function(i) { return i - 1; }); - $(table.rows(selected0).nodes()).addClass(selClass); - } - } - table.on('mousedown.dt', 'tbody tr', function(e) { - var $this = $(this), thisRow = table.row(this); - if (selMode === 'multiple') { - if (e.shiftKey && lastClickedRow !== undefined) { - // select or de-select depends on the last clicked row's status - var flagSel = !$this.hasClass(selClass); - var crtClickedRow = serverRowIndex(thisRow.index()); - if (server) { - var rowsIndex = shiftSelRowsIndex(lastClickedRow, crtClickedRow); - // update current page's selClass - rowsIndex.map(function(i) { - var rowIndex = DT_rows_current.indexOf(i); - if (rowIndex >= 0) { - var row = table.row(rowIndex).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - } - }); - // update selected1 - if (flagSel) { - selected1 
= unique(selected1.concat(rowsIndex)); - } else { - selected1 = selected1.filter(function(index) { - return !inArray(index, rowsIndex); - }); - } - } else { - // js starts from 0 - shiftSelRowsIndex(lastClickedRow - 1, crtClickedRow - 1).map(function(value) { - var row = table.row(value).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - }); - } - e.preventDefault(); - } else { - $this.toggleClass(selClass); - } - } else { - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - } else { - table.$('tr.' + selClass).removeClass(selClass); - $this.addClass(selClass); - } - } - if (server && !$this.hasClass(selClass)) { - var id = DT_rows_current[thisRow.index()]; - // remove id from selected1 since its class .selected has been removed - if (inArray(id, selected1)) selected1.splice($.inArray(id, selected1), 1); - } - selectedRows(); // update selected1's value based on selClass - selectRows(false); // only keep the selectable rows - changeInput('rows_selected', selected1); - changeInput('row_last_clicked', serverRowIndex(thisRow.index()), null, null, {priority: 'event'}); - lastClickedRow = serverRowIndex(thisRow.index()); - }); - selectRows(false); // in case users have specified pre-selected rows - // restore selected rows after the table is redrawn (e.g. 
sort/search/page); - // client-side tables will preserve the selections automatically; for - // server-side tables, we have to *real* row indices are in `selected1` - changeInput('rows_selected', selected1); - if (server) table.on('draw.dt', function(e) { selectRows(false); }); - methods.selectRows = function(selected, ignoreSelectable) { - selected1 = $.makeArray(selected); - selectRows(ignoreSelectable); - changeInput('rows_selected', selected1); - } - } - - if (inArray(selTarget, ['column', 'row+column'])) { - if (selTarget === 'row+column') { - $(table.columns().footer()).css('cursor', 'pointer'); - } - // update selected2's value based on selectable2 - var onlyKeepSelectableCols = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected2 = []; - return; - } - if (selectable2.length === 0) return; - var nonselectable = selectable2[0] <= 0; - if (nonselectable) { - // need to make selectable2 positive - selected2 = $(selected2).not(selectable2.map(function(i) { return -i; })).get(); - } else { - selected2 = $(selected2).filter(selectable2).get(); - } - } - // update selected2 and then - // refresh the col selection state according to values in selected2 - var selectCols = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCols(); - // if selected2 is not a valide index (e.g., larger than the column number) - // table.columns(selected2) will fail and result in a blank table - // this is different from the table.rows(), where the out-of-range indexes - // doesn't affect at all - selected2 = $(selected2).filter(table.columns().indexes()).get(); - table.columns().nodes().flatten().to$().removeClass(selClass); - if (selected2.length > 0) - table.columns(selected2).nodes().flatten().to$().addClass(selClass); - } - var callback = function() { - var colIdx = selTarget === 'column' ? 
table.cell(this).index().column : - $.inArray(this, table.columns().footer()), - thisCol = $(table.column(colIdx).nodes()); - if (colIdx === -1) return; - if (thisCol.hasClass(selClass)) { - thisCol.removeClass(selClass); - selected2.splice($.inArray(colIdx, selected2), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - thisCol.addClass(selClass); - selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx])); - } - selectCols(false); // update selected2 based on selectable - changeInput('columns_selected', selected2); - } - if (selTarget === 'column') { - $(table.table().body()).on('click.dt', 'td', callback); - } else { - $(table.table().footer()).on('click.dt', 'tr th', callback); - } - selectCols(false); // in case users have specified pre-selected columns - changeInput('columns_selected', selected2); - if (server) table.on('draw.dt', function(e) { selectCols(false); }); - methods.selectColumns = function(selected, ignoreSelectable) { - selected2 = $.makeArray(selected); - selectCols(ignoreSelectable); - changeInput('columns_selected', selected2); - } - } - - if (selTarget === 'cell') { - var selected3 = [], selectable3 = []; - if (selected !== null) selected3 = selected; - if (selectable !== null && typeof selectable !== 'boolean') selectable3 = selectable; - var findIndex = function(ij, sel) { - for (var i = 0; i < sel.length; i++) { - if (ij[0] === sel[i][0] && ij[1] === sel[i][1]) return i; - } - return -1; - } - // Change selected3's value based on selectable3, then refresh the cell state - var onlyKeepSelectableCells = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected3 = []; - return; - } - if (selectable3.length === 0) return; - var nonselectable = selectable3[0][0] <= 0; - var out = []; - if (nonselectable) { - selected3.map(function(ij) { - // should make selectable3 positive - if (findIndex([-ij[0], -ij[1]], selectable3) === -1) { 
out.push(ij); } - }); - } else { - selected3.map(function(ij) { - if (findIndex(ij, selectable3) > -1) { out.push(ij); } - }); - } - selected3 = out; - } - // Change selected3's value based on selectable3, then - // refresh the cell selection state according to values in selected3 - var selectCells = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCells(); - table.$('td.' + selClass).removeClass(selClass); - if (selected3.length === 0) return; - if (server) { - table.cells({page: 'current'}).every(function() { - var info = tweakCellIndex(this); - if (findIndex([info.row, info.col], selected3) > -1) - $(this.node()).addClass(selClass); - }); - } else { - selected3.map(function(ij) { - $(table.cell(ij[0] - 1, ij[1]).node()).addClass(selClass); - }); - } - }; - table.on('click.dt', 'tbody td', function() { - var $this = $(this), info = tweakCellIndex(table.cell(this)); - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - selected3.splice(findIndex([info.row, info.col], selected3), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - $this.addClass(selClass); - selected3 = selMode === 'single' ? [[info.row, info.col]] : - unique(selected3.concat([[info.row, info.col]])); - } - selectCells(false); // must call this to update selected3 based on selectable3 - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - }); - selectCells(false); // in case users have specified pre-selected columns - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - - if (server) table.on('draw.dt', function(e) { selectCells(false); }); - methods.selectCells = function(selected, ignoreSelectable) { - selected3 = selected ? 
selected : []; - selectCells(ignoreSelectable); - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - } - } - } - - // expose some table info to Shiny - var updateTableInfo = function(e, settings) { - // TODO: is anyone interested in the page info? - // changeInput('page_info', table.page.info()); - var updateRowInfo = function(id, modifier) { - var idx; - if (server) { - idx = modifier.page === 'current' ? DT_rows_current : DT_rows_all; - } else { - var rows = table.rows($.extend({ - search: 'applied', - page: 'all' - }, modifier)); - idx = addOne(rows.indexes().toArray()); - } - changeInput('rows' + '_' + id, idx); - }; - updateRowInfo('current', {page: 'current'}); - updateRowInfo('all', {}); - } - table.on('draw.dt', updateTableInfo); - updateTableInfo(); - - // state info - table.on('draw.dt column-visibility.dt', function() { - changeInput('state', table.state()); - }); - changeInput('state', table.state()); - - // search info - var updateSearchInfo = function() { - changeInput('search', table.search()); - if (filterRow) changeInput('search_columns', filterRow.toArray().map(function(td) { - return $(td).find('input').first().val(); - })); - } - table.on('draw.dt', updateSearchInfo); - updateSearchInfo(); - - var cellInfo = function(thiz) { - var info = tweakCellIndex(table.cell(thiz)); - info.value = table.cell(thiz).data(); - return info; - } - // the current cell clicked on - table.on('click.dt', 'tbody td', function() { - changeInput('cell_clicked', cellInfo(this), null, null, {priority: 'event'}); - }) - changeInput('cell_clicked', {}); - - // do not trigger table selection when clicking on links unless they have classes - table.on('click.dt', 'tbody td a', function(e) { - if (this.className === '') e.stopPropagation(); - }); - - methods.addRow = function(data, rowname, resetPaging) { - var n = table.columns().indexes().length, d = n - data.length; - if (d === 1) { - data = rowname.concat(data) - } else if (d !== 0) { - 
console.log(data); - console.log(table.columns().indexes()); - throw 'New data must be of the same length as current data (' + n + ')'; - }; - table.row.add(data).draw(resetPaging); - } - - methods.updateSearch = function(keywords) { - if (keywords.global !== null) - $(table.table().container()).find('input[type=search]').first() - .val(keywords.global).trigger('input'); - var columns = keywords.columns; - if (!filterRow || columns === null) return; - filterRow.toArray().map(function(td, i) { - var v = typeof columns === 'string' ? columns : columns[i]; - if (typeof v === 'undefined') { - console.log('The search keyword for column ' + i + ' is undefined') - return; - } - $(td).find('input').first().val(v); - searchColumn(i, v); - }); - table.draw(); - } - - methods.hideCols = function(hide, reset) { - if (reset) table.columns().visible(true, false); - table.columns(hide).visible(false); - } - - methods.showCols = function(show, reset) { - if (reset) table.columns().visible(false, false); - table.columns(show).visible(true); - } - - methods.colReorder = function(order, origOrder) { - table.colReorder.order(order, origOrder); - } - - methods.selectPage = function(page) { - if (table.page.info().pages < page || page < 1) { - throw 'Selected page is out of range'; - }; - table.page(page - 1).draw(false); - } - - methods.reloadData = function(resetPaging, clearSelection) { - // empty selections first if necessary - if (methods.selectRows && inArray('row', clearSelection)) methods.selectRows([]); - if (methods.selectColumns && inArray('column', clearSelection)) methods.selectColumns([]); - if (methods.selectCells && inArray('cell', clearSelection)) methods.selectCells([]); - table.ajax.reload(null, resetPaging); - } - - // update table filters (set new limits of sliders) - methods.updateFilters = function(newProps) { - // loop through each filter in the filter row - filterRow.each(function(i, td) { - var k = i; - if (filterRow.length > newProps.length) { - if (i === 0) 
return; // first column is row names - k = i - 1; - } - // Update the filters to reflect the updated data. - // Allow "falsy" (e.g. NULL) to signify a no-op. - if (newProps[k]) { - setFilterProps(td, newProps[k]); - } - }); - }; - - table.shinyMethods = methods; - }, - resize: function(el, width, height, instance) { - if (instance.data) this.renderValue(el, instance.data, instance); - - // dynamically adjust height if fillContainer = TRUE - if (instance.fillContainer) - this.fillAvailableHeight(el, height); - - this.adjustWidth(el); - }, - - // dynamically set the scroll body to fill available height - // (used with fillContainer = TRUE) - fillAvailableHeight: function(el, availableHeight) { - - // see how much of the table is occupied by header/footer elements - // and use that to compute a target scroll body height - var dtWrapper = $(el).find('div.dataTables_wrapper'); - var dtScrollBody = $(el).find($('div.dataTables_scrollBody')); - var framingHeight = dtWrapper.innerHeight() - dtScrollBody.innerHeight(); - var scrollBodyHeight = availableHeight - framingHeight; - - // we need to set `max-height` to none as datatables library now sets this - // to a fixed height, disabling the ability to resize to fill the window, - // as it will be set to a fixed 100px under such circumstances, e.g., RStudio IDE, - // or FlexDashboard - // see https://github.com/rstudio/DT/issues/951#issuecomment-1026464509 - dtScrollBody.css('max-height', 'none'); - // set the height - dtScrollBody.height(scrollBodyHeight + 'px'); - }, - - // adjust the width of columns; remove the hard-coded widths on table and the - // scroll header when scrollX/Y are enabled - adjustWidth: function(el) { - var $el = $(el), table = $el.data('datatable'); - if (table) table.columns.adjust(); - $el.find('.dataTables_scrollHeadInner').css('width', '') - .children('table').css('margin-left', ''); - } -}); - - if (!HTMLWidgets.shinyMode) return; - - Shiny.addCustomMessageHandler('datatable-calls', 
function(data) { - var id = data.id; - var el = document.getElementById(id); - var table = el ? $(el).data('datatable') : null; - if (!table) { - console.log("Couldn't find table with id " + id); - return; - } - - var methods = table.shinyMethods, call = data.call; - if (methods[call.method]) { - methods[call.method].apply(table, call.args); - } else { - console.log("Unknown method " + call.method); - } - }); - -})(); diff --git a/content/find/all/index_files/datatables-css/datatables-crosstalk.css b/content/find/all/index_files/datatables-css/datatables-crosstalk.css deleted file mode 100644 index fb5bae84..00000000 --- a/content/find/all/index_files/datatables-css/datatables-crosstalk.css +++ /dev/null @@ -1,23 +0,0 @@ -.dt-crosstalk-fade { - opacity: 0.2; -} - -html body div.DTS div.dataTables_scrollBody { - background: none; -} - - -/* -Fix https://github.com/rstudio/DT/issues/563 -If the `table.display` is set to "block" (e.g., pkgdown), the browser will display -datatable objects strangely. The search panel and the page buttons will still be -in full-width but the table body will be "compact" and shorter. -In therory, having this attributes will affect `dom="t"` -with `display: block` users. But in reality, there should be no one. -We may remove the below lines in the future if the upstream agree to have this there. -See https://github.com/DataTables/DataTablesSrc/issues/160 -*/ - -table.dataTable { - display: table; -} diff --git a/content/find/all/index_files/header-attrs/header-attrs.js b/content/find/all/index_files/header-attrs/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/content/find/all/index_files/header-attrs/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/content/find/all/index_files/htmlwidgets/htmlwidgets.js b/content/find/all/index_files/htmlwidgets/htmlwidgets.js deleted file mode 100644 index da8b2367..00000000 --- a/content/find/all/index_files/htmlwidgets/htmlwidgets.js +++ /dev/null @@ -1,903 +0,0 @@ -(function() { - // If window.HTMLWidgets is already defined, then use it; otherwise create a - // new object. This allows preceding code to set options that affect the - // initialization process (though none currently exist). - window.HTMLWidgets = window.HTMLWidgets || {}; - - // See if we're running in a viewer pane. If not, we're in a web browser. - var viewerMode = window.HTMLWidgets.viewerMode = - /\bviewer_pane=1\b/.test(window.location); - - // See if we're running in Shiny mode. If not, it's a static document. - // Note that static widgets can appear in both Shiny and static modes, but - // obviously, Shiny widgets can only appear in Shiny apps/documents. - var shinyMode = window.HTMLWidgets.shinyMode = - typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings; - - // We can't count on jQuery being available, so we implement our own - // version if necessary. - function querySelectorAll(scope, selector) { - if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) { - return scope.find(selector); - } - if (scope.querySelectorAll) { - return scope.querySelectorAll(selector); - } - } - - function asArray(value) { - if (value === null) - return []; - if ($.isArray(value)) - return value; - return [value]; - } - - // Implement jQuery's extend - function extend(target /*, ... 
*/) { - if (arguments.length == 1) { - return target; - } - for (var i = 1; i < arguments.length; i++) { - var source = arguments[i]; - for (var prop in source) { - if (source.hasOwnProperty(prop)) { - target[prop] = source[prop]; - } - } - } - return target; - } - - // IE8 doesn't support Array.forEach. - function forEach(values, callback, thisArg) { - if (values.forEach) { - values.forEach(callback, thisArg); - } else { - for (var i = 0; i < values.length; i++) { - callback.call(thisArg, values[i], i, values); - } - } - } - - // Replaces the specified method with the return value of funcSource. - // - // Note that funcSource should not BE the new method, it should be a function - // that RETURNS the new method. funcSource receives a single argument that is - // the overridden method, it can be called from the new method. The overridden - // method can be called like a regular function, it has the target permanently - // bound to it so "this" will work correctly. - function overrideMethod(target, methodName, funcSource) { - var superFunc = target[methodName] || function() {}; - var superFuncBound = function() { - return superFunc.apply(target, arguments); - }; - target[methodName] = funcSource(superFuncBound); - } - - // Add a method to delegator that, when invoked, calls - // delegatee.methodName. If there is no such method on - // the delegatee, but there was one on delegator before - // delegateMethod was called, then the original version - // is invoked instead. - // For example: - // - // var a = { - // method1: function() { console.log('a1'); } - // method2: function() { console.log('a2'); } - // }; - // var b = { - // method1: function() { console.log('b1'); } - // }; - // delegateMethod(a, b, "method1"); - // delegateMethod(a, b, "method2"); - // a.method1(); - // a.method2(); - // - // The output would be "b1", "a2". 
- function delegateMethod(delegator, delegatee, methodName) { - var inherited = delegator[methodName]; - delegator[methodName] = function() { - var target = delegatee; - var method = delegatee[methodName]; - - // The method doesn't exist on the delegatee. Instead, - // call the method on the delegator, if it exists. - if (!method) { - target = delegator; - method = inherited; - } - - if (method) { - return method.apply(target, arguments); - } - }; - } - - // Implement a vague facsimilie of jQuery's data method - function elementData(el, name, value) { - if (arguments.length == 2) { - return el["htmlwidget_data_" + name]; - } else if (arguments.length == 3) { - el["htmlwidget_data_" + name] = value; - return el; - } else { - throw new Error("Wrong number of arguments for elementData: " + - arguments.length); - } - } - - // http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex - function escapeRegExp(str) { - return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); - } - - function hasClass(el, className) { - var re = new RegExp("\\b" + escapeRegExp(className) + "\\b"); - return re.test(el.className); - } - - // elements - array (or array-like object) of HTML elements - // className - class name to test for - // include - if true, only return elements with given className; - // if false, only return elements *without* given className - function filterByClass(elements, className, include) { - var results = []; - for (var i = 0; i < elements.length; i++) { - if (hasClass(elements[i], className) == include) - results.push(elements[i]); - } - return results; - } - - function on(obj, eventName, func) { - if (obj.addEventListener) { - obj.addEventListener(eventName, func, false); - } else if (obj.attachEvent) { - obj.attachEvent(eventName, func); - } - } - - function off(obj, eventName, func) { - if (obj.removeEventListener) - obj.removeEventListener(eventName, func, false); - else if (obj.detachEvent) { - obj.detachEvent(eventName, 
func); - } - } - - // Translate array of values to top/right/bottom/left, as usual with - // the "padding" CSS property - // https://developer.mozilla.org/en-US/docs/Web/CSS/padding - function unpackPadding(value) { - if (typeof(value) === "number") - value = [value]; - if (value.length === 1) { - return {top: value[0], right: value[0], bottom: value[0], left: value[0]}; - } - if (value.length === 2) { - return {top: value[0], right: value[1], bottom: value[0], left: value[1]}; - } - if (value.length === 3) { - return {top: value[0], right: value[1], bottom: value[2], left: value[1]}; - } - if (value.length === 4) { - return {top: value[0], right: value[1], bottom: value[2], left: value[3]}; - } - } - - // Convert an unpacked padding object to a CSS value - function paddingToCss(paddingObj) { - return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px"; - } - - // Makes a number suitable for CSS - function px(x) { - if (typeof(x) === "number") - return x + "px"; - else - return x; - } - - // Retrieves runtime widget sizing information for an element. - // The return value is either null, or an object with fill, padding, - // defaultWidth, defaultHeight fields. - function sizingPolicy(el) { - var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']"); - if (!sizingEl) - return null; - var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}"); - if (viewerMode) { - return sp.viewer; - } else { - return sp.browser; - } - } - - // @param tasks Array of strings (or falsy value, in which case no-op). - // Each element must be a valid JavaScript expression that yields a - // function. 
Or, can be an array of objects with "code" and "data" - // properties; in this case, the "code" property should be a string - // of JS that's an expr that yields a function, and "data" should be - // an object that will be added as an additional argument when that - // function is called. - // @param target The object that will be "this" for each function - // execution. - // @param args Array of arguments to be passed to the functions. (The - // same arguments will be passed to all functions.) - function evalAndRun(tasks, target, args) { - if (tasks) { - forEach(tasks, function(task) { - var theseArgs = args; - if (typeof(task) === "object") { - theseArgs = theseArgs.concat([task.data]); - task = task.code; - } - var taskFunc = tryEval(task); - if (typeof(taskFunc) !== "function") { - throw new Error("Task must be a function! Source:\n" + task); - } - taskFunc.apply(target, theseArgs); - }); - } - } - - // Attempt eval() both with and without enclosing in parentheses. - // Note that enclosing coerces a function declaration into - // an expression that eval() can parse - // (otherwise, a SyntaxError is thrown) - function tryEval(code) { - var result = null; - try { - result = eval("(" + code + ")"); - } catch(error) { - if (!(error instanceof SyntaxError)) { - throw error; - } - try { - result = eval(code); - } catch(e) { - if (e instanceof SyntaxError) { - throw error; - } else { - throw e; - } - } - } - return result; - } - - function initSizing(el) { - var sizing = sizingPolicy(el); - if (!sizing) - return; - - var cel = document.getElementById("htmlwidget_container"); - if (!cel) - return; - - if (typeof(sizing.padding) !== "undefined") { - document.body.style.margin = "0"; - document.body.style.padding = paddingToCss(unpackPadding(sizing.padding)); - } - - if (sizing.fill) { - document.body.style.overflow = "hidden"; - document.body.style.width = "100%"; - document.body.style.height = "100%"; - document.documentElement.style.width = "100%"; - 
document.documentElement.style.height = "100%"; - if (cel) { - cel.style.position = "absolute"; - var pad = unpackPadding(sizing.padding); - cel.style.top = pad.top + "px"; - cel.style.right = pad.right + "px"; - cel.style.bottom = pad.bottom + "px"; - cel.style.left = pad.left + "px"; - el.style.width = "100%"; - el.style.height = "100%"; - } - - return { - getWidth: function() { return cel.offsetWidth; }, - getHeight: function() { return cel.offsetHeight; } - }; - - } else { - el.style.width = px(sizing.width); - el.style.height = px(sizing.height); - - return { - getWidth: function() { return el.offsetWidth; }, - getHeight: function() { return el.offsetHeight; } - }; - } - } - - // Default implementations for methods - var defaults = { - find: function(scope) { - return querySelectorAll(scope, "." + this.name); - }, - renderError: function(el, err) { - var $el = $(el); - - this.clearError(el); - - // Add all these error classes, as Shiny does - var errClass = "shiny-output-error"; - if (err.type !== null) { - // use the classes of the error condition as CSS class names - errClass = errClass + " " + $.map(asArray(err.type), function(type) { - return errClass + "-" + type; - }).join(" "); - } - errClass = errClass + " htmlwidgets-error"; - - // Is el inline or block? If inline or inline-block, just display:none it - // and add an inline error. - var display = $el.css("display"); - $el.data("restore-display-mode", display); - - if (display === "inline" || display === "inline-block") { - $el.hide(); - if (err.message !== "") { - var errorSpan = $("").addClass(errClass); - errorSpan.text(err.message); - $el.after(errorSpan); - } - } else if (display === "block") { - // If block, add an error just after the el, set visibility:none on the - // el, and position the error to be on top of the el. - // Mark it with a unique ID and CSS class so we can remove it later. - $el.css("visibility", "hidden"); - if (err.message !== "") { - var errorDiv = $("
    ").addClass(errClass).css("position", "absolute") - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - // setting width can push out the page size, forcing otherwise - // unnecessary scrollbars to appear and making it impossible for - // the element to shrink; so use max-width instead - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - errorDiv.text(err.message); - $el.after(errorDiv); - - // Really dumb way to keep the size/position of the error in sync with - // the parent element as the window is resized or whatever. - var intId = setInterval(function() { - if (!errorDiv[0].parentElement) { - clearInterval(intId); - return; - } - errorDiv - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - }, 500); - } - } - }, - clearError: function(el) { - var $el = $(el); - var display = $el.data("restore-display-mode"); - $el.data("restore-display-mode", null); - - if (display === "inline" || display === "inline-block") { - if (display) - $el.css("display", display); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } else if (display === "block"){ - $el.css("visibility", "inherit"); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } - }, - sizing: {} - }; - - // Called by widget bindings to register a new type of widget. The definition - // object can contain the following properties: - // - name (required) - A string indicating the binding name, which will be - // used by default as the CSS classname to look for. - // - initialize (optional) - A function(el) that will be called once per - // widget element; if a value is returned, it will be passed as the third - // value to renderValue. - // - renderValue (required) - A function(el, data, initValue) that will be - // called with data. 
Static contexts will cause this to be called once per - // element; Shiny apps will cause this to be called multiple times per - // element, as the data changes. - window.HTMLWidgets.widget = function(definition) { - if (!definition.name) { - throw new Error("Widget must have a name"); - } - if (!definition.type) { - throw new Error("Widget must have a type"); - } - // Currently we only support output widgets - if (definition.type !== "output") { - throw new Error("Unrecognized widget type '" + definition.type + "'"); - } - // TODO: Verify that .name is a valid CSS classname - - // Support new-style instance-bound definitions. Old-style class-bound - // definitions have one widget "object" per widget per type/class of - // widget; the renderValue and resize methods on such widget objects - // take el and instance arguments, because the widget object can't - // store them. New-style instance-bound definitions have one widget - // object per widget instance; the definition that's passed in doesn't - // provide renderValue or resize methods at all, just the single method - // factory(el, width, height) - // which returns an object that has renderValue(x) and resize(w, h). - // This enables a far more natural programming style for the widget - // author, who can store per-instance state using either OO-style - // instance fields or functional-style closure variables (I guess this - // is in contrast to what can only be called C-style pseudo-OO which is - // what we required before). - if (definition.factory) { - definition = createLegacyDefinitionAdapter(definition); - } - - if (!definition.renderValue) { - throw new Error("Widget must have a renderValue function"); - } - - // For static rendering (non-Shiny), use a simple widget registration - // scheme. We also use this scheme for Shiny apps/documents that also - // contain static widgets. 
- window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; - // Merge defaults into the definition; don't mutate the original definition. - var staticBinding = extend({}, defaults, definition); - overrideMethod(staticBinding, "find", function(superfunc) { - return function(scope) { - var results = superfunc(scope); - // Filter out Shiny outputs, we only want the static kind - return filterByClass(results, "html-widget-output", false); - }; - }); - window.HTMLWidgets.widgets.push(staticBinding); - - if (shinyMode) { - // Shiny is running. Register the definition with an output binding. - // The definition itself will not be the output binding, instead - // we will make an output binding object that delegates to the - // definition. This is because we foolishly used the same method - // name (renderValue) for htmlwidgets definition and Shiny bindings - // but they actually have quite different semantics (the Shiny - // bindings receive data that includes lots of metadata that it - // strips off before calling htmlwidgets renderValue). We can't - // just ignore the difference because in some widgets it's helpful - // to call this.renderValue() from inside of resize(), and if - // we're not delegating, then that call will go to the Shiny - // version instead of the htmlwidgets version. - - // Merge defaults with definition, without mutating either. - var bindingDef = extend({}, defaults, definition); - - // This object will be our actual Shiny binding. - var shinyBinding = new Shiny.OutputBinding(); - - // With a few exceptions, we'll want to simply use the bindingDef's - // version of methods if they are available, otherwise fall back to - // Shiny's defaults. NOTE: If Shiny's output bindings gain additional - // methods in the future, and we want them to be overrideable by - // HTMLWidget binding definitions, then we'll need to add them to this - // list. 
- delegateMethod(shinyBinding, bindingDef, "getId"); - delegateMethod(shinyBinding, bindingDef, "onValueChange"); - delegateMethod(shinyBinding, bindingDef, "onValueError"); - delegateMethod(shinyBinding, bindingDef, "renderError"); - delegateMethod(shinyBinding, bindingDef, "clearError"); - delegateMethod(shinyBinding, bindingDef, "showProgress"); - - // The find, renderValue, and resize are handled differently, because we - // want to actually decorate the behavior of the bindingDef methods. - - shinyBinding.find = function(scope) { - var results = bindingDef.find(scope); - - // Only return elements that are Shiny outputs, not static ones - var dynamicResults = results.filter(".html-widget-output"); - - // It's possible that whatever caused Shiny to think there might be - // new dynamic outputs, also caused there to be new static outputs. - // Since there might be lots of different htmlwidgets bindings, we - // schedule execution for later--no need to staticRender multiple - // times. - if (results.length !== dynamicResults.length) - scheduleStaticRender(); - - return dynamicResults; - }; - - // Wrap renderValue to handle initialization, which unfortunately isn't - // supported natively by Shiny at the time of this writing. 
- - shinyBinding.renderValue = function(el, data) { - Shiny.renderDependencies(data.deps); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var i = 0; data.evals && i < data.evals.length; i++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); - } - if (!bindingDef.renderOnNullValue) { - if (data.x === null) { - el.style.visibility = "hidden"; - return; - } else { - el.style.visibility = "inherit"; - } - } - if (!elementData(el, "initialized")) { - initSizing(el); - - elementData(el, "initialized", true); - if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); - elementData(el, "init_result", result); - } - } - bindingDef.renderValue(el, data.x, elementData(el, "init_result")); - evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); - }; - - // Only override resize if bindingDef implements it - if (bindingDef.resize) { - shinyBinding.resize = function(el, width, height) { - // Shiny can call resize before initialize/renderValue have been - // called, which doesn't make sense for widgets. 
- if (elementData(el, "initialized")) { - bindingDef.resize(el, width, height, elementData(el, "init_result")); - } - }; - } - - Shiny.outputBindings.register(shinyBinding, bindingDef.name); - } - }; - - var scheduleStaticRenderTimerId = null; - function scheduleStaticRender() { - if (!scheduleStaticRenderTimerId) { - scheduleStaticRenderTimerId = setTimeout(function() { - scheduleStaticRenderTimerId = null; - window.HTMLWidgets.staticRender(); - }, 1); - } - } - - // Render static widgets after the document finishes loading - // Statically render all elements that are of this widget's class - window.HTMLWidgets.staticRender = function() { - var bindings = window.HTMLWidgets.widgets || []; - forEach(bindings, function(binding) { - var matches = binding.find(document.documentElement); - forEach(matches, function(el) { - var sizeObj = initSizing(el, binding); - - if (hasClass(el, "html-widget-static-bound")) - return; - el.className = el.className + " html-widget-static-bound"; - - var initResult; - if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); - elementData(el, "init_result", initResult); - } - - if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - if (size.w === 0 && size.h === 0) - return; - if (size.w === lastSize.w && size.h === lastSize.h) - return; - lastSize = size; - binding.resize(el, size.w, size.h, initResult); - }; - - on(window, "resize", resizeHandler); - - // This is needed for cases where we're running in a Shiny - // app, but the widget itself is not a Shiny output, but - // rather a simple static widget. 
One example of this is - // an rmarkdown document that has runtime:shiny and widget - // that isn't in a render function. Shiny only knows to - // call resize handlers for Shiny outputs, not for static - // widgets, so we do it ourselves. - if (window.jQuery) { - window.jQuery(document).on( - "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", - resizeHandler - ); - window.jQuery(document).on( - "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", - resizeHandler - ); - } - - // This is needed for the specific case of ioslides, which - // flips slides between display:none and display:block. - // Ideally we would not have to have ioslide-specific code - // here, but rather have ioslides raise a generic event, - // but the rmarkdown package just went to CRAN so the - // window to getting that fixed may be long. - if (window.addEventListener) { - // It's OK to limit this to window.addEventListener - // browsers because ioslides itself only supports - // such browsers. 
- on(document, "slideenter", resizeHandler); - on(document, "slideleave", resizeHandler); - } - } - - var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); - if (scriptData) { - var data = JSON.parse(scriptData.textContent || scriptData.text); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var k = 0; data.evals && k < data.evals.length; k++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); - } - binding.renderValue(el, data.x, initResult); - evalAndRun(data.jsHooks.render, initResult, [el, data.x]); - } - }); - }); - - invokePostRenderHandlers(); - } - - - function has_jQuery3() { - if (!window.jQuery) { - return false; - } - var $version = window.jQuery.fn.jquery; - var $major_version = parseInt($version.split(".")[0]); - return $major_version >= 3; - } - - /* - / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's - / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now - / really means $(setTimeout(fn)). - / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous - / - / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny - / one tick later than it did before, which means staticRender() is - / called renderValue() earlier than (advanced) widget authors might be expecting. - / https://github.com/rstudio/shiny/issues/2630 - / - / For a concrete example, leaflet has some methods (e.g., updateBounds) - / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
- / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to - / delay execution of those methods (until Shiny methods are ready) - / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 - / - / Ideally widget authors wouldn't need to use this setTimeout() hack that - / leaflet uses to call Shiny methods on a staticRender(). In the long run, - / the logic initShiny should be broken up so that method registration happens - / right away, but binding happens later. - */ - function maybeStaticRenderLater() { - if (shinyMode && has_jQuery3()) { - window.jQuery(window.HTMLWidgets.staticRender); - } else { - window.HTMLWidgets.staticRender(); - } - } - - if (document.addEventListener) { - document.addEventListener("DOMContentLoaded", function() { - document.removeEventListener("DOMContentLoaded", arguments.callee, false); - maybeStaticRenderLater(); - }, false); - } else if (document.attachEvent) { - document.attachEvent("onreadystatechange", function() { - if (document.readyState === "complete") { - document.detachEvent("onreadystatechange", arguments.callee); - maybeStaticRenderLater(); - } - }); - } - - - window.HTMLWidgets.getAttachmentUrl = function(depname, key) { - // If no key, default to the first item - if (typeof(key) === "undefined") - key = 1; - - var link = document.getElementById(depname + "-" + key + "-attachment"); - if (!link) { - throw new Error("Attachment " + depname + "/" + key + " not found in document"); - } - return link.getAttribute("href"); - }; - - window.HTMLWidgets.dataframeToD3 = function(df) { - var names = []; - var length; - for (var name in df) { - if (df.hasOwnProperty(name)) - names.push(name); - if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof(length) !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = 
df[name].length; - } - var results = []; - var item; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; - }; - - window.HTMLWidgets.transposeArray2D = function(array) { - if (array.length === 0) return array; - var newArray = array[0].map(function(col, i) { - return array.map(function(row) { - return row[i] - }) - }); - return newArray; - }; - // Split value at splitChar, but allow splitChar to be escaped - // using escapeChar. Any other characters escaped by escapeChar - // will be included as usual (including escapeChar itself). - function splitWithEscape(value, splitChar, escapeChar) { - var results = []; - var escapeMode = false; - var currentResult = ""; - for (var pos = 0; pos < value.length; pos++) { - if (!escapeMode) { - if (value[pos] === splitChar) { - results.push(currentResult); - currentResult = ""; - } else if (value[pos] === escapeChar) { - escapeMode = true; - } else { - currentResult += value[pos]; - } - } else { - currentResult += value[pos]; - escapeMode = false; - } - } - if (currentResult !== "") { - results.push(currentResult); - } - return results; - } - // Function authored by Yihui/JJ Allaire - window.HTMLWidgets.evaluateStringMember = function(o, member) { - var parts = splitWithEscape(member, '.', '\\'); - for (var i = 0, l = parts.length; i < l; i++) { - var part = parts[i]; - // part may be a character or 'numeric' member name - if (o !== null && typeof o === "object" && part in o) { - if (i == (l - 1)) { // if we are at the end of the line then evalulate - if (typeof o[part] === "string") - o[part] = tryEval(o[part]); - } else { // otherwise continue to next embedded object - o = o[part]; - } - } - } - }; - - // Retrieve the HTMLWidget instance (i.e. the return value of an - // HTMLWidget binding's initialize() or factory() function) - // associated with an element, or null if none. 
- window.HTMLWidgets.getInstance = function(el) { - return elementData(el, "init_result"); - }; - - // Finds the first element in the scope that matches the selector, - // and returns the HTMLWidget instance (i.e. the return value of - // an HTMLWidget binding's initialize() or factory() function) - // associated with that element, if any. If no element matches the - // selector, or the first matching element has no HTMLWidget - // instance associated with it, then null is returned. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.find = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var el = scope.querySelector(selector); - if (el === null) { - return null; - } else { - return window.HTMLWidgets.getInstance(el); - } - }; - - // Finds all elements in the scope that match the selector, and - // returns the HTMLWidget instances (i.e. the return values of - // an HTMLWidget binding's initialize() or factory() function) - // associated with the elements, in an array. If elements that - // match the selector don't have an associated HTMLWidget - // instance, the returned array will contain nulls. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.findAll = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var nodes = scope.querySelectorAll(selector); - var results = []; - for (var i = 0; i < nodes.length; i++) { - results.push(window.HTMLWidgets.getInstance(nodes[i])); - } - return results; - }; - - var postRenderHandlers = []; - function invokePostRenderHandlers() { - while (postRenderHandlers.length) { - var handler = postRenderHandlers.shift(); - if (handler) { - handler(); - } - } - } - - // Register the given callback function to be invoked after the - // next time static widgets are rendered. 
- window.HTMLWidgets.addPostRenderHandler = function(callback) { - postRenderHandlers.push(callback); - }; - - // Takes a new-style instance-bound definition, and returns an - // old-style class-bound definition. This saves us from having - // to rewrite all the logic in this file to accomodate both - // types of definitions. - function createLegacyDefinitionAdapter(defn) { - var result = { - name: defn.name, - type: defn.type, - initialize: function(el, width, height) { - return defn.factory(el, width, height); - }, - renderValue: function(el, x, instance) { - return instance.renderValue(x); - }, - resize: function(el, width, height, instance) { - return instance.resize(width, height); - } - }; - - if (defn.find) - result.find = defn.find; - if (defn.renderError) - result.renderError = defn.renderError; - if (defn.clearError) - result.clearError = defn.clearError; - - return result; - } -})(); - diff --git a/content/find/parsnip/index.Rmd b/content/find/parsnip/index.Rmd deleted file mode 100644 index 35308738..00000000 --- a/content/find/parsnip/index.Rmd +++ /dev/null @@ -1,120 +0,0 @@ ---- -subtitle: Parsnip -title: Search parsnip models -weight: 2 -description: | - Find model types, engines, and arguments to fit and predict in the tidymodels framework. ---- - -To learn about the parsnip package, see [*Get Started: Build a Model*](/start/models/). Use the tables below to find [model types and engines](#models) and to explore [model arguments](#model-args). 
- -```{r ex_setup, include=FALSE} -knitr::opts_chunk$set( - message = FALSE, - digits = 3, - collapse = TRUE, - comment = "#>" - ) -options(digits = 3) -script <- here::here("static/code/get_pkgdown_urls.R") -source(script) -library(DT) -library(tibble) -pkgs <- - tibble( - pkg = c("agua", "baguette", "bonsai", "censored", "discrim", "modeltime", - "multilevelmod", "parsnip", "plsmod", "poissonreg", "rules"), - base_url = case_when(pkg == "modeltime" ~ "https://business-science.github.io/modeltime/", - TRUE ~ paste0("https://", pkg, ".tidymodels.org/")) - ) -library(discrim) -library(parsnip) -library(poissonreg) -library(plsmod) -library(baguette) -library(rules) -library(modeltime) -library(multilevelmod) -library(censored) -library(bonsai) -library(agua) -``` - -## Explore models {#models} - -```{r model-table, include = FALSE} -mod_names <- get_from_env("models") - -model_info <- - map_dfr(mod_names, ~ get_from_env(paste0(.x, "_predict")) %>% mutate(alias = .x)) - -model_modes <- - model_info %>% - distinct(alias, mode) %>% - group_by(alias) %>% - summarize(mode = paste0(sort(mode), collapse = ", ")) - -model_eng <- - model_info %>% - distinct(alias, engine) %>% - group_by(alias) %>% - summarize(engine = paste0("", sort(engine), "", collapse = ", ")) - -pkg_urls <- - get_pkgdown_urls(pkgs) %>% - dplyr::filter(alias %in% mod_names) %>% - mutate(title = str_remove(title, "General Interface for ")) %>% - full_join(model_modes, by = "alias") %>% - full_join(model_eng, by = "alias") %>% - dplyr::select(-alias) %>% - select(title, topic, package, mode, engine) -``` - -```{r table-display, echo = FALSE, results = "asis"} -pkg_urls %>% - mutate( - package = as.factor(package), - mode = as.factor(mode) - ) %>% - rename(`model type`=topic ) %>% - DT::datatable( - rownames = FALSE, - class = 'cell-border stripe', - escape = FALSE, - filter = "top", - options = list(pageLength = 5) - ) -``` - -
    - -Models can be added by the user too. The article [How to build a parsnip model](/learn/develop/models/) walks you through the steps. - -## Explore model arguments {#model-args} - -The parsnip package provides consistent interface for working with similar models across different engines. This means that parsnip adopts standardized parameter names as arguments, and those names may be different from those used by the individual engines. The searchable table below provides a mapping between the parsnip and the engine arguments: - -
    - -```{r params, echo = FALSE, results = "asis"} -envir <- parsnip::get_model_env() - -args <- - ls(envir) %>% - tibble(name = .) %>% - filter(str_detect(name, "args")) %>% - mutate(model = str_replace(name, "_args", ""), - args = map(name, ~envir[[.x]])) %>% - unnest(args) %>% - select(model:original) %>% - rename(`model type`=model ) - -DT::datatable( - args, - rownames = FALSE, - class = 'cell-border stripe', - filter = 'top', - escape = FALSE, - options = list(pageLength = 5) - ) -``` diff --git a/content/find/parsnip/index.html b/content/find/parsnip/index.html deleted file mode 100644 index 5a830a43..00000000 --- a/content/find/parsnip/index.html +++ /dev/null @@ -1,38 +0,0 @@ ---- -subtitle: Parsnip -title: Search parsnip models -weight: 2 -description: | - Find model types, engines, and arguments to fit and predict in the tidymodels framework. ---- - - - - - - - - - - - - - - - - -

    To learn about the parsnip package, see Get Started: Build a Model. Use the tables below to find model types and engines and to explore model arguments.

    -
    -

    Explore models

    -
    - -


    -

    Models can be added by the user too. The article How to build a parsnip model walks you through the steps.

    -
    -
    -

    Explore model arguments

    -

    The parsnip package provides consistent interface for working with similar models across different engines. This means that parsnip adopts standardized parameter names as arguments, and those names may be different from those used by the individual engines. The searchable table below provides a mapping between the parsnip and the engine arguments:

    -


    -
    - -
    diff --git a/content/find/parsnip/index_files/datatables-binding/datatables.js b/content/find/parsnip/index_files/datatables-binding/datatables.js deleted file mode 100644 index fcee8ce2..00000000 --- a/content/find/parsnip/index_files/datatables-binding/datatables.js +++ /dev/null @@ -1,1512 +0,0 @@ -(function() { - -// some helper functions: using a global object DTWidget so that it can be used -// in JS() code, e.g. datatable(options = list(foo = JS('code'))); unlike R's -// dynamic scoping, when 'code' is eval()'ed, JavaScript does not know objects -// from the "parent frame", e.g. JS('DTWidget') will not work unless it was made -// a global object -var DTWidget = {}; - -// 123456666.7890 -> 123,456,666.7890 -var markInterval = function(d, digits, interval, mark, decMark, precision) { - x = precision ? d.toPrecision(digits) : d.toFixed(digits); - if (!/^-?[\d.]+$/.test(x)) return x; - var xv = x.split('.'); - if (xv.length > 2) return x; // should have at most one decimal point - xv[0] = xv[0].replace(new RegExp('\\B(?=(\\d{' + interval + '})+(?!\\d))', 'g'), mark); - return xv.join(decMark); -}; - -DTWidget.formatCurrency = function(data, currency, digits, interval, mark, decMark, before, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - var res = markInterval(d, digits, interval, mark, decMark); - res = before ? (/^-/.test(res) ? 
'-' + currency + res.replace(/^-/, '') : currency + res) : - res + currency; - return res; -}; - -DTWidget.formatString = function(data, prefix, suffix) { - var d = data; - if (d === null) return ''; - return prefix + d + suffix; -}; - -DTWidget.formatPercentage = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d * 100, digits, interval, mark, decMark) + '%'; -}; - -DTWidget.formatRound = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d, digits, interval, mark, decMark); -}; - -DTWidget.formatSignif = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d, digits, interval, mark, decMark, true); -}; - -DTWidget.formatDate = function(data, method, params) { - var d = data; - if (d === null) return ''; - // (new Date('2015-10-28')).toDateString() may return 2015-10-27 because the - // actual time created could be like 'Tue Oct 27 2015 19:00:00 GMT-0500 (CDT)', - // i.e. 
the date-only string is treated as UTC time instead of local time - if ((method === 'toDateString' || method === 'toLocaleDateString') && /^\d{4,}\D\d{2}\D\d{2}$/.test(d)) { - d = d.split(/\D/); - d = new Date(d[0], d[1] - 1, d[2]); - } else { - d = new Date(d); - } - return d[method].apply(d, params); -}; - -window.DTWidget = DTWidget; - -// A helper function to update the properties of existing filters -var setFilterProps = function(td, props) { - // Update enabled/disabled state - var $input = $(td).find('input').first(); - var searchable = $input.data('searchable'); - $input.prop('disabled', !searchable || props.disabled); - - // Based on the filter type, set its new values - var type = td.getAttribute('data-type'); - if (['factor', 'logical'].includes(type)) { - // Reformat the new dropdown options for use with selectize - var new_vals = props.params.options.map(function(item) { - return { text: item, value: item }; - }); - - // Find the selectize object - var dropdown = $(td).find('.selectized').eq(0)[0].selectize; - - // Note the current values - var old_vals = dropdown.getValue(); - - // Remove the existing values - dropdown.clearOptions(); - - // Add the new options - dropdown.addOption(new_vals); - - // Preserve the existing values - dropdown.setValue(old_vals); - - } else if (['number', 'integer', 'date', 'time'].includes(type)) { - // Apply internal scaling to new limits. Updating scale not yet implemented. 
- var slider = $(td).find('.noUi-target').eq(0); - var scale = Math.pow(10, Math.max(0, +slider.data('scale') || 0)); - var new_vals = [props.params.min * scale, props.params.max * scale]; - - // Note what the new limits will be just for this filter - var new_lims = new_vals.slice(); - - // Determine the current values and limits - var old_vals = slider.val().map(Number); - var old_lims = slider.noUiSlider('options').range; - old_lims = [old_lims.min, old_lims.max]; - - // Preserve the current values if filters have been applied; otherwise, apply no filtering - if (old_vals[0] != old_lims[0]) { - new_vals[0] = Math.max(old_vals[0], new_vals[0]); - } - - if (old_vals[1] != old_lims[1]) { - new_vals[1] = Math.min(old_vals[1], new_vals[1]); - } - - // Update the endpoints of the slider - slider.noUiSlider({ - start: new_vals, - range: {'min': new_lims[0], 'max': new_lims[1]} - }, true); - } -}; - -var transposeArray2D = function(a) { - return a.length === 0 ? a : HTMLWidgets.transposeArray2D(a); -}; - -var crosstalkPluginsInstalled = false; - -function maybeInstallCrosstalkPlugins() { - if (crosstalkPluginsInstalled) - return; - crosstalkPluginsInstalled = true; - - $.fn.dataTable.ext.afnFiltering.push( - function(oSettings, aData, iDataIndex) { - var ctfilter = oSettings.nTable.ctfilter; - if (ctfilter && !ctfilter[iDataIndex]) - return false; - - var ctselect = oSettings.nTable.ctselect; - if (ctselect && !ctselect[iDataIndex]) - return false; - - return true; - } - ); -} - -HTMLWidgets.widget({ - name: "datatables", - type: "output", - renderOnNullValue: true, - initialize: function(el, width, height) { - // in order that the type=number inputs return a number - $.valHooks.number = { - get: function(el) { - var value = parseFloat(el.value); - return isNaN(value) ? 
"" : value; - } - }; - $(el).html(' '); - return { - data: null, - ctfilterHandle: new crosstalk.FilterHandle(), - ctfilterSubscription: null, - ctselectHandle: new crosstalk.SelectionHandle(), - ctselectSubscription: null - }; - }, - renderValue: function(el, data, instance) { - if (el.offsetWidth === 0 || el.offsetHeight === 0) { - instance.data = data; - return; - } - instance.data = null; - var $el = $(el); - $el.empty(); - - if (data === null) { - $el.append(' '); - // clear previous Shiny inputs (if any) - for (var i in instance.clearInputs) instance.clearInputs[i](); - instance.clearInputs = {}; - return; - } - - var crosstalkOptions = data.crosstalkOptions; - if (!crosstalkOptions) crosstalkOptions = { - 'key': null, 'group': null - }; - if (crosstalkOptions.group) { - maybeInstallCrosstalkPlugins(); - instance.ctfilterHandle.setGroup(crosstalkOptions.group); - instance.ctselectHandle.setGroup(crosstalkOptions.group); - } - - // if we are in the viewer then we always want to fillContainer and - // and autoHideNavigation (unless the user has explicitly set these) - if (window.HTMLWidgets.viewerMode) { - if (!data.hasOwnProperty("fillContainer")) - data.fillContainer = true; - if (!data.hasOwnProperty("autoHideNavigation")) - data.autoHideNavigation = true; - } - - // propagate fillContainer to instance (so we have it in resize) - instance.fillContainer = data.fillContainer; - - var cells = data.data; - - if (cells instanceof Array) cells = transposeArray2D(cells); - - $el.append(data.container); - var $table = $el.find('table'); - if (data.class) $table.addClass(data.class); - if (data.caption) $table.prepend(data.caption); - - if (!data.selection) data.selection = { - mode: 'none', selected: null, target: 'row', selectable: null - }; - if (HTMLWidgets.shinyMode && data.selection.mode !== 'none' && - data.selection.target === 'row+column') { - if ($table.children('tfoot').length === 0) { - $table.append($('')); - $table.find('thead 
tr').clone().appendTo($table.find('tfoot')); - } - } - - // column filters - var filterRow; - switch (data.filter) { - case 'top': - $table.children('thead').append(data.filterHTML); - filterRow = $table.find('thead tr:last td'); - break; - case 'bottom': - if ($table.children('tfoot').length === 0) { - $table.append($('')); - } - $table.children('tfoot').prepend(data.filterHTML); - filterRow = $table.find('tfoot tr:first td'); - break; - } - - var options = { searchDelay: 1000 }; - if (cells !== null) $.extend(options, { - data: cells - }); - - // options for fillContainer - var bootstrapActive = typeof($.fn.popover) != 'undefined'; - if (instance.fillContainer) { - - // force scrollX/scrollY and turn off autoWidth - options.scrollX = true; - options.scrollY = "100px"; // can be any value, we'll adjust below - - // if we aren't paginating then move around the info/filter controls - // to save space at the bottom and rephrase the info callback - if (data.options.paging === false) { - - // we know how to do this cleanly for bootstrap, not so much - // for other themes/layouts - if (bootstrapActive) { - options.dom = "<'row'<'col-sm-4'i><'col-sm-8'f>>" + - "<'row'<'col-sm-12'tr>>"; - } - - options.fnInfoCallback = function(oSettings, iStart, iEnd, - iMax, iTotal, sPre) { - return Number(iTotal).toLocaleString() + " records"; - }; - } - } - - // auto hide navigation if requested - // Note, this only works on client-side processing mode as on server-side, - // cells (data.data) is null; In addition, we require the pageLength option - // being provided explicitly to enable this. Despite we may be able to deduce - // the default value of pageLength, it may complicate things so we'd rather - // put this responsiblity to users and warn them on the R side. - if (data.autoHideNavigation === true && data.options.paging !== false) { - // strip all nav if length >= cells - if ((cells instanceof Array) && data.options.pageLength >= cells.length) - options.dom = bootstrapActive ? 
"<'row'<'col-sm-12'tr>>" : "t"; - // alternatively lean things out for flexdashboard mobile portrait - else if (bootstrapActive && window.FlexDashboard && window.FlexDashboard.isMobilePhone()) - options.dom = "<'row'<'col-sm-12'f>>" + - "<'row'<'col-sm-12'tr>>" + - "<'row'<'col-sm-12'p>>"; - } - - $.extend(true, options, data.options || {}); - - var searchCols = options.searchCols; - if (searchCols) { - searchCols = searchCols.map(function(x) { - return x === null ? '' : x.search; - }); - // FIXME: this means I don't respect the escapeRegex setting - delete options.searchCols; - } - - // server-side processing? - var server = options.serverSide === true; - - // use the dataSrc function to pre-process JSON data returned from R - var DT_rows_all = [], DT_rows_current = []; - if (server && HTMLWidgets.shinyMode && typeof options.ajax === 'object' && - /^session\/[\da-z]+\/dataobj/.test(options.ajax.url) && !options.ajax.dataSrc) { - options.ajax.dataSrc = function(json) { - DT_rows_all = $.makeArray(json.DT_rows_all); - DT_rows_current = $.makeArray(json.DT_rows_current); - var data = json.data; - if (!colReorderEnabled()) return data; - var table = $table.DataTable(), order = table.colReorder.order(), flag = true, i, j, row; - for (i = 0; i < order.length; ++i) if (order[i] !== i) flag = false; - if (flag) return data; - for (i = 0; i < data.length; ++i) { - row = data[i].slice(); - for (j = 0; j < order.length; ++j) data[i][j] = row[order[j]]; - } - return data; - }; - } - - var thiz = this; - if (instance.fillContainer) $table.on('init.dt', function(e) { - thiz.fillAvailableHeight(el, $(el).innerHeight()); - }); - // If the page contains serveral datatables and one of which enables colReorder, - // the table.colReorder.order() function will exist but throws error when called. - // So it seems like the only way to know if colReorder is enabled or not is to - // check the options. 
- var colReorderEnabled = function() { return "colReorder" in options; }; - var table = $table.DataTable(options); - $el.data('datatable', table); - - // Unregister previous Crosstalk event subscriptions, if they exist - if (instance.ctfilterSubscription) { - instance.ctfilterHandle.off("change", instance.ctfilterSubscription); - instance.ctfilterSubscription = null; - } - if (instance.ctselectSubscription) { - instance.ctselectHandle.off("change", instance.ctselectSubscription); - instance.ctselectSubscription = null; - } - - if (!crosstalkOptions.group) { - $table[0].ctfilter = null; - $table[0].ctselect = null; - } else { - var key = crosstalkOptions.key; - function keysToMatches(keys) { - if (!keys) { - return null; - } else { - var selectedKeys = {}; - for (var i = 0; i < keys.length; i++) { - selectedKeys[keys[i]] = true; - } - var matches = {}; - for (var j = 0; j < key.length; j++) { - if (selectedKeys[key[j]]) - matches[j] = true; - } - return matches; - } - } - - function applyCrosstalkFilter(e) { - $table[0].ctfilter = keysToMatches(e.value); - table.draw(); - } - instance.ctfilterSubscription = instance.ctfilterHandle.on("change", applyCrosstalkFilter); - applyCrosstalkFilter({value: instance.ctfilterHandle.filteredKeys}); - - function applyCrosstalkSelection(e) { - if (e.sender !== instance.ctselectHandle) { - table - .rows('.' 
+ selClass, {search: 'applied'}) - .nodes() - .to$() - .removeClass(selClass); - if (selectedRows) - changeInput('rows_selected', selectedRows(), void 0, true); - } - - if (e.sender !== instance.ctselectHandle && e.value && e.value.length) { - var matches = keysToMatches(e.value); - - // persistent selection with plotly (& leaflet) - var ctOpts = crosstalk.var("plotlyCrosstalkOpts").get() || {}; - if (ctOpts.persistent === true) { - var matches = $.extend(matches, $table[0].ctselect); - } - - $table[0].ctselect = matches; - table.draw(); - } else { - if ($table[0].ctselect) { - $table[0].ctselect = null; - table.draw(); - } - } - } - instance.ctselectSubscription = instance.ctselectHandle.on("change", applyCrosstalkSelection); - // TODO: This next line doesn't seem to work when renderDataTable is used - applyCrosstalkSelection({value: instance.ctselectHandle.value}); - } - - var inArray = function(val, array) { - return $.inArray(val, $.makeArray(array)) > -1; - }; - - // search the i-th column - var searchColumn = function(i, value) { - var regex = false, ci = true; - if (options.search) { - regex = options.search.regex, - ci = options.search.caseInsensitive !== false; - } - return table.column(i).search(value, regex, !regex, ci); - }; - - if (data.filter !== 'none') { - - filterRow.each(function(i, td) { - - var $td = $(td), type = $td.data('type'), filter; - var $input = $td.children('div').first().children('input'); - var disabled = $input.prop('disabled'); - var searchable = table.settings()[0].aoColumns[i].bSearchable; - $input.prop('disabled', !searchable || disabled); - $input.data('searchable', searchable); // for updating later - $input.on('input blur', function() { - $input.next('span').toggle(Boolean($input.val())); - }); - // Bootstrap sets pointer-events to none and we won't be able to click - // the clear button - $input.next('span').css('pointer-events', 'auto').hide().click(function() { - 
$(this).hide().prev('input').val('').trigger('input').focus(); - }); - var searchCol; // search string for this column - if (searchCols && searchCols[i]) { - searchCol = searchCols[i]; - $input.val(searchCol).trigger('input'); - } - var $x = $td.children('div').last(); - - // remove the overflow: hidden attribute of the scrollHead - // (otherwise the scrolling table body obscures the filters) - // The workaround and the discussion from - // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 - // Otherwise the filter selection will not be anchored to the values - // when the columns number is many and scrollX is enabled. - var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); - var cssOverflowHead = scrollHead.css('overflow'); - var scrollBody = $(el).find('.dataTables_scrollBody'); - var cssOverflowBody = scrollBody.css('overflow'); - var scrollTable = $(el).find('.dataTables_scroll'); - var cssOverflowTable = scrollTable.css('overflow'); - if (cssOverflowHead === 'hidden') { - $x.on('show hide', function(e) { - if (e.type === 'show') { - scrollHead.css('overflow', 'visible'); - scrollBody.css('overflow', 'visible'); - scrollTable.css('overflow-x', 'scroll'); - } else { - scrollHead.css('overflow', cssOverflowHead); - scrollBody.css('overflow', cssOverflowBody); - scrollTable.css('overflow-x', cssOverflowTable); - } - }); - $x.css('z-index', 25); - } - - if (inArray(type, ['factor', 'logical'])) { - $input.on({ - click: function() { - $input.parent().hide(); $x.show().trigger('show'); filter[0].selectize.focus(); - }, - input: function() { - if ($input.val() === '') filter[0].selectize.setValue([]); - } - }); - var $input2 = $x.children('select'); - filter = $input2.selectize({ - options: $input2.data('options').map(function(v, i) { - return ({text: v, value: v}); - }), - plugins: ['remove_button'], - hideSelected: true, - onChange: function(value) { - if (value === null) value = []; // compatibility with jQuery 3.0 - 
$input.val(value.length ? JSON.stringify(value) : ''); - if (value.length) $input.trigger('input'); - $input.attr('title', $input.val()); - if (server) { - table.column(i).search(value.length ? JSON.stringify(value) : '').draw(); - return; - } - // turn off filter if nothing selected - $td.data('filter', value.length > 0); - table.draw(); // redraw table, and filters will be applied - } - }); - if (searchCol) filter[0].selectize.setValue(JSON.parse(searchCol)); - filter[0].selectize.on('blur', function() { - $x.hide().trigger('hide'); $input.parent().show(); $input.trigger('blur'); - }); - filter.next('div').css('margin-bottom', 'auto'); - } else if (type === 'character') { - var fun = function() { - searchColumn(i, $input.val()).draw(); - }; - if (server) { - fun = $.fn.dataTable.util.throttle(fun, options.searchDelay); - } - $input.on('input', fun); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - var $x0 = $x; - $x = $x0.children('div').first(); - $x0.css({ - 'background-color': '#fff', - 'border': '1px #ddd solid', - 'border-radius': '4px', - 'padding': data.vertical ? '35px 20px': '20px 20px 10px 20px' - }); - var $spans = $x0.children('span').css({ - 'margin-top': data.vertical ? 
'0' : '10px', - 'white-space': 'nowrap' - }); - var $span1 = $spans.first(), $span2 = $spans.last(); - var r1 = +$x.data('min'), r2 = +$x.data('max'); - // when the numbers are too small or have many decimal places, the - // slider may have numeric precision problems (#150) - var scale = Math.pow(10, Math.max(0, +$x.data('scale') || 0)); - r1 = Math.round(r1 * scale); r2 = Math.round(r2 * scale); - var scaleBack = function(x, scale) { - if (scale === 1) return x; - var d = Math.round(Math.log(scale) / Math.log(10)); - // to avoid problems like 3.423/100 -> 0.034230000000000003 - return (x / scale).toFixed(d); - }; - var slider_min = function() { - return filter.noUiSlider('options').range.min; - }; - var slider_max = function() { - return filter.noUiSlider('options').range.max; - }; - $input.on({ - focus: function() { - $x0.show().trigger('show'); - // first, make sure the slider div leaves at least 20px between - // the two (slider value) span's - $x0.width(Math.max(160, $span1.outerWidth() + $span2.outerWidth() + 20)); - // then, if the input is really wide or slider is vertical, - // make the slider the same width as the input - if ($x0.outerWidth() < $input.outerWidth() || data.vertical) { - $x0.outerWidth($input.outerWidth()); - } - // make sure the slider div does not reach beyond the right margin - if ($(window).width() < $x0.offset().left + $x0.width()) { - $x0.offset({ - 'left': $input.offset().left + $input.outerWidth() - $x0.outerWidth() - }); - } - }, - blur: function() { - $x0.hide().trigger('hide'); - }, - input: function() { - if ($input.val() === '') filter.val([slider_min(), slider_max()]); - }, - change: function() { - var v = $input.val().replace(/\s/g, ''); - if (v === '') return; - v = v.split('...'); - if (v.length !== 2) { - $input.parent().addClass('has-error'); - return; - } - if (v[0] === '') v[0] = slider_min(); - if (v[1] === '') v[1] = slider_max(); - $input.parent().removeClass('has-error'); - // treat date as UTC time at midnight - 
var strTime = function(x) { - var s = type === 'date' ? 'T00:00:00Z' : ''; - var t = new Date(x + s).getTime(); - // add 10 minutes to date since it does not hurt the date, and - // it helps avoid the tricky floating point arithmetic problems, - // e.g. sometimes the date may be a few milliseconds earlier - // than the midnight due to precision problems in noUiSlider - return type === 'date' ? t + 3600000 : t; - }; - if (inArray(type, ['date', 'time'])) { - v[0] = strTime(v[0]); - v[1] = strTime(v[1]); - } - if (v[0] != slider_min()) v[0] *= scale; - if (v[1] != slider_max()) v[1] *= scale; - filter.val(v); - } - }); - var formatDate = function(d, isoFmt) { - d = scaleBack(d, scale); - if (type === 'number') return d; - if (type === 'integer') return parseInt(d); - var x = new Date(+d); - var fmt = ('filterDateFmt' in data) ? data.filterDateFmt[i] : undefined; - if (fmt !== undefined && isoFmt === false) return x[fmt.method].apply(x, fmt.params); - if (type === 'date') { - var pad0 = function(x) { - return ('0' + x).substr(-2, 2); - }; - return x.getUTCFullYear() + '-' + pad0(1 + x.getUTCMonth()) - + '-' + pad0(x.getUTCDate()); - } else { - return x.toISOString(); - } - }; - var opts = type === 'date' ? { step: 60 * 60 * 1000 } : - type === 'integer' ? { step: 1 } : {}; - - opts.orientation = data.vertical ? 'vertical': 'horizontal'; - opts.direction = data.vertical ? 
'rtl': 'ltr'; - - filter = $x.noUiSlider($.extend({ - start: [r1, r2], - range: {min: r1, max: r2}, - connect: true - }, opts)); - if (scale > 1) (function() { - var t1 = r1, t2 = r2; - var val = filter.val(); - while (val[0] > r1 || val[1] < r2) { - if (val[0] > r1) { - t1 -= val[0] - r1; - } - if (val[1] < r2) { - t2 += r2 - val[1]; - } - filter = $x.noUiSlider($.extend({ - start: [t1, t2], - range: {min: t1, max: t2}, - connect: true - }, opts), true); - val = filter.val(); - } - r1 = t1; r2 = t2; - })(); - var updateSliderText = function(v1, v2) { - $span1.text(formatDate(v1, false)); $span2.text(formatDate(v2, false)); - }; - updateSliderText(r1, r2); - var updateSlider = function(e) { - var val = filter.val(); - // turn off filter if in full range - $td.data('filter', val[0] > slider_min() || val[1] < slider_max()); - var v1 = formatDate(val[0]), v2 = formatDate(val[1]), ival; - if ($td.data('filter')) { - ival = v1 + ' ... ' + v2; - $input.attr('title', ival).val(ival).trigger('input'); - } else { - $input.attr('title', '').val(''); - } - updateSliderText(val[0], val[1]); - if (e.type === 'slide') return; // no searching when sliding only - if (server) { - table.column(i).search($td.data('filter') ? ival : '').draw(); - return; - } - table.draw(); - }; - filter.on({ - set: updateSlider, - slide: updateSlider - }); - } - - // server-side processing will be handled by R (or whatever server - // language you use); the following code is only needed for client-side - // processing - if (server) { - // if a search string has been pre-set, search now - if (searchCol) searchColumn(i, searchCol).draw(); - return; - } - - var customFilter = function(settings, data, dataIndex) { - // there is no way to attach a search function to a specific table, - // and we need to make sure a global search function is not applied to - // all tables (i.e. 
a range filter in a previous table should not be - // applied to the current table); we use the settings object to - // determine if we want to perform searching on the current table, - // since settings.sTableId will be different to different tables - if (table.settings()[0] !== settings) return true; - // no filter on this column or no need to filter this column - if (typeof filter === 'undefined' || !$td.data('filter')) return true; - - var r = filter.val(), v, r0, r1; - var i_data = function(i) { - if (!colReorderEnabled()) return i; - var order = table.colReorder.order(), k; - for (k = 0; k < order.length; ++k) if (order[k] === i) return k; - return i; // in theory it will never be here... - } - v = data[i_data(i)]; - if (type === 'number' || type === 'integer') { - v = parseFloat(v); - // how to handle NaN? currently exclude these rows - if (isNaN(v)) return(false); - r0 = parseFloat(scaleBack(r[0], scale)) - r1 = parseFloat(scaleBack(r[1], scale)); - if (v >= r0 && v <= r1) return true; - } else if (type === 'date' || type === 'time') { - v = new Date(v); - r0 = new Date(r[0] / scale); r1 = new Date(r[1] / scale); - if (v >= r0 && v <= r1) return true; - } else if (type === 'factor') { - if (r.length === 0 || inArray(v, r)) return true; - } else if (type === 'logical') { - if (r.length === 0) return true; - if (inArray(v === '' ? 
'na' : v, r)) return true; - } - return false; - }; - - $.fn.dataTable.ext.search.push(customFilter); - - // search for the preset search strings if it is non-empty - if (searchCol) { - if (inArray(type, ['factor', 'logical'])) { - filter[0].selectize.setValue(JSON.parse(searchCol)); - } else if (type === 'character') { - $input.trigger('input'); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - $input.trigger('change'); - } - } - - }); - - } - - // highlight search keywords - var highlight = function() { - var body = $(table.table().body()); - // removing the old highlighting first - body.unhighlight(); - - // don't highlight the "not found" row, so we get the rows using the api - if (table.rows({ filter: 'applied' }).data().length === 0) return; - // highlight global search keywords - body.highlight($.trim(table.search()).split(/\s+/)); - // then highlight keywords from individual column filters - if (filterRow) filterRow.each(function(i, td) { - var $td = $(td), type = $td.data('type'); - if (type !== 'character') return; - var $input = $td.children('div').first().children('input'); - var column = table.column(i).nodes().to$(), - val = $.trim($input.val()); - if (type !== 'character' || val === '') return; - column.highlight(val.split(/\s+/)); - }); - }; - - if (options.searchHighlight) { - table - .on('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth', highlight) - .on('destroy', function() { - // remove event handler - table.off('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth'); - }); - - // Set the option for escaping regex characters in our search string. This will be used - // for all future matching. 
- jQuery.fn.highlight.options.escapeRegex = (!options.search || !options.search.regex); - - // initial highlight for state saved conditions and initial states - highlight(); - } - - // run the callback function on the table instance - if (typeof data.callback === 'function') data.callback(table); - - // double click to edit the cell, row, column, or all cells - if (data.editable) table.on('dblclick.dt', 'tbody td', function(e) { - // only bring up the editor when the cell itself is dbclicked, and ignore - // other dbclick events bubbled up (e.g. from the ) - if (e.target !== this) return; - var target = [], immediate = false; - switch (data.editable.target) { - case 'cell': - target = [this]; - immediate = true; // edit will take effect immediately - break; - case 'row': - target = table.cells(table.cell(this).index().row, '*').nodes(); - break; - case 'column': - target = table.cells('*', table.cell(this).index().column).nodes(); - break; - case 'all': - target = table.cells().nodes(); - break; - default: - throw 'The editable parameter must be "cell", "row", "column", or "all"'; - } - var disableCols = data.editable.disable ? 
data.editable.disable.columns : null; - var numericCols = data.editable.numeric; - var areaCols = data.editable.area; - for (var i = 0; i < target.length; i++) { - (function(cell, current) { - var $cell = $(cell), html = $cell.html(); - var _cell = table.cell(cell), value = _cell.data(), index = _cell.index().column; - var $input; - if (inArray(index, numericCols)) { - $input = $(''); - } else if (inArray(index, areaCols)) { - $input = $(''); - } else { - $input = $(''); - } - if (!immediate) { - $cell.data('input', $input).data('html', html); - $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); - } - $input.val(value); - if (inArray(index, disableCols)) { - $input.attr('readonly', '').css('filter', 'invert(25%)'); - } - $cell.empty().append($input); - if (cell === current) $input.focus(); - $input.css('width', '100%'); - - if (immediate) $input.on('blur', function(e) { - var valueNew = $input.val(); - if (valueNew != value) { - _cell.data(valueNew); - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', [cellInfo(cell)], 'DT.cellInfo', null, {priority: 'event'}); - } - // for server-side processing, users have to call replaceData() to update the table - if (!server) table.draw(false); - } else { - $cell.html(html); - } - }).on('keyup', function(e) { - // hit Escape to cancel editing - if (e.keyCode === 27) $input.trigger('blur'); - }); - - // bulk edit (row, column, or all) - if (!immediate) $input.on('keyup', function(e) { - var removeInput = function($cell, restore) { - $cell.data('input').remove(); - if (restore) $cell.html($cell.data('html')); - } - if (e.keyCode === 27) { - for (var i = 0; i < target.length; i++) { - removeInput($(target[i]), true); - } - } else if (e.keyCode === 13 && e.ctrlKey) { - // Ctrl + Enter - var cell, $cell, _cell, cellData = []; - for (var i = 0; i < target.length; i++) { - cell = target[i]; $cell = $(cell); _cell = table.cell(cell); - _cell.data($cell.data('input').val()); - HTMLWidgets.shinyMode && 
cellData.push(cellInfo(cell)); - removeInput($cell, false); - } - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', cellData, 'DT.cellInfo', null, {priority: "event"}); - } - if (!server) table.draw(false); - } - }); - })(target[i], this); - } - }); - - // interaction with shiny - if (!HTMLWidgets.shinyMode && !crosstalkOptions.group) return; - - var methods = {}; - var shinyData = {}; - - methods.updateCaption = function(caption) { - if (!caption) return; - $table.children('caption').replaceWith(caption); - } - - // register clear functions to remove input values when the table is removed - instance.clearInputs = {}; - - var changeInput = function(id, value, type, noCrosstalk, opts) { - var event = id; - id = el.id + '_' + id; - if (type) id = id + ':' + type; - // do not update if the new value is the same as old value - if (event !== 'cell_edit' && !/_clicked$/.test(event) && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) - return; - shinyData[id] = JSON.stringify(value); - if (HTMLWidgets.shinyMode && Shiny.setInputValue) { - Shiny.setInputValue(id, value, opts); - if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { - Shiny.setInputValue(id, null); - } - } - - // HACK - if (event === "rows_selected" && !noCrosstalk) { - if (crosstalkOptions.group) { - var keys = crosstalkOptions.key; - var selectedKeys = null; - if (value) { - selectedKeys = []; - for (var i = 0; i < value.length; i++) { - // The value array's contents use 1-based row numbers, so we must - // convert to 0-based before indexing into the keys array. 
- selectedKeys.push(keys[value[i] - 1]); - } - } - instance.ctselectHandle.set(selectedKeys); - } - } - }; - - var addOne = function(x) { - return x.map(function(i) { return 1 + i; }); - }; - - var unique = function(x) { - var ux = []; - $.each(x, function(i, el){ - if ($.inArray(el, ux) === -1) ux.push(el); - }); - return ux; - } - - // change the row index of a cell - var tweakCellIndex = function(cell) { - var info = cell.index(); - // some cell may not be valid. e.g, #759 - // when using the RowGroup extension, datatables will - // generate the row label and the cells are not part of - // the data thus contain no row/col info - if (info === undefined) - return {row: null, col: null}; - if (server) { - info.row = DT_rows_current[info.row]; - } else { - info.row += 1; - } - return {row: info.row, col: info.column}; - } - - var cleanSelectedValues = function() { - changeInput('rows_selected', []); - changeInput('columns_selected', []); - changeInput('cells_selected', transposeArray2D([]), 'shiny.matrix'); - } - // #828 we should clean the selection on the server-side when the table reloads - cleanSelectedValues(); - - // a flag to indicates if select extension is initialized or not - var flagSelectExt = table.settings()[0]._select !== undefined; - // the Select extension should only be used in the client mode and - // when the selection.mode is set to none - if (data.selection.mode === 'none' && !server && flagSelectExt) { - var updateRowsSelected = function() { - var rows = table.rows({selected: true}); - var selected = []; - $.each(rows.indexes().toArray(), function(i, v) { - selected.push(v + 1); - }); - changeInput('rows_selected', selected); - } - var updateColsSelected = function() { - var columns = table.columns({selected: true}); - changeInput('columns_selected', columns.indexes().toArray()); - } - var updateCellsSelected = function() { - var cells = table.cells({selected: true}); - var selected = []; - cells.every(function() { - var row = 
this.index().row; - var col = this.index().column; - selected = selected.concat([[row + 1, col]]); - }); - changeInput('cells_selected', transposeArray2D(selected), 'shiny.matrix'); - } - table.on('select deselect', function(e, dt, type, indexes) { - updateRowsSelected(); - updateColsSelected(); - updateCellsSelected(); - }) - } - - var selMode = data.selection.mode, selTarget = data.selection.target; - var selDisable = data.selection.selectable === false; - if (inArray(selMode, ['single', 'multiple'])) { - var selClass = inArray(data.style, ['bootstrap', 'bootstrap4']) ? 'active' : 'selected'; - // selected1: row indices; selected2: column indices - var initSel = function(x) { - if (x === null || typeof x === 'boolean' || selTarget === 'cell') { - return {rows: [], cols: []}; - } else if (selTarget === 'row') { - return {rows: $.makeArray(x), cols: []}; - } else if (selTarget === 'column') { - return {rows: [], cols: $.makeArray(x)}; - } else if (selTarget === 'row+column') { - return {rows: $.makeArray(x.rows), cols: $.makeArray(x.cols)}; - } - } - var selected = data.selection.selected; - var selected1 = initSel(selected).rows, selected2 = initSel(selected).cols; - // selectable should contain either all positive or all non-positive values, not both - // positive values indicate "selectable" while non-positive values means "nonselectable" - // the assertion is performed on R side. (only column indicides could be zero which indicates - // the row name) - var selectable = data.selection.selectable; - var selectable1 = initSel(selectable).rows, selectable2 = initSel(selectable).cols; - - // After users reorder the rows or filter the table, we cannot use the table index - // directly. Instead, we need this function to find out the rows between the two clicks. - // If user filter the table again between the start click and the end click, the behavior - // would be undefined, but it should not be a problem. 
- var shiftSelRowsIndex = function(start, end) { - var indexes = server ? DT_rows_all : table.rows({ search: 'applied' }).indexes().toArray(); - start = indexes.indexOf(start); end = indexes.indexOf(end); - // if start is larger than end, we need to swap - if (start > end) { - var tmp = end; end = start; start = tmp; - } - return indexes.slice(start, end + 1); - } - - var serverRowIndex = function(clientRowIndex) { - return server ? DT_rows_current[clientRowIndex] : clientRowIndex + 1; - } - - // row, column, or cell selection - var lastClickedRow; - if (inArray(selTarget, ['row', 'row+column'])) { - // Get the current selected rows. It will also - // update the selected1's value based on the current row selection state - // Note we can't put this function inside selectRows() directly, - // the reason is method.selectRows() will override selected1's value but this - // function will add rows to selected1 (keep the existing selection), which is - // inconsistent with column and cell selection. - var selectedRows = function() { - var rows = table.rows('.' + selClass); - var idx = rows.indexes().toArray(); - if (!server) { - selected1 = addOne(idx); - return selected1; - } - idx = idx.map(function(i) { - return DT_rows_current[i]; - }); - selected1 = selMode === 'multiple' ? 
unique(selected1.concat(idx)) : idx; - return selected1; - } - // Change selected1's value based on selectable1, then refresh the row state - var onlyKeepSelectableRows = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected1 = []; - return; - } - if (selectable1.length === 0) return; - var nonselectable = selectable1[0] <= 0; - if (nonselectable) { - // should make selectable1 positive - selected1 = $(selected1).not(selectable1.map(function(i) { return -i; })).get(); - } else { - selected1 = $(selected1).filter(selectable1).get(); - } - } - // Change selected1's value based on selectable1, then - // refresh the row selection state according to values in selected1 - var selectRows = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableRows(); - table.$('tr.' + selClass).removeClass(selClass); - if (selected1.length === 0) return; - if (server) { - table.rows({page: 'current'}).every(function() { - if (inArray(DT_rows_current[this.index()], selected1)) { - $(this.node()).addClass(selClass); - } - }); - } else { - var selected0 = selected1.map(function(i) { return i - 1; }); - $(table.rows(selected0).nodes()).addClass(selClass); - } - } - table.on('mousedown.dt', 'tbody tr', function(e) { - var $this = $(this), thisRow = table.row(this); - if (selMode === 'multiple') { - if (e.shiftKey && lastClickedRow !== undefined) { - // select or de-select depends on the last clicked row's status - var flagSel = !$this.hasClass(selClass); - var crtClickedRow = serverRowIndex(thisRow.index()); - if (server) { - var rowsIndex = shiftSelRowsIndex(lastClickedRow, crtClickedRow); - // update current page's selClass - rowsIndex.map(function(i) { - var rowIndex = DT_rows_current.indexOf(i); - if (rowIndex >= 0) { - var row = table.row(rowIndex).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - } - }); - // update selected1 - if (flagSel) { - selected1 
= unique(selected1.concat(rowsIndex)); - } else { - selected1 = selected1.filter(function(index) { - return !inArray(index, rowsIndex); - }); - } - } else { - // js starts from 0 - shiftSelRowsIndex(lastClickedRow - 1, crtClickedRow - 1).map(function(value) { - var row = table.row(value).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - }); - } - e.preventDefault(); - } else { - $this.toggleClass(selClass); - } - } else { - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - } else { - table.$('tr.' + selClass).removeClass(selClass); - $this.addClass(selClass); - } - } - if (server && !$this.hasClass(selClass)) { - var id = DT_rows_current[thisRow.index()]; - // remove id from selected1 since its class .selected has been removed - if (inArray(id, selected1)) selected1.splice($.inArray(id, selected1), 1); - } - selectedRows(); // update selected1's value based on selClass - selectRows(false); // only keep the selectable rows - changeInput('rows_selected', selected1); - changeInput('row_last_clicked', serverRowIndex(thisRow.index()), null, null, {priority: 'event'}); - lastClickedRow = serverRowIndex(thisRow.index()); - }); - selectRows(false); // in case users have specified pre-selected rows - // restore selected rows after the table is redrawn (e.g. 
sort/search/page); - // client-side tables will preserve the selections automatically; for - // server-side tables, we have to *real* row indices are in `selected1` - changeInput('rows_selected', selected1); - if (server) table.on('draw.dt', function(e) { selectRows(false); }); - methods.selectRows = function(selected, ignoreSelectable) { - selected1 = $.makeArray(selected); - selectRows(ignoreSelectable); - changeInput('rows_selected', selected1); - } - } - - if (inArray(selTarget, ['column', 'row+column'])) { - if (selTarget === 'row+column') { - $(table.columns().footer()).css('cursor', 'pointer'); - } - // update selected2's value based on selectable2 - var onlyKeepSelectableCols = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected2 = []; - return; - } - if (selectable2.length === 0) return; - var nonselectable = selectable2[0] <= 0; - if (nonselectable) { - // need to make selectable2 positive - selected2 = $(selected2).not(selectable2.map(function(i) { return -i; })).get(); - } else { - selected2 = $(selected2).filter(selectable2).get(); - } - } - // update selected2 and then - // refresh the col selection state according to values in selected2 - var selectCols = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCols(); - // if selected2 is not a valide index (e.g., larger than the column number) - // table.columns(selected2) will fail and result in a blank table - // this is different from the table.rows(), where the out-of-range indexes - // doesn't affect at all - selected2 = $(selected2).filter(table.columns().indexes()).get(); - table.columns().nodes().flatten().to$().removeClass(selClass); - if (selected2.length > 0) - table.columns(selected2).nodes().flatten().to$().addClass(selClass); - } - var callback = function() { - var colIdx = selTarget === 'column' ? 
table.cell(this).index().column : - $.inArray(this, table.columns().footer()), - thisCol = $(table.column(colIdx).nodes()); - if (colIdx === -1) return; - if (thisCol.hasClass(selClass)) { - thisCol.removeClass(selClass); - selected2.splice($.inArray(colIdx, selected2), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - thisCol.addClass(selClass); - selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx])); - } - selectCols(false); // update selected2 based on selectable - changeInput('columns_selected', selected2); - } - if (selTarget === 'column') { - $(table.table().body()).on('click.dt', 'td', callback); - } else { - $(table.table().footer()).on('click.dt', 'tr th', callback); - } - selectCols(false); // in case users have specified pre-selected columns - changeInput('columns_selected', selected2); - if (server) table.on('draw.dt', function(e) { selectCols(false); }); - methods.selectColumns = function(selected, ignoreSelectable) { - selected2 = $.makeArray(selected); - selectCols(ignoreSelectable); - changeInput('columns_selected', selected2); - } - } - - if (selTarget === 'cell') { - var selected3 = [], selectable3 = []; - if (selected !== null) selected3 = selected; - if (selectable !== null && typeof selectable !== 'boolean') selectable3 = selectable; - var findIndex = function(ij, sel) { - for (var i = 0; i < sel.length; i++) { - if (ij[0] === sel[i][0] && ij[1] === sel[i][1]) return i; - } - return -1; - } - // Change selected3's value based on selectable3, then refresh the cell state - var onlyKeepSelectableCells = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected3 = []; - return; - } - if (selectable3.length === 0) return; - var nonselectable = selectable3[0][0] <= 0; - var out = []; - if (nonselectable) { - selected3.map(function(ij) { - // should make selectable3 positive - if (findIndex([-ij[0], -ij[1]], selectable3) === -1) { 
out.push(ij); } - }); - } else { - selected3.map(function(ij) { - if (findIndex(ij, selectable3) > -1) { out.push(ij); } - }); - } - selected3 = out; - } - // Change selected3's value based on selectable3, then - // refresh the cell selection state according to values in selected3 - var selectCells = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCells(); - table.$('td.' + selClass).removeClass(selClass); - if (selected3.length === 0) return; - if (server) { - table.cells({page: 'current'}).every(function() { - var info = tweakCellIndex(this); - if (findIndex([info.row, info.col], selected3) > -1) - $(this.node()).addClass(selClass); - }); - } else { - selected3.map(function(ij) { - $(table.cell(ij[0] - 1, ij[1]).node()).addClass(selClass); - }); - } - }; - table.on('click.dt', 'tbody td', function() { - var $this = $(this), info = tweakCellIndex(table.cell(this)); - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - selected3.splice(findIndex([info.row, info.col], selected3), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - $this.addClass(selClass); - selected3 = selMode === 'single' ? [[info.row, info.col]] : - unique(selected3.concat([[info.row, info.col]])); - } - selectCells(false); // must call this to update selected3 based on selectable3 - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - }); - selectCells(false); // in case users have specified pre-selected columns - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - - if (server) table.on('draw.dt', function(e) { selectCells(false); }); - methods.selectCells = function(selected, ignoreSelectable) { - selected3 = selected ? 
selected : []; - selectCells(ignoreSelectable); - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - } - } - } - - // expose some table info to Shiny - var updateTableInfo = function(e, settings) { - // TODO: is anyone interested in the page info? - // changeInput('page_info', table.page.info()); - var updateRowInfo = function(id, modifier) { - var idx; - if (server) { - idx = modifier.page === 'current' ? DT_rows_current : DT_rows_all; - } else { - var rows = table.rows($.extend({ - search: 'applied', - page: 'all' - }, modifier)); - idx = addOne(rows.indexes().toArray()); - } - changeInput('rows' + '_' + id, idx); - }; - updateRowInfo('current', {page: 'current'}); - updateRowInfo('all', {}); - } - table.on('draw.dt', updateTableInfo); - updateTableInfo(); - - // state info - table.on('draw.dt column-visibility.dt', function() { - changeInput('state', table.state()); - }); - changeInput('state', table.state()); - - // search info - var updateSearchInfo = function() { - changeInput('search', table.search()); - if (filterRow) changeInput('search_columns', filterRow.toArray().map(function(td) { - return $(td).find('input').first().val(); - })); - } - table.on('draw.dt', updateSearchInfo); - updateSearchInfo(); - - var cellInfo = function(thiz) { - var info = tweakCellIndex(table.cell(thiz)); - info.value = table.cell(thiz).data(); - return info; - } - // the current cell clicked on - table.on('click.dt', 'tbody td', function() { - changeInput('cell_clicked', cellInfo(this), null, null, {priority: 'event'}); - }) - changeInput('cell_clicked', {}); - - // do not trigger table selection when clicking on links unless they have classes - table.on('click.dt', 'tbody td a', function(e) { - if (this.className === '') e.stopPropagation(); - }); - - methods.addRow = function(data, rowname, resetPaging) { - var n = table.columns().indexes().length, d = n - data.length; - if (d === 1) { - data = rowname.concat(data) - } else if (d !== 0) { - 
console.log(data); - console.log(table.columns().indexes()); - throw 'New data must be of the same length as current data (' + n + ')'; - }; - table.row.add(data).draw(resetPaging); - } - - methods.updateSearch = function(keywords) { - if (keywords.global !== null) - $(table.table().container()).find('input[type=search]').first() - .val(keywords.global).trigger('input'); - var columns = keywords.columns; - if (!filterRow || columns === null) return; - filterRow.toArray().map(function(td, i) { - var v = typeof columns === 'string' ? columns : columns[i]; - if (typeof v === 'undefined') { - console.log('The search keyword for column ' + i + ' is undefined') - return; - } - $(td).find('input').first().val(v); - searchColumn(i, v); - }); - table.draw(); - } - - methods.hideCols = function(hide, reset) { - if (reset) table.columns().visible(true, false); - table.columns(hide).visible(false); - } - - methods.showCols = function(show, reset) { - if (reset) table.columns().visible(false, false); - table.columns(show).visible(true); - } - - methods.colReorder = function(order, origOrder) { - table.colReorder.order(order, origOrder); - } - - methods.selectPage = function(page) { - if (table.page.info().pages < page || page < 1) { - throw 'Selected page is out of range'; - }; - table.page(page - 1).draw(false); - } - - methods.reloadData = function(resetPaging, clearSelection) { - // empty selections first if necessary - if (methods.selectRows && inArray('row', clearSelection)) methods.selectRows([]); - if (methods.selectColumns && inArray('column', clearSelection)) methods.selectColumns([]); - if (methods.selectCells && inArray('cell', clearSelection)) methods.selectCells([]); - table.ajax.reload(null, resetPaging); - } - - // update table filters (set new limits of sliders) - methods.updateFilters = function(newProps) { - // loop through each filter in the filter row - filterRow.each(function(i, td) { - var k = i; - if (filterRow.length > newProps.length) { - if (i === 0) 
return; // first column is row names - k = i - 1; - } - // Update the filters to reflect the updated data. - // Allow "falsy" (e.g. NULL) to signify a no-op. - if (newProps[k]) { - setFilterProps(td, newProps[k]); - } - }); - }; - - table.shinyMethods = methods; - }, - resize: function(el, width, height, instance) { - if (instance.data) this.renderValue(el, instance.data, instance); - - // dynamically adjust height if fillContainer = TRUE - if (instance.fillContainer) - this.fillAvailableHeight(el, height); - - this.adjustWidth(el); - }, - - // dynamically set the scroll body to fill available height - // (used with fillContainer = TRUE) - fillAvailableHeight: function(el, availableHeight) { - - // see how much of the table is occupied by header/footer elements - // and use that to compute a target scroll body height - var dtWrapper = $(el).find('div.dataTables_wrapper'); - var dtScrollBody = $(el).find($('div.dataTables_scrollBody')); - var framingHeight = dtWrapper.innerHeight() - dtScrollBody.innerHeight(); - var scrollBodyHeight = availableHeight - framingHeight; - - // we need to set `max-height` to none as datatables library now sets this - // to a fixed height, disabling the ability to resize to fill the window, - // as it will be set to a fixed 100px under such circumstances, e.g., RStudio IDE, - // or FlexDashboard - // see https://github.com/rstudio/DT/issues/951#issuecomment-1026464509 - dtScrollBody.css('max-height', 'none'); - // set the height - dtScrollBody.height(scrollBodyHeight + 'px'); - }, - - // adjust the width of columns; remove the hard-coded widths on table and the - // scroll header when scrollX/Y are enabled - adjustWidth: function(el) { - var $el = $(el), table = $el.data('datatable'); - if (table) table.columns.adjust(); - $el.find('.dataTables_scrollHeadInner').css('width', '') - .children('table').css('margin-left', ''); - } -}); - - if (!HTMLWidgets.shinyMode) return; - - Shiny.addCustomMessageHandler('datatable-calls', 
function(data) { - var id = data.id; - var el = document.getElementById(id); - var table = el ? $(el).data('datatable') : null; - if (!table) { - console.log("Couldn't find table with id " + id); - return; - } - - var methods = table.shinyMethods, call = data.call; - if (methods[call.method]) { - methods[call.method].apply(table, call.args); - } else { - console.log("Unknown method " + call.method); - } - }); - -})(); diff --git a/content/find/parsnip/index_files/datatables-css/datatables-crosstalk.css b/content/find/parsnip/index_files/datatables-css/datatables-crosstalk.css deleted file mode 100644 index fb5bae84..00000000 --- a/content/find/parsnip/index_files/datatables-css/datatables-crosstalk.css +++ /dev/null @@ -1,23 +0,0 @@ -.dt-crosstalk-fade { - opacity: 0.2; -} - -html body div.DTS div.dataTables_scrollBody { - background: none; -} - - -/* -Fix https://github.com/rstudio/DT/issues/563 -If the `table.display` is set to "block" (e.g., pkgdown), the browser will display -datatable objects strangely. The search panel and the page buttons will still be -in full-width but the table body will be "compact" and shorter. -In therory, having this attributes will affect `dom="t"` -with `display: block` users. But in reality, there should be no one. -We may remove the below lines in the future if the upstream agree to have this there. -See https://github.com/DataTables/DataTablesSrc/issues/160 -*/ - -table.dataTable { - display: table; -} diff --git a/content/find/parsnip/index_files/htmlwidgets/htmlwidgets.js b/content/find/parsnip/index_files/htmlwidgets/htmlwidgets.js deleted file mode 100644 index da8b2367..00000000 --- a/content/find/parsnip/index_files/htmlwidgets/htmlwidgets.js +++ /dev/null @@ -1,903 +0,0 @@ -(function() { - // If window.HTMLWidgets is already defined, then use it; otherwise create a - // new object. This allows preceding code to set options that affect the - // initialization process (though none currently exist). 
- window.HTMLWidgets = window.HTMLWidgets || {}; - - // See if we're running in a viewer pane. If not, we're in a web browser. - var viewerMode = window.HTMLWidgets.viewerMode = - /\bviewer_pane=1\b/.test(window.location); - - // See if we're running in Shiny mode. If not, it's a static document. - // Note that static widgets can appear in both Shiny and static modes, but - // obviously, Shiny widgets can only appear in Shiny apps/documents. - var shinyMode = window.HTMLWidgets.shinyMode = - typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings; - - // We can't count on jQuery being available, so we implement our own - // version if necessary. - function querySelectorAll(scope, selector) { - if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) { - return scope.find(selector); - } - if (scope.querySelectorAll) { - return scope.querySelectorAll(selector); - } - } - - function asArray(value) { - if (value === null) - return []; - if ($.isArray(value)) - return value; - return [value]; - } - - // Implement jQuery's extend - function extend(target /*, ... */) { - if (arguments.length == 1) { - return target; - } - for (var i = 1; i < arguments.length; i++) { - var source = arguments[i]; - for (var prop in source) { - if (source.hasOwnProperty(prop)) { - target[prop] = source[prop]; - } - } - } - return target; - } - - // IE8 doesn't support Array.forEach. - function forEach(values, callback, thisArg) { - if (values.forEach) { - values.forEach(callback, thisArg); - } else { - for (var i = 0; i < values.length; i++) { - callback.call(thisArg, values[i], i, values); - } - } - } - - // Replaces the specified method with the return value of funcSource. - // - // Note that funcSource should not BE the new method, it should be a function - // that RETURNS the new method. funcSource receives a single argument that is - // the overridden method, it can be called from the new method. 
The overridden - // method can be called like a regular function, it has the target permanently - // bound to it so "this" will work correctly. - function overrideMethod(target, methodName, funcSource) { - var superFunc = target[methodName] || function() {}; - var superFuncBound = function() { - return superFunc.apply(target, arguments); - }; - target[methodName] = funcSource(superFuncBound); - } - - // Add a method to delegator that, when invoked, calls - // delegatee.methodName. If there is no such method on - // the delegatee, but there was one on delegator before - // delegateMethod was called, then the original version - // is invoked instead. - // For example: - // - // var a = { - // method1: function() { console.log('a1'); } - // method2: function() { console.log('a2'); } - // }; - // var b = { - // method1: function() { console.log('b1'); } - // }; - // delegateMethod(a, b, "method1"); - // delegateMethod(a, b, "method2"); - // a.method1(); - // a.method2(); - // - // The output would be "b1", "a2". - function delegateMethod(delegator, delegatee, methodName) { - var inherited = delegator[methodName]; - delegator[methodName] = function() { - var target = delegatee; - var method = delegatee[methodName]; - - // The method doesn't exist on the delegatee. Instead, - // call the method on the delegator, if it exists. 
- if (!method) { - target = delegator; - method = inherited; - } - - if (method) { - return method.apply(target, arguments); - } - }; - } - - // Implement a vague facsimilie of jQuery's data method - function elementData(el, name, value) { - if (arguments.length == 2) { - return el["htmlwidget_data_" + name]; - } else if (arguments.length == 3) { - el["htmlwidget_data_" + name] = value; - return el; - } else { - throw new Error("Wrong number of arguments for elementData: " + - arguments.length); - } - } - - // http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex - function escapeRegExp(str) { - return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); - } - - function hasClass(el, className) { - var re = new RegExp("\\b" + escapeRegExp(className) + "\\b"); - return re.test(el.className); - } - - // elements - array (or array-like object) of HTML elements - // className - class name to test for - // include - if true, only return elements with given className; - // if false, only return elements *without* given className - function filterByClass(elements, className, include) { - var results = []; - for (var i = 0; i < elements.length; i++) { - if (hasClass(elements[i], className) == include) - results.push(elements[i]); - } - return results; - } - - function on(obj, eventName, func) { - if (obj.addEventListener) { - obj.addEventListener(eventName, func, false); - } else if (obj.attachEvent) { - obj.attachEvent(eventName, func); - } - } - - function off(obj, eventName, func) { - if (obj.removeEventListener) - obj.removeEventListener(eventName, func, false); - else if (obj.detachEvent) { - obj.detachEvent(eventName, func); - } - } - - // Translate array of values to top/right/bottom/left, as usual with - // the "padding" CSS property - // https://developer.mozilla.org/en-US/docs/Web/CSS/padding - function unpackPadding(value) { - if (typeof(value) === "number") - value = [value]; - if (value.length === 1) { - return {top: 
value[0], right: value[0], bottom: value[0], left: value[0]}; - } - if (value.length === 2) { - return {top: value[0], right: value[1], bottom: value[0], left: value[1]}; - } - if (value.length === 3) { - return {top: value[0], right: value[1], bottom: value[2], left: value[1]}; - } - if (value.length === 4) { - return {top: value[0], right: value[1], bottom: value[2], left: value[3]}; - } - } - - // Convert an unpacked padding object to a CSS value - function paddingToCss(paddingObj) { - return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px"; - } - - // Makes a number suitable for CSS - function px(x) { - if (typeof(x) === "number") - return x + "px"; - else - return x; - } - - // Retrieves runtime widget sizing information for an element. - // The return value is either null, or an object with fill, padding, - // defaultWidth, defaultHeight fields. - function sizingPolicy(el) { - var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']"); - if (!sizingEl) - return null; - var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}"); - if (viewerMode) { - return sp.viewer; - } else { - return sp.browser; - } - } - - // @param tasks Array of strings (or falsy value, in which case no-op). - // Each element must be a valid JavaScript expression that yields a - // function. Or, can be an array of objects with "code" and "data" - // properties; in this case, the "code" property should be a string - // of JS that's an expr that yields a function, and "data" should be - // an object that will be added as an additional argument when that - // function is called. - // @param target The object that will be "this" for each function - // execution. - // @param args Array of arguments to be passed to the functions. (The - // same arguments will be passed to all functions.) 
- function evalAndRun(tasks, target, args) { - if (tasks) { - forEach(tasks, function(task) { - var theseArgs = args; - if (typeof(task) === "object") { - theseArgs = theseArgs.concat([task.data]); - task = task.code; - } - var taskFunc = tryEval(task); - if (typeof(taskFunc) !== "function") { - throw new Error("Task must be a function! Source:\n" + task); - } - taskFunc.apply(target, theseArgs); - }); - } - } - - // Attempt eval() both with and without enclosing in parentheses. - // Note that enclosing coerces a function declaration into - // an expression that eval() can parse - // (otherwise, a SyntaxError is thrown) - function tryEval(code) { - var result = null; - try { - result = eval("(" + code + ")"); - } catch(error) { - if (!(error instanceof SyntaxError)) { - throw error; - } - try { - result = eval(code); - } catch(e) { - if (e instanceof SyntaxError) { - throw error; - } else { - throw e; - } - } - } - return result; - } - - function initSizing(el) { - var sizing = sizingPolicy(el); - if (!sizing) - return; - - var cel = document.getElementById("htmlwidget_container"); - if (!cel) - return; - - if (typeof(sizing.padding) !== "undefined") { - document.body.style.margin = "0"; - document.body.style.padding = paddingToCss(unpackPadding(sizing.padding)); - } - - if (sizing.fill) { - document.body.style.overflow = "hidden"; - document.body.style.width = "100%"; - document.body.style.height = "100%"; - document.documentElement.style.width = "100%"; - document.documentElement.style.height = "100%"; - if (cel) { - cel.style.position = "absolute"; - var pad = unpackPadding(sizing.padding); - cel.style.top = pad.top + "px"; - cel.style.right = pad.right + "px"; - cel.style.bottom = pad.bottom + "px"; - cel.style.left = pad.left + "px"; - el.style.width = "100%"; - el.style.height = "100%"; - } - - return { - getWidth: function() { return cel.offsetWidth; }, - getHeight: function() { return cel.offsetHeight; } - }; - - } else { - el.style.width = 
px(sizing.width); - el.style.height = px(sizing.height); - - return { - getWidth: function() { return el.offsetWidth; }, - getHeight: function() { return el.offsetHeight; } - }; - } - } - - // Default implementations for methods - var defaults = { - find: function(scope) { - return querySelectorAll(scope, "." + this.name); - }, - renderError: function(el, err) { - var $el = $(el); - - this.clearError(el); - - // Add all these error classes, as Shiny does - var errClass = "shiny-output-error"; - if (err.type !== null) { - // use the classes of the error condition as CSS class names - errClass = errClass + " " + $.map(asArray(err.type), function(type) { - return errClass + "-" + type; - }).join(" "); - } - errClass = errClass + " htmlwidgets-error"; - - // Is el inline or block? If inline or inline-block, just display:none it - // and add an inline error. - var display = $el.css("display"); - $el.data("restore-display-mode", display); - - if (display === "inline" || display === "inline-block") { - $el.hide(); - if (err.message !== "") { - var errorSpan = $("").addClass(errClass); - errorSpan.text(err.message); - $el.after(errorSpan); - } - } else if (display === "block") { - // If block, add an error just after the el, set visibility:none on the - // el, and position the error to be on top of the el. - // Mark it with a unique ID and CSS class so we can remove it later. - $el.css("visibility", "hidden"); - if (err.message !== "") { - var errorDiv = $("
    ").addClass(errClass).css("position", "absolute") - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - // setting width can push out the page size, forcing otherwise - // unnecessary scrollbars to appear and making it impossible for - // the element to shrink; so use max-width instead - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - errorDiv.text(err.message); - $el.after(errorDiv); - - // Really dumb way to keep the size/position of the error in sync with - // the parent element as the window is resized or whatever. - var intId = setInterval(function() { - if (!errorDiv[0].parentElement) { - clearInterval(intId); - return; - } - errorDiv - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - }, 500); - } - } - }, - clearError: function(el) { - var $el = $(el); - var display = $el.data("restore-display-mode"); - $el.data("restore-display-mode", null); - - if (display === "inline" || display === "inline-block") { - if (display) - $el.css("display", display); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } else if (display === "block"){ - $el.css("visibility", "inherit"); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } - }, - sizing: {} - }; - - // Called by widget bindings to register a new type of widget. The definition - // object can contain the following properties: - // - name (required) - A string indicating the binding name, which will be - // used by default as the CSS classname to look for. - // - initialize (optional) - A function(el) that will be called once per - // widget element; if a value is returned, it will be passed as the third - // value to renderValue. - // - renderValue (required) - A function(el, data, initValue) that will be - // called with data. 
Static contexts will cause this to be called once per - // element; Shiny apps will cause this to be called multiple times per - // element, as the data changes. - window.HTMLWidgets.widget = function(definition) { - if (!definition.name) { - throw new Error("Widget must have a name"); - } - if (!definition.type) { - throw new Error("Widget must have a type"); - } - // Currently we only support output widgets - if (definition.type !== "output") { - throw new Error("Unrecognized widget type '" + definition.type + "'"); - } - // TODO: Verify that .name is a valid CSS classname - - // Support new-style instance-bound definitions. Old-style class-bound - // definitions have one widget "object" per widget per type/class of - // widget; the renderValue and resize methods on such widget objects - // take el and instance arguments, because the widget object can't - // store them. New-style instance-bound definitions have one widget - // object per widget instance; the definition that's passed in doesn't - // provide renderValue or resize methods at all, just the single method - // factory(el, width, height) - // which returns an object that has renderValue(x) and resize(w, h). - // This enables a far more natural programming style for the widget - // author, who can store per-instance state using either OO-style - // instance fields or functional-style closure variables (I guess this - // is in contrast to what can only be called C-style pseudo-OO which is - // what we required before). - if (definition.factory) { - definition = createLegacyDefinitionAdapter(definition); - } - - if (!definition.renderValue) { - throw new Error("Widget must have a renderValue function"); - } - - // For static rendering (non-Shiny), use a simple widget registration - // scheme. We also use this scheme for Shiny apps/documents that also - // contain static widgets. 
- window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; - // Merge defaults into the definition; don't mutate the original definition. - var staticBinding = extend({}, defaults, definition); - overrideMethod(staticBinding, "find", function(superfunc) { - return function(scope) { - var results = superfunc(scope); - // Filter out Shiny outputs, we only want the static kind - return filterByClass(results, "html-widget-output", false); - }; - }); - window.HTMLWidgets.widgets.push(staticBinding); - - if (shinyMode) { - // Shiny is running. Register the definition with an output binding. - // The definition itself will not be the output binding, instead - // we will make an output binding object that delegates to the - // definition. This is because we foolishly used the same method - // name (renderValue) for htmlwidgets definition and Shiny bindings - // but they actually have quite different semantics (the Shiny - // bindings receive data that includes lots of metadata that it - // strips off before calling htmlwidgets renderValue). We can't - // just ignore the difference because in some widgets it's helpful - // to call this.renderValue() from inside of resize(), and if - // we're not delegating, then that call will go to the Shiny - // version instead of the htmlwidgets version. - - // Merge defaults with definition, without mutating either. - var bindingDef = extend({}, defaults, definition); - - // This object will be our actual Shiny binding. - var shinyBinding = new Shiny.OutputBinding(); - - // With a few exceptions, we'll want to simply use the bindingDef's - // version of methods if they are available, otherwise fall back to - // Shiny's defaults. NOTE: If Shiny's output bindings gain additional - // methods in the future, and we want them to be overrideable by - // HTMLWidget binding definitions, then we'll need to add them to this - // list. 
- delegateMethod(shinyBinding, bindingDef, "getId"); - delegateMethod(shinyBinding, bindingDef, "onValueChange"); - delegateMethod(shinyBinding, bindingDef, "onValueError"); - delegateMethod(shinyBinding, bindingDef, "renderError"); - delegateMethod(shinyBinding, bindingDef, "clearError"); - delegateMethod(shinyBinding, bindingDef, "showProgress"); - - // The find, renderValue, and resize are handled differently, because we - // want to actually decorate the behavior of the bindingDef methods. - - shinyBinding.find = function(scope) { - var results = bindingDef.find(scope); - - // Only return elements that are Shiny outputs, not static ones - var dynamicResults = results.filter(".html-widget-output"); - - // It's possible that whatever caused Shiny to think there might be - // new dynamic outputs, also caused there to be new static outputs. - // Since there might be lots of different htmlwidgets bindings, we - // schedule execution for later--no need to staticRender multiple - // times. - if (results.length !== dynamicResults.length) - scheduleStaticRender(); - - return dynamicResults; - }; - - // Wrap renderValue to handle initialization, which unfortunately isn't - // supported natively by Shiny at the time of this writing. 
- - shinyBinding.renderValue = function(el, data) { - Shiny.renderDependencies(data.deps); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var i = 0; data.evals && i < data.evals.length; i++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); - } - if (!bindingDef.renderOnNullValue) { - if (data.x === null) { - el.style.visibility = "hidden"; - return; - } else { - el.style.visibility = "inherit"; - } - } - if (!elementData(el, "initialized")) { - initSizing(el); - - elementData(el, "initialized", true); - if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); - elementData(el, "init_result", result); - } - } - bindingDef.renderValue(el, data.x, elementData(el, "init_result")); - evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); - }; - - // Only override resize if bindingDef implements it - if (bindingDef.resize) { - shinyBinding.resize = function(el, width, height) { - // Shiny can call resize before initialize/renderValue have been - // called, which doesn't make sense for widgets. 
- if (elementData(el, "initialized")) { - bindingDef.resize(el, width, height, elementData(el, "init_result")); - } - }; - } - - Shiny.outputBindings.register(shinyBinding, bindingDef.name); - } - }; - - var scheduleStaticRenderTimerId = null; - function scheduleStaticRender() { - if (!scheduleStaticRenderTimerId) { - scheduleStaticRenderTimerId = setTimeout(function() { - scheduleStaticRenderTimerId = null; - window.HTMLWidgets.staticRender(); - }, 1); - } - } - - // Render static widgets after the document finishes loading - // Statically render all elements that are of this widget's class - window.HTMLWidgets.staticRender = function() { - var bindings = window.HTMLWidgets.widgets || []; - forEach(bindings, function(binding) { - var matches = binding.find(document.documentElement); - forEach(matches, function(el) { - var sizeObj = initSizing(el, binding); - - if (hasClass(el, "html-widget-static-bound")) - return; - el.className = el.className + " html-widget-static-bound"; - - var initResult; - if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); - elementData(el, "init_result", initResult); - } - - if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - if (size.w === 0 && size.h === 0) - return; - if (size.w === lastSize.w && size.h === lastSize.h) - return; - lastSize = size; - binding.resize(el, size.w, size.h, initResult); - }; - - on(window, "resize", resizeHandler); - - // This is needed for cases where we're running in a Shiny - // app, but the widget itself is not a Shiny output, but - // rather a simple static widget. 
One example of this is - // an rmarkdown document that has runtime:shiny and widget - // that isn't in a render function. Shiny only knows to - // call resize handlers for Shiny outputs, not for static - // widgets, so we do it ourselves. - if (window.jQuery) { - window.jQuery(document).on( - "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", - resizeHandler - ); - window.jQuery(document).on( - "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", - resizeHandler - ); - } - - // This is needed for the specific case of ioslides, which - // flips slides between display:none and display:block. - // Ideally we would not have to have ioslide-specific code - // here, but rather have ioslides raise a generic event, - // but the rmarkdown package just went to CRAN so the - // window to getting that fixed may be long. - if (window.addEventListener) { - // It's OK to limit this to window.addEventListener - // browsers because ioslides itself only supports - // such browsers. 
- on(document, "slideenter", resizeHandler); - on(document, "slideleave", resizeHandler); - } - } - - var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); - if (scriptData) { - var data = JSON.parse(scriptData.textContent || scriptData.text); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var k = 0; data.evals && k < data.evals.length; k++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); - } - binding.renderValue(el, data.x, initResult); - evalAndRun(data.jsHooks.render, initResult, [el, data.x]); - } - }); - }); - - invokePostRenderHandlers(); - } - - - function has_jQuery3() { - if (!window.jQuery) { - return false; - } - var $version = window.jQuery.fn.jquery; - var $major_version = parseInt($version.split(".")[0]); - return $major_version >= 3; - } - - /* - / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's - / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now - / really means $(setTimeout(fn)). - / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous - / - / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny - / one tick later than it did before, which means staticRender() is - / called renderValue() earlier than (advanced) widget authors might be expecting. - / https://github.com/rstudio/shiny/issues/2630 - / - / For a concrete example, leaflet has some methods (e.g., updateBounds) - / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
- / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to - / delay execution of those methods (until Shiny methods are ready) - / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 - / - / Ideally widget authors wouldn't need to use this setTimeout() hack that - / leaflet uses to call Shiny methods on a staticRender(). In the long run, - / the logic initShiny should be broken up so that method registration happens - / right away, but binding happens later. - */ - function maybeStaticRenderLater() { - if (shinyMode && has_jQuery3()) { - window.jQuery(window.HTMLWidgets.staticRender); - } else { - window.HTMLWidgets.staticRender(); - } - } - - if (document.addEventListener) { - document.addEventListener("DOMContentLoaded", function() { - document.removeEventListener("DOMContentLoaded", arguments.callee, false); - maybeStaticRenderLater(); - }, false); - } else if (document.attachEvent) { - document.attachEvent("onreadystatechange", function() { - if (document.readyState === "complete") { - document.detachEvent("onreadystatechange", arguments.callee); - maybeStaticRenderLater(); - } - }); - } - - - window.HTMLWidgets.getAttachmentUrl = function(depname, key) { - // If no key, default to the first item - if (typeof(key) === "undefined") - key = 1; - - var link = document.getElementById(depname + "-" + key + "-attachment"); - if (!link) { - throw new Error("Attachment " + depname + "/" + key + " not found in document"); - } - return link.getAttribute("href"); - }; - - window.HTMLWidgets.dataframeToD3 = function(df) { - var names = []; - var length; - for (var name in df) { - if (df.hasOwnProperty(name)) - names.push(name); - if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof(length) !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = 
df[name].length; - } - var results = []; - var item; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; - }; - - window.HTMLWidgets.transposeArray2D = function(array) { - if (array.length === 0) return array; - var newArray = array[0].map(function(col, i) { - return array.map(function(row) { - return row[i] - }) - }); - return newArray; - }; - // Split value at splitChar, but allow splitChar to be escaped - // using escapeChar. Any other characters escaped by escapeChar - // will be included as usual (including escapeChar itself). - function splitWithEscape(value, splitChar, escapeChar) { - var results = []; - var escapeMode = false; - var currentResult = ""; - for (var pos = 0; pos < value.length; pos++) { - if (!escapeMode) { - if (value[pos] === splitChar) { - results.push(currentResult); - currentResult = ""; - } else if (value[pos] === escapeChar) { - escapeMode = true; - } else { - currentResult += value[pos]; - } - } else { - currentResult += value[pos]; - escapeMode = false; - } - } - if (currentResult !== "") { - results.push(currentResult); - } - return results; - } - // Function authored by Yihui/JJ Allaire - window.HTMLWidgets.evaluateStringMember = function(o, member) { - var parts = splitWithEscape(member, '.', '\\'); - for (var i = 0, l = parts.length; i < l; i++) { - var part = parts[i]; - // part may be a character or 'numeric' member name - if (o !== null && typeof o === "object" && part in o) { - if (i == (l - 1)) { // if we are at the end of the line then evalulate - if (typeof o[part] === "string") - o[part] = tryEval(o[part]); - } else { // otherwise continue to next embedded object - o = o[part]; - } - } - } - }; - - // Retrieve the HTMLWidget instance (i.e. the return value of an - // HTMLWidget binding's initialize() or factory() function) - // associated with an element, or null if none. 
- window.HTMLWidgets.getInstance = function(el) { - return elementData(el, "init_result"); - }; - - // Finds the first element in the scope that matches the selector, - // and returns the HTMLWidget instance (i.e. the return value of - // an HTMLWidget binding's initialize() or factory() function) - // associated with that element, if any. If no element matches the - // selector, or the first matching element has no HTMLWidget - // instance associated with it, then null is returned. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.find = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var el = scope.querySelector(selector); - if (el === null) { - return null; - } else { - return window.HTMLWidgets.getInstance(el); - } - }; - - // Finds all elements in the scope that match the selector, and - // returns the HTMLWidget instances (i.e. the return values of - // an HTMLWidget binding's initialize() or factory() function) - // associated with the elements, in an array. If elements that - // match the selector don't have an associated HTMLWidget - // instance, the returned array will contain nulls. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.findAll = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var nodes = scope.querySelectorAll(selector); - var results = []; - for (var i = 0; i < nodes.length; i++) { - results.push(window.HTMLWidgets.getInstance(nodes[i])); - } - return results; - }; - - var postRenderHandlers = []; - function invokePostRenderHandlers() { - while (postRenderHandlers.length) { - var handler = postRenderHandlers.shift(); - if (handler) { - handler(); - } - } - } - - // Register the given callback function to be invoked after the - // next time static widgets are rendered. 
- window.HTMLWidgets.addPostRenderHandler = function(callback) { - postRenderHandlers.push(callback); - }; - - // Takes a new-style instance-bound definition, and returns an - // old-style class-bound definition. This saves us from having - // to rewrite all the logic in this file to accomodate both - // types of definitions. - function createLegacyDefinitionAdapter(defn) { - var result = { - name: defn.name, - type: defn.type, - initialize: function(el, width, height) { - return defn.factory(el, width, height); - }, - renderValue: function(el, x, instance) { - return instance.renderValue(x); - }, - resize: function(el, width, height, instance) { - return instance.resize(width, height); - } - }; - - if (defn.find) - result.find = defn.find; - if (defn.renderError) - result.renderError = defn.renderError; - if (defn.clearError) - result.clearError = defn.clearError; - - return result; - } -})(); - diff --git a/content/find/recipes/index.Rmd b/content/find/recipes/index.Rmd deleted file mode 100644 index 3ae9b84c..00000000 --- a/content/find/recipes/index.Rmd +++ /dev/null @@ -1,66 +0,0 @@ ---- -subtitle: Recipes -title: Search recipe steps -weight: 3 -description: | - Find recipe steps in the tidymodels framework to help you prep your data for modeling. 
---- - -```{r ex_setup, include=FALSE} -knitr::opts_chunk$set( - message = FALSE, - digits = 3, - collapse = TRUE, - comment = "#>" - ) -options(digits = 3) -script <- here::here("static/code/get_pkgdown_urls.R") -source(script) -library(DT) -library(tibble) -pkgs <- - tibble(pkg = c("recipes", "embed", "textrecipes", "themis")) %>% - mutate(base_url = glue::glue("https://{pkg}.tidymodels.org/")) %>% - dplyr::bind_rows( - tibble( - pkg = "timetk", - base_url = "https://business-science.github.io/timetk/" - ) - ) - # MachineShop has step functions but pkgdown reference pages - # redirect to https://www.rdocumentation.org/ - # dplyr::bind_rows( - # tibble( - # pkg = "MachineShop", - # base_url = "https://brian-j-smith.github.io/" - # ) - # ) - # healthcareai has a number of step functions but they are not documented - # on their pkgdown site - # dplyr::bind_rows( - # tibble( - # pkg = "healthcareai", - # base_url = "https://docs.healthcare.ai/" - # ) - # ) - # customsteps package has no pkgdown site -``` - -To learn about the recipes package, see [*Get Started: Preprocess your data with recipes*](/start/recipes/). The table below allows you to search for recipe steps across tidymodels packages. - -```{r table-compute, include = FALSE} -pkg_urls <- get_pkgdown_urls(pkgs, fltr = "(^check_)|(^step_)") %>% - select(title, topic, package) %>% - mutate(package = as.factor(package)) -``` - -```{r table-display, echo = FALSE, results = "asis"} -DT::datatable( - pkg_urls, - rownames = FALSE, - class = 'cell-border stripe', - escape = FALSE, - filter = "top", - options = list(pageLength = 5) -) -``` diff --git a/content/find/recipes/index.html b/content/find/recipes/index.html deleted file mode 100644 index b61ab24d..00000000 --- a/content/find/recipes/index.html +++ /dev/null @@ -1,26 +0,0 @@ ---- -subtitle: Recipes -title: Search recipe steps -weight: 3 -description: | - Find recipe steps in the tidymodels framework to help you prep your data for modeling. 
---- - - - - - - - - - - - - - - - - -

    To learn about the recipes package, see Get Started: Preprocess your data with recipes. The table below allows you to search for recipe steps across tidymodels packages.

    -
    - diff --git a/content/find/recipes/index_files/crosstalk/css/crosstalk.min.css b/content/find/recipes/index_files/crosstalk/css/crosstalk.min.css deleted file mode 100644 index 6b453828..00000000 --- a/content/find/recipes/index_files/crosstalk/css/crosstalk.min.css +++ /dev/null @@ -1 +0,0 @@ -.container-fluid.crosstalk-bscols{margin-left:-30px;margin-right:-30px;white-space:normal}body>.container-fluid.crosstalk-bscols{margin-left:auto;margin-right:auto}.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column{display:inline-block;padding-right:12px;vertical-align:top}@media only screen and (max-width: 480px){.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column{display:block;padding-right:inherit}}.crosstalk-input{margin-bottom:15px}.crosstalk-input .control-label{margin-bottom:0;vertical-align:middle}.crosstalk-input input[type="checkbox"]{margin:4px 0 0;margin-top:1px;line-height:normal}.crosstalk-input .checkbox{position:relative;display:block;margin-top:10px;margin-bottom:10px}.crosstalk-input .checkbox>label{padding-left:20px;margin-bottom:0;font-weight:400;cursor:pointer}.crosstalk-input .checkbox input[type="checkbox"],.crosstalk-input .checkbox-inline input[type="checkbox"]{position:absolute;margin-top:2px;margin-left:-20px}.crosstalk-input .checkbox+.checkbox{margin-top:-5px}.crosstalk-input .checkbox-inline{position:relative;display:inline-block;padding-left:20px;margin-bottom:0;font-weight:400;vertical-align:middle;cursor:pointer}.crosstalk-input .checkbox-inline+.checkbox-inline{margin-top:0;margin-left:10px} diff --git a/content/find/recipes/index_files/crosstalk/js/crosstalk.js b/content/find/recipes/index_files/crosstalk/js/crosstalk.js deleted file mode 100644 index fd9eb53d..00000000 --- a/content/find/recipes/index_files/crosstalk/js/crosstalk.js +++ /dev/null @@ -1,1474 +0,0 @@ -(function(){function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof 
require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o b) { - return 1; - } -} - -/** - * @private - */ - -var FilterSet = function () { - function FilterSet() { - _classCallCheck(this, FilterSet); - - this.reset(); - } - - _createClass(FilterSet, [{ - key: "reset", - value: function reset() { - // Key: handle ID, Value: array of selected keys, or null - this._handles = {}; - // Key: key string, Value: count of handles that include it - this._keys = {}; - this._value = null; - this._activeHandles = 0; - } - }, { - key: "update", - value: function update(handleId, keys) { - if (keys !== null) { - keys = keys.slice(0); // clone before sorting - keys.sort(naturalComparator); - } - - var _diffSortedLists = (0, _util.diffSortedLists)(this._handles[handleId], keys), - added = _diffSortedLists.added, - removed = _diffSortedLists.removed; - - this._handles[handleId] = keys; - - for (var i = 0; i < added.length; i++) { - this._keys[added[i]] = (this._keys[added[i]] || 0) + 1; - } - for (var _i = 0; _i < removed.length; _i++) { - this._keys[removed[_i]]--; - } - - this._updateValue(keys); - } - - /** - * @param {string[]} keys Sorted array of strings that indicate - * a superset of possible keys. - * @private - */ - - }, { - key: "_updateValue", - value: function _updateValue() { - var keys = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : this._allKeys; - - var handleCount = Object.keys(this._handles).length; - if (handleCount === 0) { - this._value = null; - } else { - this._value = []; - for (var i = 0; i < keys.length; i++) { - var count = this._keys[keys[i]]; - if (count === handleCount) { - this._value.push(keys[i]); - } - } - } - } - }, { - key: "clear", - value: function clear(handleId) { - if (typeof this._handles[handleId] === "undefined") { - return; - } - - var keys = this._handles[handleId]; - if (!keys) { - keys = []; - } - - for (var i = 0; i < keys.length; i++) { - this._keys[keys[i]]--; - } - delete this._handles[handleId]; - - this._updateValue(); - } - }, { - key: "value", - get: function get() { - return this._value; - } - }, { - key: "_allKeys", - get: function get() { - var allKeys = Object.keys(this._keys); - allKeys.sort(naturalComparator); - return allKeys; - } - }]); - - return FilterSet; -}(); - -exports.default = FilterSet; - -},{"./util":11}],4:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? 
"symbol" : typeof obj; }; - -exports.default = group; - -var _var2 = require("./var"); - -var _var3 = _interopRequireDefault(_var2); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -// Use a global so that multiple copies of crosstalk.js can be loaded and still -// have groups behave as singletons across all copies. -global.__crosstalk_groups = global.__crosstalk_groups || {}; -var groups = global.__crosstalk_groups; - -function group(groupName) { - if (groupName && typeof groupName === "string") { - if (!groups.hasOwnProperty(groupName)) { - groups[groupName] = new Group(groupName); - } - return groups[groupName]; - } else if ((typeof groupName === "undefined" ? "undefined" : _typeof(groupName)) === "object" && groupName._vars && groupName.var) { - // Appears to already be a group object - return groupName; - } else if (Array.isArray(groupName) && groupName.length == 1 && typeof groupName[0] === "string") { - return group(groupName[0]); - } else { - throw new Error("Invalid groupName argument"); - } -} - -var Group = function () { - function Group(name) { - _classCallCheck(this, Group); - - this.name = name; - this._vars = {}; - } - - _createClass(Group, [{ - key: "var", - value: function _var(name) { - if (!name || typeof name !== "string") { - throw new Error("Invalid var name"); - } - - if (!this._vars.hasOwnProperty(name)) this._vars[name] = new _var3.default(this, name); - return this._vars[name]; - } - }, { - key: "has", - value: function has(name) { - if (!name || typeof name !== "string") { - throw new Error("Invalid var name"); - } - - return this._vars.hasOwnProperty(name); - } - }]); - - return Group; -}(); - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? 
window : {}) - -},{"./var":12}],5:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _group = require("./group"); - -var _group2 = _interopRequireDefault(_group); - -var _selection = require("./selection"); - -var _filter = require("./filter"); - -var _input = require("./input"); - -require("./input_selectize"); - -require("./input_checkboxgroup"); - -require("./input_slider"); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -var defaultGroup = (0, _group2.default)("default"); - -function var_(name) { - return defaultGroup.var(name); -} - -function has(name) { - return defaultGroup.has(name); -} - -if (global.Shiny) { - global.Shiny.addCustomMessageHandler("update-client-value", function (message) { - if (typeof message.group === "string") { - (0, _group2.default)(message.group).var(message.name).set(message.value); - } else { - var_(message.name).set(message.value); - } - }); -} - -var crosstalk = { - group: _group2.default, - var: var_, - has: has, - SelectionHandle: _selection.SelectionHandle, - FilterHandle: _filter.FilterHandle, - bind: _input.bind -}; - -/** - * @namespace crosstalk - */ -exports.default = crosstalk; - -global.crosstalk = crosstalk; - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? 
window : {}) - -},{"./filter":2,"./group":4,"./input":6,"./input_checkboxgroup":7,"./input_selectize":8,"./input_slider":9,"./selection":10}],6:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.register = register; -exports.bind = bind; -var $ = global.jQuery; - -var bindings = {}; - -function register(reg) { - bindings[reg.className] = reg; - if (global.document && global.document.readyState !== "complete") { - $(function () { - bind(); - }); - } else if (global.document) { - setTimeout(bind, 100); - } -} - -function bind() { - Object.keys(bindings).forEach(function (className) { - var binding = bindings[className]; - $("." + binding.className).not(".crosstalk-input-bound").each(function (i, el) { - bindInstance(binding, el); - }); - }); -} - -// Escape jQuery identifier -function $escape(val) { - return val.replace(/([!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~])/g, "\\$1"); -} - -function bindEl(el) { - var $el = $(el); - Object.keys(bindings).forEach(function (className) { - if ($el.hasClass(className) && !$el.hasClass("crosstalk-input-bound")) { - var binding = bindings[className]; - bindInstance(binding, el); - } - }); -} - -function bindInstance(binding, el) { - var jsonEl = $(el).find("script[type='application/json'][data-for='" + $escape(el.id) + "']"); - var data = JSON.parse(jsonEl[0].innerText); - - var instance = binding.factory(el, data); - $(el).data("crosstalk-instance", instance); - $(el).addClass("crosstalk-input-bound"); -} - -if (global.Shiny) { - var inputBinding = new global.Shiny.InputBinding(); - var _$ = global.jQuery; - _$.extend(inputBinding, { - find: function find(scope) { - return _$(scope).find(".crosstalk-input"); - }, - initialize: function initialize(el) { - if (!_$(el).hasClass("crosstalk-input-bound")) { - bindEl(el); - } - }, - getId: function getId(el) { - return el.id; - }, - getValue: function getValue(el) {}, - setValue: function 
setValue(el, value) {}, - receiveMessage: function receiveMessage(el, data) {}, - subscribe: function subscribe(el, callback) { - _$(el).data("crosstalk-instance").resume(); - }, - unsubscribe: function unsubscribe(el) { - _$(el).data("crosstalk-instance").suspend(); - } - }); - global.Shiny.inputBindings.register(inputBinding, "crosstalk.inputBinding"); -} - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{}],7:[function(require,module,exports){ -(function (global){ -"use strict"; - -var _input = require("./input"); - -var input = _interopRequireWildcard(_input); - -var _filter = require("./filter"); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -var $ = global.jQuery; - -input.register({ - className: "crosstalk-input-checkboxgroup", - - factory: function factory(el, data) { - /* - * map: {"groupA": ["keyA", "keyB", ...], ...} - * group: "ct-groupname" - */ - var ctHandle = new _filter.FilterHandle(data.group); - - var lastKnownKeys = void 0; - var $el = $(el); - $el.on("change", "input[type='checkbox']", function () { - var checked = $el.find("input[type='checkbox']:checked"); - if (checked.length === 0) { - lastKnownKeys = null; - ctHandle.clear(); - } else { - var keys = {}; - checked.each(function () { - data.map[this.value].forEach(function (key) { - keys[key] = true; - }); - }); - var keyArray = Object.keys(keys); - keyArray.sort(); - lastKnownKeys = keyArray; - ctHandle.set(keyArray); - } - }); - - return { - suspend: function suspend() { - ctHandle.clear(); - }, - resume: function resume() { - if (lastKnownKeys) ctHandle.set(lastKnownKeys); - } - }; - } -}); - -}).call(this,typeof global !== "undefined" ? 
global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./filter":2,"./input":6}],8:[function(require,module,exports){ -(function (global){ -"use strict"; - -var _input = require("./input"); - -var input = _interopRequireWildcard(_input); - -var _util = require("./util"); - -var util = _interopRequireWildcard(_util); - -var _filter = require("./filter"); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -var $ = global.jQuery; - -input.register({ - className: "crosstalk-input-select", - - factory: function factory(el, data) { - /* - * items: {value: [...], label: [...]} - * map: {"groupA": ["keyA", "keyB", ...], ...} - * group: "ct-groupname" - */ - - var first = [{ value: "", label: "(All)" }]; - var items = util.dataframeToD3(data.items); - var opts = { - options: first.concat(items), - valueField: "value", - labelField: "label", - searchField: "label" - }; - - var select = $(el).find("select")[0]; - - var selectize = $(select).selectize(opts)[0].selectize; - - var ctHandle = new _filter.FilterHandle(data.group); - - var lastKnownKeys = void 0; - selectize.on("change", function () { - if (selectize.items.length === 0) { - lastKnownKeys = null; - ctHandle.clear(); - } else { - var keys = {}; - selectize.items.forEach(function (group) { - data.map[group].forEach(function (key) { - keys[key] = true; - }); - }); - var keyArray = Object.keys(keys); - keyArray.sort(); - lastKnownKeys = keyArray; - ctHandle.set(keyArray); - } - }); - - return { - suspend: function suspend() { - ctHandle.clear(); - }, - resume: function resume() { - if (lastKnownKeys) ctHandle.set(lastKnownKeys); - } - }; - } -}); - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? 
self : typeof window !== "undefined" ? window : {}) - -},{"./filter":2,"./input":6,"./util":11}],9:[function(require,module,exports){ -(function (global){ -"use strict"; - -var _slicedToArray = function () { function sliceIterator(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"]) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } return function (arr, i) { if (Array.isArray(arr)) { return arr; } else if (Symbol.iterator in Object(arr)) { return sliceIterator(arr, i); } else { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } }; }(); - -var _input = require("./input"); - -var input = _interopRequireWildcard(_input); - -var _filter = require("./filter"); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -var $ = global.jQuery; -var strftime = global.strftime; - -input.register({ - className: "crosstalk-input-slider", - - factory: function factory(el, data) { - /* - * map: {"groupA": ["keyA", "keyB", ...], ...} - * group: "ct-groupname" - */ - var ctHandle = new _filter.FilterHandle(data.group); - - var opts = {}; - var $el = $(el).find("input"); - var dataType = $el.data("data-type"); - var timeFormat = $el.data("time-format"); - var round = $el.data("round"); - var timeFormatter = void 0; - - // Set up formatting functions - if (dataType === "date") { - timeFormatter = strftime.utc(); - opts.prettify = function (num) { - return timeFormatter(timeFormat, new Date(num)); - }; - } else if (dataType === "datetime") { - var timezone = $el.data("timezone"); 
- if (timezone) timeFormatter = strftime.timezone(timezone);else timeFormatter = strftime; - - opts.prettify = function (num) { - return timeFormatter(timeFormat, new Date(num)); - }; - } else if (dataType === "number") { - if (typeof round !== "undefined") opts.prettify = function (num) { - var factor = Math.pow(10, round); - return Math.round(num * factor) / factor; - }; - } - - $el.ionRangeSlider(opts); - - function getValue() { - var result = $el.data("ionRangeSlider").result; - - // Function for converting numeric value from slider to appropriate type. - var convert = void 0; - var dataType = $el.data("data-type"); - if (dataType === "date") { - convert = function convert(val) { - return formatDateUTC(new Date(+val)); - }; - } else if (dataType === "datetime") { - convert = function convert(val) { - // Convert ms to s - return +val / 1000; - }; - } else { - convert = function convert(val) { - return +val; - }; - } - - if ($el.data("ionRangeSlider").options.type === "double") { - return [convert(result.from), convert(result.to)]; - } else { - return convert(result.from); - } - } - - var lastKnownKeys = null; - - $el.on("change.crosstalkSliderInput", function (event) { - if (!$el.data("updating") && !$el.data("animating")) { - var _getValue = getValue(), - _getValue2 = _slicedToArray(_getValue, 2), - from = _getValue2[0], - to = _getValue2[1]; - - var keys = []; - for (var i = 0; i < data.values.length; i++) { - var val = data.values[i]; - if (val >= from && val <= to) { - keys.push(data.keys[i]); - } - } - keys.sort(); - ctHandle.set(keys); - lastKnownKeys = keys; - } - }); - - // let $el = $(el); - // $el.on("change", "input[type="checkbox"]", function() { - // let checked = $el.find("input[type="checkbox"]:checked"); - // if (checked.length === 0) { - // ctHandle.clear(); - // } else { - // let keys = {}; - // checked.each(function() { - // data.map[this.value].forEach(function(key) { - // keys[key] = true; - // }); - // }); - // let keyArray = 
Object.keys(keys); - // keyArray.sort(); - // ctHandle.set(keyArray); - // } - // }); - - return { - suspend: function suspend() { - ctHandle.clear(); - }, - resume: function resume() { - if (lastKnownKeys) ctHandle.set(lastKnownKeys); - } - }; - } -}); - -// Convert a number to a string with leading zeros -function padZeros(n, digits) { - var str = n.toString(); - while (str.length < digits) { - str = "0" + str; - }return str; -} - -// Given a Date object, return a string in yyyy-mm-dd format, using the -// UTC date. This may be a day off from the date in the local time zone. -function formatDateUTC(date) { - if (date instanceof Date) { - return date.getUTCFullYear() + "-" + padZeros(date.getUTCMonth() + 1, 2) + "-" + padZeros(date.getUTCDate(), 2); - } else { - return null; - } -} - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./filter":2,"./input":6}],10:[function(require,module,exports){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.SelectionHandle = undefined; - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _events = require("./events"); - -var _events2 = _interopRequireDefault(_events); - -var _group = require("./group"); - -var _group2 = _interopRequireDefault(_group); - -var _util = require("./util"); - -var util = _interopRequireWildcard(_util); - -function _interopRequireWildcard(obj) { if (obj 
&& obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -/** - * Use this class to read and write (and listen for changes to) the selection - * for a Crosstalk group. This is intended to be used for linked brushing. - * - * If two (or more) `SelectionHandle` instances in the same webpage share the - * same group name, they will share the same state. Setting the selection using - * one `SelectionHandle` instance will result in the `value` property instantly - * changing across the others, and `"change"` event listeners on all instances - * (including the one that initiated the sending) will fire. - * - * @param {string} [group] - The name of the Crosstalk group, or if none, - * null or undefined (or any other falsy value). This can be changed later - * via the [SelectionHandle#setGroup](#setGroup) method. - * @param {Object} [extraInfo] - An object whose properties will be copied to - * the event object whenever an event is emitted. - */ -var SelectionHandle = exports.SelectionHandle = function () { - function SelectionHandle() { - var group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; - var extraInfo = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null; - - _classCallCheck(this, SelectionHandle); - - this._eventRelay = new _events2.default(); - this._emitter = new util.SubscriptionTracker(this._eventRelay); - - // Name of the group we're currently tracking, if any. Can change over time. - this._group = null; - // The Var we're currently tracking, if any. Can change over time. 
- this._var = null; - // The event handler subscription we currently have on var.on("change"). - this._varOnChangeSub = null; - - this._extraInfo = util.extend({ sender: this }, extraInfo); - - this.setGroup(group); - } - - /** - * Changes the Crosstalk group membership of this SelectionHandle. The group - * being switched away from (if any) will not have its selection value - * modified as a result of calling `setGroup`, even if this handle was the - * most recent handle to set the selection of the group. - * - * The group being switched to (if any) will also not have its selection value - * modified as a result of calling `setGroup`. If you want to set the - * selection value of the new group, call `set` explicitly. - * - * @param {string} group - The name of the Crosstalk group, or null (or - * undefined) to clear the group. - */ - - - _createClass(SelectionHandle, [{ - key: "setGroup", - value: function setGroup(group) { - var _this = this; - - // If group is unchanged, do nothing - if (this._group === group) return; - // Treat null, undefined, and other falsy values the same - if (!this._group && !group) return; - - if (this._var) { - this._var.off("change", this._varOnChangeSub); - this._var = null; - this._varOnChangeSub = null; - } - - this._group = group; - - if (group) { - this._var = (0, _group2.default)(group).var("selection"); - var sub = this._var.on("change", function (e) { - _this._eventRelay.trigger("change", e, _this); - }); - this._varOnChangeSub = sub; - } - } - - /** - * Retrieves the current selection for the group represented by this - * `SelectionHandle`. - * - * - If no selection is active, then this value will be falsy. - * - If a selection is active, but no data points are selected, then this - * value will be an empty array. - * - If a selection is active, and data points are selected, then the keys - * of the selected data points will be present in the array. 
- */ - - }, { - key: "_mergeExtraInfo", - - - /** - * Combines the given `extraInfo` (if any) with the handle's default - * `_extraInfo` (if any). - * @private - */ - value: function _mergeExtraInfo(extraInfo) { - // Important incidental effect: shallow clone is returned - return util.extend({}, this._extraInfo ? this._extraInfo : null, extraInfo ? extraInfo : null); - } - - /** - * Overwrites the current selection for the group, and raises the `"change"` - * event among all of the group's '`SelectionHandle` instances (including - * this one). - * - * @fires SelectionHandle#change - * @param {string[]} selectedKeys - Falsy, empty array, or array of keys (see - * {@link SelectionHandle#value}). - * @param {Object} [extraInfo] - Extra properties to be included on the event - * object that's passed to listeners (in addition to any options that were - * passed into the `SelectionHandle` constructor). - */ - - }, { - key: "set", - value: function set(selectedKeys, extraInfo) { - if (this._var) this._var.set(selectedKeys, this._mergeExtraInfo(extraInfo)); - } - - /** - * Overwrites the current selection for the group, and raises the `"change"` - * event among all of the group's '`SelectionHandle` instances (including - * this one). - * - * @fires SelectionHandle#change - * @param {Object} [extraInfo] - Extra properties to be included on the event - * object that's passed to listeners (in addition to any that were passed - * into the `SelectionHandle` constructor). - */ - - }, { - key: "clear", - value: function clear(extraInfo) { - if (this._var) this.set(void 0, this._mergeExtraInfo(extraInfo)); - } - - /** - * Subscribes to events on this `SelectionHandle`. - * - * @param {string} eventType - Indicates the type of events to listen to. - * Currently, only `"change"` is supported. - * @param {SelectionHandle~listener} listener - The callback function that - * will be invoked when the event occurs. 
- * @return {string} - A token to pass to {@link SelectionHandle#off} to cancel - * this subscription. - */ - - }, { - key: "on", - value: function on(eventType, listener) { - return this._emitter.on(eventType, listener); - } - - /** - * Cancels event subscriptions created by {@link SelectionHandle#on}. - * - * @param {string} eventType - The type of event to unsubscribe. - * @param {string|SelectionHandle~listener} listener - Either the callback - * function previously passed into {@link SelectionHandle#on}, or the - * string that was returned from {@link SelectionHandle#on}. - */ - - }, { - key: "off", - value: function off(eventType, listener) { - return this._emitter.off(eventType, listener); - } - - /** - * Shuts down the `SelectionHandle` object. - * - * Removes all event listeners that were added through this handle. - */ - - }, { - key: "close", - value: function close() { - this._emitter.removeAllListeners(); - this.setGroup(null); - } - }, { - key: "value", - get: function get() { - return this._var ? this._var.get() : null; - } - }]); - - return SelectionHandle; -}(); - -/** - * @callback SelectionHandle~listener - * @param {Object} event - An object containing details of the event. For - * `"change"` events, this includes the properties `value` (the new - * value of the selection, or `undefined` if no selection is active), - * `oldValue` (the previous value of the selection), and `sender` (the - * `SelectionHandle` instance that made the change). - */ - -/** - * @event SelectionHandle#change - * @type {object} - * @property {object} value - The new value of the selection, or `undefined` - * if no selection is active. - * @property {object} oldValue - The previous value of the selection. - * @property {SelectionHandle} sender - The `SelectionHandle` instance that - * changed the value. 
- */ - -},{"./events":1,"./group":4,"./util":11}],11:[function(require,module,exports){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; - -exports.extend = extend; -exports.checkSorted = checkSorted; -exports.diffSortedLists = diffSortedLists; -exports.dataframeToD3 = dataframeToD3; - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -function extend(target) { - for (var _len = arguments.length, sources = Array(_len > 1 ? 
_len - 1 : 0), _key = 1; _key < _len; _key++) { - sources[_key - 1] = arguments[_key]; - } - - for (var i = 0; i < sources.length; i++) { - var src = sources[i]; - if (typeof src === "undefined" || src === null) continue; - - for (var key in src) { - if (src.hasOwnProperty(key)) { - target[key] = src[key]; - } - } - } - return target; -} - -function checkSorted(list) { - for (var i = 1; i < list.length; i++) { - if (list[i] <= list[i - 1]) { - throw new Error("List is not sorted or contains duplicate"); - } - } -} - -function diffSortedLists(a, b) { - var i_a = 0; - var i_b = 0; - - if (!a) a = []; - if (!b) b = []; - - var a_only = []; - var b_only = []; - - checkSorted(a); - checkSorted(b); - - while (i_a < a.length && i_b < b.length) { - if (a[i_a] === b[i_b]) { - i_a++; - i_b++; - } else if (a[i_a] < b[i_b]) { - a_only.push(a[i_a++]); - } else { - b_only.push(b[i_b++]); - } - } - - if (i_a < a.length) a_only = a_only.concat(a.slice(i_a)); - if (i_b < b.length) b_only = b_only.concat(b.slice(i_b)); - return { - removed: a_only, - added: b_only - }; -} - -// Convert from wide: { colA: [1,2,3], colB: [4,5,6], ... } -// to long: [ {colA: 1, colB: 4}, {colA: 2, colB: 5}, ... 
] -function dataframeToD3(df) { - var names = []; - var length = void 0; - for (var name in df) { - if (df.hasOwnProperty(name)) names.push(name); - if (_typeof(df[name]) !== "object" || typeof df[name].length === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof length !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = df[name].length; - } - var results = []; - var item = void 0; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; -} - -/** - * Keeps track of all event listener additions/removals and lets all active - * listeners be removed with a single operation. - * - * @private - */ - -var SubscriptionTracker = exports.SubscriptionTracker = function () { - function SubscriptionTracker(emitter) { - _classCallCheck(this, SubscriptionTracker); - - this._emitter = emitter; - this._subs = {}; - } - - _createClass(SubscriptionTracker, [{ - key: "on", - value: function on(eventType, listener) { - var sub = this._emitter.on(eventType, listener); - this._subs[sub] = eventType; - return sub; - } - }, { - key: "off", - value: function off(eventType, listener) { - var sub = this._emitter.off(eventType, listener); - if (sub) { - delete this._subs[sub]; - } - return sub; - } - }, { - key: "removeAllListeners", - value: function removeAllListeners() { - var _this = this; - - var current_subs = this._subs; - this._subs = {}; - Object.keys(current_subs).forEach(function (sub) { - _this._emitter.off(current_subs[sub], sub); - }); - } - }]); - - return SubscriptionTracker; -}(); - -},{}],12:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? 
function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _events = require("./events"); - -var _events2 = _interopRequireDefault(_events); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -var Var = function () { - function Var(group, name, /*optional*/value) { - _classCallCheck(this, Var); - - this._group = group; - this._name = name; - this._value = value; - this._events = new _events2.default(); - } - - _createClass(Var, [{ - key: "get", - value: function get() { - return this._value; - } - }, { - key: "set", - value: function set(value, /*optional*/event) { - if (this._value === value) { - // Do nothing; the value hasn't changed - return; - } - var oldValue = this._value; - this._value = value; - // Alert JavaScript listeners that the value has changed - var evt = {}; - if (event && (typeof event === "undefined" ? 
"undefined" : _typeof(event)) === "object") { - for (var k in event) { - if (event.hasOwnProperty(k)) evt[k] = event[k]; - } - } - evt.oldValue = oldValue; - evt.value = value; - this._events.trigger("change", evt, this); - - // TODO: Make this extensible, to let arbitrary back-ends know that - // something has changed - if (global.Shiny && global.Shiny.onInputChange) { - global.Shiny.onInputChange(".clientValue-" + (this._group.name !== null ? this._group.name + "-" : "") + this._name, typeof value === "undefined" ? null : value); - } - } - }, { - key: "on", - value: function on(eventType, listener) { - return this._events.on(eventType, listener); - } - }, { - key: "off", - value: function off(eventType, listener) { - return this._events.off(eventType, listener); - } - }]); - - return Var; -}(); - -exports.default = Var; - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./events":1}]},{},[5]) -//# sourceMappingURL=crosstalk.js.map diff --git a/content/find/recipes/index_files/crosstalk/js/crosstalk.js.map b/content/find/recipes/index_files/crosstalk/js/crosstalk.js.map deleted file mode 100644 index cff94f08..00000000 --- a/content/find/recipes/index_files/crosstalk/js/crosstalk.js.map +++ /dev/null @@ -1,37 +0,0 @@ -{ - "version": 3, - "sources": [ - "node_modules/browser-pack/_prelude.js", - "javascript/src/events.js", - "javascript/src/filter.js", - "javascript/src/filterset.js", - "javascript/src/group.js", - "javascript/src/index.js", - "javascript/src/input.js", - "javascript/src/input_checkboxgroup.js", - "javascript/src/input_selectize.js", - "javascript/src/input_slider.js", - "javascript/src/selection.js", - "javascript/src/util.js", - "javascript/src/var.js" - ], - "names": [], - "mappings": 
"AAAA;;;;;;;;;;;ICAqB,M;AACnB,oBAAc;AAAA;;AACZ,SAAK,MAAL,GAAc,EAAd;AACA,SAAK,IAAL,GAAY,CAAZ;AACD;;;;uBAEE,S,EAAW,Q,EAAU;AACtB,UAAI,OAAO,KAAK,MAAL,CAAY,SAAZ,CAAX;AACA,UAAI,CAAC,IAAL,EAAW;AACT,eAAO,KAAK,MAAL,CAAY,SAAZ,IAAyB,EAAhC;AACD;AACD,UAAI,MAAM,QAAS,KAAK,IAAL,EAAnB;AACA,WAAK,GAAL,IAAY,QAAZ;AACA,aAAO,GAAP;AACD;;AAED;;;;wBACI,S,EAAW,Q,EAAU;AACvB,UAAI,OAAO,KAAK,MAAL,CAAY,SAAZ,CAAX;AACA,UAAI,OAAO,QAAP,KAAqB,UAAzB,EAAqC;AACnC,aAAK,IAAI,GAAT,IAAgB,IAAhB,EAAsB;AACpB,cAAI,KAAK,cAAL,CAAoB,GAApB,CAAJ,EAA8B;AAC5B,gBAAI,KAAK,GAAL,MAAc,QAAlB,EAA4B;AAC1B,qBAAO,KAAK,GAAL,CAAP;AACA,qBAAO,GAAP;AACD;AACF;AACF;AACD,eAAO,KAAP;AACD,OAVD,MAUO,IAAI,OAAO,QAAP,KAAqB,QAAzB,EAAmC;AACxC,YAAI,QAAQ,KAAK,QAAL,CAAZ,EAA4B;AAC1B,iBAAO,KAAK,QAAL,CAAP;AACA,iBAAO,QAAP;AACD;AACD,eAAO,KAAP;AACD,OANM,MAMA;AACL,cAAM,IAAI,KAAJ,CAAU,8BAAV,CAAN;AACD;AACF;;;4BAEO,S,EAAW,G,EAAK,O,EAAS;AAC/B,UAAI,OAAO,KAAK,MAAL,CAAY,SAAZ,CAAX;AACA,WAAK,IAAI,GAAT,IAAgB,IAAhB,EAAsB;AACpB,YAAI,KAAK,cAAL,CAAoB,GAApB,CAAJ,EAA8B;AAC5B,eAAK,GAAL,EAAU,IAAV,CAAe,OAAf,EAAwB,GAAxB;AACD;AACF;AACF;;;;;;kBA/CkB,M;;;;;;;;;;;;ACArB;;;;AACA;;;;AACA;;;;AACA;;IAAY,I;;;;;;;;AAEZ,SAAS,YAAT,CAAsB,KAAtB,EAA6B;AAC3B,MAAI,QAAQ,MAAM,GAAN,CAAU,WAAV,CAAZ;AACA,MAAI,SAAS,MAAM,GAAN,EAAb;AACA,MAAI,CAAC,MAAL,EAAa;AACX,aAAS,yBAAT;AACA,UAAM,GAAN,CAAU,MAAV;AACD;AACD,SAAO,MAAP;AACD;;AAED,IAAI,KAAK,CAAT;AACA,SAAS,MAAT,GAAkB;AAChB,SAAO,IAAP;AACD;;AAED;;;;;;;;;;;;;;;;;;;;;;;;;IAwBa,Y,WAAA,Y;AACX,wBAAY,KAAZ,EAAmB,SAAnB,EAA8B;AAAA;;AAC5B,SAAK,WAAL,GAAmB,sBAAnB;AACA,SAAK,QAAL,GAAgB,IAAI,KAAK,mBAAT,CAA6B,KAAK,WAAlC,CAAhB;;AAEA;AACA,SAAK,MAAL,GAAc,IAAd;AACA;AACA,SAAK,UAAL,GAAkB,IAAlB;AACA;AACA,SAAK,UAAL,GAAkB,IAAlB;AACA;AACA,SAAK,eAAL,GAAuB,IAAvB;;AAEA,SAAK,UAAL,GAAkB,KAAK,MAAL,CAAY,EAAE,QAAQ,IAAV,EAAZ,EAA8B,SAA9B,CAAlB;;AAEA,SAAK,GAAL,GAAW,WAAW,QAAtB;;AAEA,SAAK,QAAL,CAAc,KAAd;AACD;;AAED;;;;;;;;;;;;;;6BAUS,K,EAAO;AAAA;;AACd;AACA,UAAI,KAAK,MAAL,KAAgB,KAApB,EACE;AACF;AACA,UAAI,CAAC,KAAK,MAAN,IAAgB,CAAC,KAArB,EACE;;AAEF,UAAI,KAAK,UAAT,EAAqB;AACnB,aAAK,UAAL,CAAgB,GAAhB,CAAo
B,QAApB,EAA8B,KAAK,eAAnC;AACA,aAAK,KAAL;AACA,aAAK,eAAL,GAAuB,IAAvB;AACA,aAAK,UAAL,GAAkB,IAAlB;AACA,aAAK,UAAL,GAAkB,IAAlB;AACD;;AAED,WAAK,MAAL,GAAc,KAAd;;AAEA,UAAI,KAAJ,EAAW;AACT,gBAAQ,qBAAI,KAAJ,CAAR;AACA,aAAK,UAAL,GAAkB,aAAa,KAAb,CAAlB;AACA,aAAK,UAAL,GAAkB,qBAAI,KAAJ,EAAW,GAAX,CAAe,QAAf,CAAlB;AACA,YAAI,MAAM,KAAK,UAAL,CAAgB,EAAhB,CAAmB,QAAnB,EAA6B,UAAC,CAAD,EAAO;AAC5C,gBAAK,WAAL,CAAiB,OAAjB,CAAyB,QAAzB,EAAmC,CAAnC;AACD,SAFS,CAAV;AAGA,aAAK,eAAL,GAAuB,GAAvB;AACD;AACF;;AAED;;;;;;;;oCAKgB,S,EAAW;AACzB,aAAO,KAAK,MAAL,CAAY,EAAZ,EACL,KAAK,UAAL,GAAkB,KAAK,UAAvB,GAAoC,IAD/B,EAEL,YAAY,SAAZ,GAAwB,IAFnB,CAAP;AAGD;;AAED;;;;;;;4BAIQ;AACN,WAAK,QAAL,CAAc,kBAAd;AACA,WAAK,KAAL;AACA,WAAK,QAAL,CAAc,IAAd;AACD;;AAED;;;;;;;;;;;;0BASM,S,EAAW;AACf,UAAI,CAAC,KAAK,UAAV,EACE;AACF,WAAK,UAAL,CAAgB,KAAhB,CAAsB,KAAK,GAA3B;AACA,WAAK,SAAL,CAAe,SAAf;AACD;;AAED;;;;;;;;;;;;;;;;;;;;wBAiBI,I,EAAM,S,EAAW;AACnB,UAAI,CAAC,KAAK,UAAV,EACE;AACF,WAAK,UAAL,CAAgB,MAAhB,CAAuB,KAAK,GAA5B,EAAiC,IAAjC;AACA,WAAK,SAAL,CAAe,SAAf;AACD;;AAED;;;;;;;;;;AASA;;;;;;;;;;uBAUG,S,EAAW,Q,EAAU;AACtB,aAAO,KAAK,QAAL,CAAc,EAAd,CAAiB,SAAjB,EAA4B,QAA5B,CAAP;AACD;;AAED;;;;;;;;;;;wBAQI,S,EAAW,Q,EAAU;AACvB,aAAO,KAAK,QAAL,CAAc,GAAd,CAAkB,SAAlB,EAA6B,QAA7B,CAAP;AACD;;;8BAES,S,EAAW;AACnB,UAAI,CAAC,KAAK,UAAV,EACE;AACF,WAAK,UAAL,CAAgB,GAAhB,CAAoB,KAAK,UAAL,CAAgB,KAApC,EAA2C,KAAK,eAAL,CAAqB,SAArB,CAA3C;AACD;;AAED;;;;;;;;;;;wBApCmB;AACjB,aAAO,KAAK,UAAL,GAAkB,KAAK,UAAL,CAAgB,KAAlC,GAA0C,IAAjD;AACD;;;;;;AA6CH;;;;;;;;;;;;;;;;;;;ACzNA;;;;AAEA,SAAS,iBAAT,CAA2B,CAA3B,EAA8B,CAA9B,EAAiC;AAC/B,MAAI,MAAM,CAAV,EAAa;AACX,WAAO,CAAP;AACD,GAFD,MAEO,IAAI,IAAI,CAAR,EAAW;AAChB,WAAO,CAAC,CAAR;AACD,GAFM,MAEA,IAAI,IAAI,CAAR,EAAW;AAChB,WAAO,CAAP;AACD;AACF;;AAED;;;;IAGqB,S;AACnB,uBAAc;AAAA;;AACZ,SAAK,KAAL;AACD;;;;4BAEO;AACN;AACA,WAAK,QAAL,GAAgB,EAAhB;AACA;AACA,WAAK,KAAL,GAAa,EAAb;AACA,WAAK,MAAL,GAAc,IAAd;AACA,WAAK,cAAL,GAAsB,CAAtB;AACD;;;2BAMM,Q,EAAU,I,EAAM;AACrB,UAAI,SAAS,IAAb,EAAmB;AACjB,eAAO,KAAK,KAAL,CAAW,CAAX,CAAP,CADiB,CACK;AACtB,aAAK,IAAL,CAAU,iBAAV;AACD;;AAJoB,
6BAME,2BAAgB,KAAK,QAAL,CAAc,QAAd,CAAhB,EAAyC,IAAzC,CANF;AAAA,UAMhB,KANgB,oBAMhB,KANgB;AAAA,UAMT,OANS,oBAMT,OANS;;AAOrB,WAAK,QAAL,CAAc,QAAd,IAA0B,IAA1B;;AAEA,WAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,MAAM,MAA1B,EAAkC,GAAlC,EAAuC;AACrC,aAAK,KAAL,CAAW,MAAM,CAAN,CAAX,IAAuB,CAAC,KAAK,KAAL,CAAW,MAAM,CAAN,CAAX,KAAwB,CAAzB,IAA8B,CAArD;AACD;AACD,WAAK,IAAI,KAAI,CAAb,EAAgB,KAAI,QAAQ,MAA5B,EAAoC,IAApC,EAAyC;AACvC,aAAK,KAAL,CAAW,QAAQ,EAAR,CAAX;AACD;;AAED,WAAK,YAAL,CAAkB,IAAlB;AACD;;AAED;;;;;;;;mCAKmC;AAAA,UAAtB,IAAsB,uEAAf,KAAK,QAAU;;AACjC,UAAI,cAAc,OAAO,IAAP,CAAY,KAAK,QAAjB,EAA2B,MAA7C;AACA,UAAI,gBAAgB,CAApB,EAAuB;AACrB,aAAK,MAAL,GAAc,IAAd;AACD,OAFD,MAEO;AACL,aAAK,MAAL,GAAc,EAAd;AACA,aAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAzB,EAAiC,GAAjC,EAAsC;AACpC,cAAI,QAAQ,KAAK,KAAL,CAAW,KAAK,CAAL,CAAX,CAAZ;AACA,cAAI,UAAU,WAAd,EAA2B;AACzB,iBAAK,MAAL,CAAY,IAAZ,CAAiB,KAAK,CAAL,CAAjB;AACD;AACF;AACF;AACF;;;0BAEK,Q,EAAU;AACd,UAAI,OAAO,KAAK,QAAL,CAAc,QAAd,CAAP,KAAoC,WAAxC,EAAqD;AACnD;AACD;;AAED,UAAI,OAAO,KAAK,QAAL,CAAc,QAAd,CAAX;AACA,UAAI,CAAC,IAAL,EAAW;AACT,eAAO,EAAP;AACD;;AAED,WAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAzB,EAAiC,GAAjC,EAAsC;AACpC,aAAK,KAAL,CAAW,KAAK,CAAL,CAAX;AACD;AACD,aAAO,KAAK,QAAL,CAAc,QAAd,CAAP;;AAEA,WAAK,YAAL;AACD;;;wBA3DW;AACV,aAAO,KAAK,MAAZ;AACD;;;wBA2Dc;AACb,UAAI,UAAU,OAAO,IAAP,CAAY,KAAK,KAAjB,CAAd;AACA,cAAQ,IAAR,CAAa,iBAAb;AACA,aAAO,OAAP;AACD;;;;;;kBA/EkB,S;;;;;;;;;;;;;;kBCRG,K;;AAPxB;;;;;;;;AAEA;AACA;AACA,OAAO,kBAAP,GAA4B,OAAO,kBAAP,IAA6B,EAAzD;AACA,IAAI,SAAS,OAAO,kBAApB;;AAEe,SAAS,KAAT,CAAe,SAAf,EAA0B;AACvC,MAAI,aAAa,OAAO,SAAP,KAAsB,QAAvC,EAAiD;AAC/C,QAAI,CAAC,OAAO,cAAP,CAAsB,SAAtB,CAAL,EAAuC;AACrC,aAAO,SAAP,IAAoB,IAAI,KAAJ,CAAU,SAAV,CAApB;AACD;AACD,WAAO,OAAO,SAAP,CAAP;AACD,GALD,MAKO,IAAI,QAAO,SAAP,yCAAO,SAAP,OAAsB,QAAtB,IAAkC,UAAU,KAA5C,IAAqD,UAAU,GAAnE,EAAwE;AAC7E;AACA,WAAO,SAAP;AACD,GAHM,MAGA,IAAI,MAAM,OAAN,CAAc,SAAd,KACP,UAAU,MAAV,IAAoB,CADb,IAEP,OAAO,UAAU,CAAV,CAAP,KAAyB,QAFtB,EAEgC;AACrC,WAAO,MAAM,UAAU,CAAV,CAAN,CAAP;AACD,GAJM,MAIA;AACL,UAAM,IAAI,KAAJ,CAAU,4BAAV,CAAN;AACD;AACF;;
IAEK,K;AACJ,iBAAY,IAAZ,EAAkB;AAAA;;AAChB,SAAK,IAAL,GAAY,IAAZ;AACA,SAAK,KAAL,GAAa,EAAb;AACD;;;;yBAEG,I,EAAM;AACR,UAAI,CAAC,IAAD,IAAS,OAAO,IAAP,KAAiB,QAA9B,EAAwC;AACtC,cAAM,IAAI,KAAJ,CAAU,kBAAV,CAAN;AACD;;AAED,UAAI,CAAC,KAAK,KAAL,CAAW,cAAX,CAA0B,IAA1B,CAAL,EACE,KAAK,KAAL,CAAW,IAAX,IAAmB,kBAAQ,IAAR,EAAc,IAAd,CAAnB;AACF,aAAO,KAAK,KAAL,CAAW,IAAX,CAAP;AACD;;;wBAEG,I,EAAM;AACR,UAAI,CAAC,IAAD,IAAS,OAAO,IAAP,KAAiB,QAA9B,EAAwC;AACtC,cAAM,IAAI,KAAJ,CAAU,kBAAV,CAAN;AACD;;AAED,aAAO,KAAK,KAAL,CAAW,cAAX,CAA0B,IAA1B,CAAP;AACD;;;;;;;;;;;;;;;;AC/CH;;;;AACA;;AACA;;AACA;;AACA;;AACA;;AACA;;;;AAEA,IAAM,eAAe,qBAAM,SAAN,CAArB;;AAEA,SAAS,IAAT,CAAc,IAAd,EAAoB;AAClB,SAAO,aAAa,GAAb,CAAiB,IAAjB,CAAP;AACD;;AAED,SAAS,GAAT,CAAa,IAAb,EAAmB;AACjB,SAAO,aAAa,GAAb,CAAiB,IAAjB,CAAP;AACD;;AAED,IAAI,OAAO,KAAX,EAAkB;AAChB,SAAO,KAAP,CAAa,uBAAb,CAAqC,qBAArC,EAA4D,UAAS,OAAT,EAAkB;AAC5E,QAAI,OAAO,QAAQ,KAAf,KAA0B,QAA9B,EAAwC;AACtC,2BAAM,QAAQ,KAAd,EAAqB,GAArB,CAAyB,QAAQ,IAAjC,EAAuC,GAAvC,CAA2C,QAAQ,KAAnD;AACD,KAFD,MAEO;AACL,WAAK,QAAQ,IAAb,EAAmB,GAAnB,CAAuB,QAAQ,KAA/B;AACD;AACF,GAND;AAOD;;AAED,IAAM,YAAY;AAChB,wBADgB;AAEhB,OAAK,IAFW;AAGhB,OAAK,GAHW;AAIhB,6CAJgB;AAKhB,oCALgB;AAMhB;AANgB,CAAlB;;AASA;;;kBAGe,S;;AACf,OAAO,SAAP,GAAmB,SAAnB;;;;;;;;;;;QCrCgB,Q,GAAA,Q;QAWA,I,GAAA,I;AAfhB,IAAI,IAAI,OAAO,MAAf;;AAEA,IAAI,WAAW,EAAf;;AAEO,SAAS,QAAT,CAAkB,GAAlB,EAAuB;AAC5B,WAAS,IAAI,SAAb,IAA0B,GAA1B;AACA,MAAI,OAAO,QAAP,IAAmB,OAAO,QAAP,CAAgB,UAAhB,KAA+B,UAAtD,EAAkE;AAChE,MAAE,YAAM;AACN;AACD,KAFD;AAGD,GAJD,MAIO,IAAI,OAAO,QAAX,EAAqB;AAC1B,eAAW,IAAX,EAAiB,GAAjB;AACD;AACF;;AAEM,SAAS,IAAT,GAAgB;AACrB,SAAO,IAAP,CAAY,QAAZ,EAAsB,OAAtB,CAA8B,UAAS,SAAT,EAAoB;AAChD,QAAI,UAAU,SAAS,SAAT,CAAd;AACA,MAAE,MAAM,QAAQ,SAAhB,EAA2B,GAA3B,CAA+B,wBAA/B,EAAyD,IAAzD,CAA8D,UAAS,CAAT,EAAY,EAAZ,EAAgB;AAC5E,mBAAa,OAAb,EAAsB,EAAtB;AACD,KAFD;AAGD,GALD;AAMD;;AAED;AACA,SAAS,OAAT,CAAiB,GAAjB,EAAsB;AACpB,SAAO,IAAI,OAAJ,CAAY,uCAAZ,EAAqD,MAArD,CAAP;AACD;;AAED,SAAS,MAAT,CAAgB,EAAhB,EAAoB;AAClB,MAAI,MAAM,EAAE,EAAF,CAAV;AACA,SAAO,IAAP,CAAY,QAAZ,EAAsB,OAAtB,CAA8B,UAAS,S
AAT,EAAoB;AAChD,QAAI,IAAI,QAAJ,CAAa,SAAb,KAA2B,CAAC,IAAI,QAAJ,CAAa,uBAAb,CAAhC,EAAuE;AACrE,UAAI,UAAU,SAAS,SAAT,CAAd;AACA,mBAAa,OAAb,EAAsB,EAAtB;AACD;AACF,GALD;AAMD;;AAED,SAAS,YAAT,CAAsB,OAAtB,EAA+B,EAA/B,EAAmC;AACjC,MAAI,SAAS,EAAE,EAAF,EAAM,IAAN,CAAW,+CAA+C,QAAQ,GAAG,EAAX,CAA/C,GAAgE,IAA3E,CAAb;AACA,MAAI,OAAO,KAAK,KAAL,CAAW,OAAO,CAAP,EAAU,SAArB,CAAX;;AAEA,MAAI,WAAW,QAAQ,OAAR,CAAgB,EAAhB,EAAoB,IAApB,CAAf;AACA,IAAE,EAAF,EAAM,IAAN,CAAW,oBAAX,EAAiC,QAAjC;AACA,IAAE,EAAF,EAAM,QAAN,CAAe,uBAAf;AACD;;AAED,IAAI,OAAO,KAAX,EAAkB;AAChB,MAAI,eAAe,IAAI,OAAO,KAAP,CAAa,YAAjB,EAAnB;AACA,MAAI,KAAI,OAAO,MAAf;AACA,KAAE,MAAF,CAAS,YAAT,EAAuB;AACrB,UAAM,cAAS,KAAT,EAAgB;AACpB,aAAO,GAAE,KAAF,EAAS,IAAT,CAAc,kBAAd,CAAP;AACD,KAHoB;AAIrB,gBAAY,oBAAS,EAAT,EAAa;AACvB,UAAI,CAAC,GAAE,EAAF,EAAM,QAAN,CAAe,uBAAf,CAAL,EAA8C;AAC5C,eAAO,EAAP;AACD;AACF,KARoB;AASrB,WAAO,eAAS,EAAT,EAAa;AAClB,aAAO,GAAG,EAAV;AACD,KAXoB;AAYrB,cAAU,kBAAS,EAAT,EAAa,CAEtB,CAdoB;AAerB,cAAU,kBAAS,EAAT,EAAa,KAAb,EAAoB,CAE7B,CAjBoB;AAkBrB,oBAAgB,wBAAS,EAAT,EAAa,IAAb,EAAmB,CAElC,CApBoB;AAqBrB,eAAW,mBAAS,EAAT,EAAa,QAAb,EAAuB;AAChC,SAAE,EAAF,EAAM,IAAN,CAAW,oBAAX,EAAiC,MAAjC;AACD,KAvBoB;AAwBrB,iBAAa,qBAAS,EAAT,EAAa;AACxB,SAAE,EAAF,EAAM,IAAN,CAAW,oBAAX,EAAiC,OAAjC;AACD;AA1BoB,GAAvB;AA4BA,SAAO,KAAP,CAAa,aAAb,CAA2B,QAA3B,CAAoC,YAApC,EAAkD,wBAAlD;AACD;;;;;;;;AChFD;;IAAY,K;;AACZ;;;;AAEA,IAAI,IAAI,OAAO,MAAf;;AAEA,MAAM,QAAN,CAAe;AACb,aAAW,+BADE;;AAGb,WAAS,iBAAS,EAAT,EAAa,IAAb,EAAmB;AAC1B;;;;AAIA,QAAI,WAAW,yBAAiB,KAAK,KAAtB,CAAf;;AAEA,QAAI,sBAAJ;AACA,QAAI,MAAM,EAAE,EAAF,CAAV;AACA,QAAI,EAAJ,CAAO,QAAP,EAAiB,wBAAjB,EAA2C,YAAW;AACpD,UAAI,UAAU,IAAI,IAAJ,CAAS,gCAAT,CAAd;AACA,UAAI,QAAQ,MAAR,KAAmB,CAAvB,EAA0B;AACxB,wBAAgB,IAAhB;AACA,iBAAS,KAAT;AACD,OAHD,MAGO;AACL,YAAI,OAAO,EAAX;AACA,gBAAQ,IAAR,CAAa,YAAW;AACtB,eAAK,GAAL,CAAS,KAAK,KAAd,EAAqB,OAArB,CAA6B,UAAS,GAAT,EAAc;AACzC,iBAAK,GAAL,IAAY,IAAZ;AACD,WAFD;AAGD,SAJD;AAKA,YAAI,WAAW,OAAO,IAAP,CAAY,IAAZ,CAAf;AACA,iBAAS,IAAT;AACA,wBAAgB,QAAhB;AACA,iBAAS,GAAT,CAAa,QAAb;AACD;AACF,KAjBD;;AAmBA,WAAO;AACL,eAAS,mBAAW;
AAClB,iBAAS,KAAT;AACD,OAHI;AAIL,cAAQ,kBAAW;AACjB,YAAI,aAAJ,EACE,SAAS,GAAT,CAAa,aAAb;AACH;AAPI,KAAP;AASD;AAxCY,CAAf;;;;;;;;ACLA;;IAAY,K;;AACZ;;IAAY,I;;AACZ;;;;AAEA,IAAI,IAAI,OAAO,MAAf;;AAEA,MAAM,QAAN,CAAe;AACb,aAAW,wBADE;;AAGb,WAAS,iBAAS,EAAT,EAAa,IAAb,EAAmB;AAC1B;;;;;;AAMA,QAAI,QAAQ,CAAC,EAAC,OAAO,EAAR,EAAY,OAAO,OAAnB,EAAD,CAAZ;AACA,QAAI,QAAQ,KAAK,aAAL,CAAmB,KAAK,KAAxB,CAAZ;AACA,QAAI,OAAO;AACT,eAAS,MAAM,MAAN,CAAa,KAAb,CADA;AAET,kBAAY,OAFH;AAGT,kBAAY,OAHH;AAIT,mBAAa;AAJJ,KAAX;;AAOA,QAAI,SAAS,EAAE,EAAF,EAAM,IAAN,CAAW,QAAX,EAAqB,CAArB,CAAb;;AAEA,QAAI,YAAY,EAAE,MAAF,EAAU,SAAV,CAAoB,IAApB,EAA0B,CAA1B,EAA6B,SAA7C;;AAEA,QAAI,WAAW,yBAAiB,KAAK,KAAtB,CAAf;;AAEA,QAAI,sBAAJ;AACA,cAAU,EAAV,CAAa,QAAb,EAAuB,YAAW;AAChC,UAAI,UAAU,KAAV,CAAgB,MAAhB,KAA2B,CAA/B,EAAkC;AAChC,wBAAgB,IAAhB;AACA,iBAAS,KAAT;AACD,OAHD,MAGO;AACL,YAAI,OAAO,EAAX;AACA,kBAAU,KAAV,CAAgB,OAAhB,CAAwB,UAAS,KAAT,EAAgB;AACtC,eAAK,GAAL,CAAS,KAAT,EAAgB,OAAhB,CAAwB,UAAS,GAAT,EAAc;AACpC,iBAAK,GAAL,IAAY,IAAZ;AACD,WAFD;AAGD,SAJD;AAKA,YAAI,WAAW,OAAO,IAAP,CAAY,IAAZ,CAAf;AACA,iBAAS,IAAT;AACA,wBAAgB,QAAhB;AACA,iBAAS,GAAT,CAAa,QAAb;AACD;AACF,KAhBD;;AAkBA,WAAO;AACL,eAAS,mBAAW;AAClB,iBAAS,KAAT;AACD,OAHI;AAIL,cAAQ,kBAAW;AACjB,YAAI,aAAJ,EACE,SAAS,GAAT,CAAa,aAAb;AACH;AAPI,KAAP;AASD;AArDY,CAAf;;;;;;;;;;ACNA;;IAAY,K;;AACZ;;;;AAEA,IAAI,IAAI,OAAO,MAAf;AACA,IAAI,WAAW,OAAO,QAAtB;;AAEA,MAAM,QAAN,CAAe;AACb,aAAW,wBADE;;AAGb,WAAS,iBAAS,EAAT,EAAa,IAAb,EAAmB;AAC1B;;;;AAIA,QAAI,WAAW,yBAAiB,KAAK,KAAtB,CAAf;;AAEA,QAAI,OAAO,EAAX;AACA,QAAI,MAAM,EAAE,EAAF,EAAM,IAAN,CAAW,OAAX,CAAV;AACA,QAAI,WAAW,IAAI,IAAJ,CAAS,WAAT,CAAf;AACA,QAAI,aAAa,IAAI,IAAJ,CAAS,aAAT,CAAjB;AACA,QAAI,QAAQ,IAAI,IAAJ,CAAS,OAAT,CAAZ;AACA,QAAI,sBAAJ;;AAEA;AACA,QAAI,aAAa,MAAjB,EAAyB;AACvB,sBAAgB,SAAS,GAAT,EAAhB;AACA,WAAK,QAAL,GAAgB,UAAS,GAAT,EAAc;AAC5B,eAAO,cAAc,UAAd,EAA0B,IAAI,IAAJ,CAAS,GAAT,CAA1B,CAAP;AACD,OAFD;AAID,KAND,MAMO,IAAI,aAAa,UAAjB,EAA6B;AAClC,UAAI,WAAW,IAAI,IAAJ,CAAS,UAAT,CAAf;AACA,UAAI,QAAJ,EACE,gBAAgB,SAAS,QAAT,CAAkB,QAAlB,CAAhB,CADF,KAGE,gBAAgB,QAAhB;;AAEF,WAAK,QAAL,GAAgB,U
AAS,GAAT,EAAc;AAC5B,eAAO,cAAc,UAAd,EAA0B,IAAI,IAAJ,CAAS,GAAT,CAA1B,CAAP;AACD,OAFD;AAGD,KAVM,MAUA,IAAI,aAAa,QAAjB,EAA2B;AAChC,UAAI,OAAO,KAAP,KAAiB,WAArB,EACE,KAAK,QAAL,GAAgB,UAAS,GAAT,EAAc;AAC5B,YAAI,SAAS,KAAK,GAAL,CAAS,EAAT,EAAa,KAAb,CAAb;AACA,eAAO,KAAK,KAAL,CAAW,MAAM,MAAjB,IAA2B,MAAlC;AACD,OAHD;AAIH;;AAED,QAAI,cAAJ,CAAmB,IAAnB;;AAEA,aAAS,QAAT,GAAoB;AAClB,UAAI,SAAS,IAAI,IAAJ,CAAS,gBAAT,EAA2B,MAAxC;;AAEA;AACA,UAAI,gBAAJ;AACA,UAAI,WAAW,IAAI,IAAJ,CAAS,WAAT,CAAf;AACA,UAAI,aAAa,MAAjB,EAAyB;AACvB,kBAAU,iBAAS,GAAT,EAAc;AACtB,iBAAO,cAAc,IAAI,IAAJ,CAAS,CAAC,GAAV,CAAd,CAAP;AACD,SAFD;AAGD,OAJD,MAIO,IAAI,aAAa,UAAjB,EAA6B;AAClC,kBAAU,iBAAS,GAAT,EAAc;AACtB;AACA,iBAAO,CAAC,GAAD,GAAO,IAAd;AACD,SAHD;AAID,OALM,MAKA;AACL,kBAAU,iBAAS,GAAT,EAAc;AAAE,iBAAO,CAAC,GAAR;AAAc,SAAxC;AACD;;AAED,UAAI,IAAI,IAAJ,CAAS,gBAAT,EAA2B,OAA3B,CAAmC,IAAnC,KAA4C,QAAhD,EAA0D;AACxD,eAAO,CAAC,QAAQ,OAAO,IAAf,CAAD,EAAuB,QAAQ,OAAO,EAAf,CAAvB,CAAP;AACD,OAFD,MAEO;AACL,eAAO,QAAQ,OAAO,IAAf,CAAP;AACD;AACF;;AAED,QAAI,gBAAgB,IAApB;;AAEA,QAAI,EAAJ,CAAO,6BAAP,EAAsC,UAAS,KAAT,EAAgB;AACpD,UAAI,CAAC,IAAI,IAAJ,CAAS,UAAT,CAAD,IAAyB,CAAC,IAAI,IAAJ,CAAS,WAAT,CAA9B,EAAqD;AAAA,wBAClC,UADkC;AAAA;AAAA,YAC9C,IAD8C;AAAA,YACxC,EADwC;;AAEnD,YAAI,OAAO,EAAX;AACA,aAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAL,CAAY,MAAhC,EAAwC,GAAxC,EAA6C;AAC3C,cAAI,MAAM,KAAK,MAAL,CAAY,CAAZ,CAAV;AACA,cAAI,OAAO,IAAP,IAAe,OAAO,EAA1B,EAA8B;AAC5B,iBAAK,IAAL,CAAU,KAAK,IAAL,CAAU,CAAV,CAAV;AACD;AACF;AACD,aAAK,IAAL;AACA,iBAAS,GAAT,CAAa,IAAb;AACA,wBAAgB,IAAhB;AACD;AACF,KAdD;;AAiBA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA,WAAO;AACL,eAAS,mBAAW;AAClB,iBAAS,KAAT;AACD,OAHI;AAIL,cAAQ,kBAAW;AACjB,YAAI,aAAJ,EACE,SAAS,GAAT,CAAa,aAAb;AACH;AAPI,KAAP;AASD;AApHY,CAAf;;AAwHA;AACA,SAAS,QAAT,CAAkB,CAAlB,EAAqB,MAArB,EAA6B;AAC3B,MAAI,MAAM,EAAE,QAAF,EAAV;AACA,SAAO,IAAI,MAAJ,GAAa,MAApB;AACE,UAAM,MAAM,GAAZ;AADF,GAEA,OAAO,GAAP;AACD;;AAED;AACA;AACA,SAAS,aAAT,CAAuB,IAAvB,EAA6B;AAC3B,MAAI,gBAAgB,IAApB,EAA0B;AACxB,WAAO,KAAK,cAAL,KAAwB,GAAxB,GACA,SAAS,KA
AK,WAAL,KAAmB,CAA5B,EAA+B,CAA/B,CADA,GACoC,GADpC,GAEA,SAAS,KAAK,UAAL,EAAT,EAA4B,CAA5B,CAFP;AAID,GALD,MAKO;AACL,WAAO,IAAP;AACD;AACF;;;;;;;;;;;;;;ACjJD;;;;AACA;;;;AACA;;IAAY,I;;;;;;;;AAEZ;;;;;;;;;;;;;;;;IAgBa,e,WAAA,e;AAEX,6BAA4C;AAAA,QAAhC,KAAgC,uEAAxB,IAAwB;AAAA,QAAlB,SAAkB,uEAAN,IAAM;;AAAA;;AAC1C,SAAK,WAAL,GAAmB,sBAAnB;AACA,SAAK,QAAL,GAAgB,IAAI,KAAK,mBAAT,CAA6B,KAAK,WAAlC,CAAhB;;AAEA;AACA,SAAK,MAAL,GAAc,IAAd;AACA;AACA,SAAK,IAAL,GAAY,IAAZ;AACA;AACA,SAAK,eAAL,GAAuB,IAAvB;;AAEA,SAAK,UAAL,GAAkB,KAAK,MAAL,CAAY,EAAE,QAAQ,IAAV,EAAZ,EAA8B,SAA9B,CAAlB;;AAEA,SAAK,QAAL,CAAc,KAAd;AACD;;AAED;;;;;;;;;;;;;;;;;6BAaS,K,EAAO;AAAA;;AACd;AACA,UAAI,KAAK,MAAL,KAAgB,KAApB,EACE;AACF;AACA,UAAI,CAAC,KAAK,MAAN,IAAgB,CAAC,KAArB,EACE;;AAEF,UAAI,KAAK,IAAT,EAAe;AACb,aAAK,IAAL,CAAU,GAAV,CAAc,QAAd,EAAwB,KAAK,eAA7B;AACA,aAAK,IAAL,GAAY,IAAZ;AACA,aAAK,eAAL,GAAuB,IAAvB;AACD;;AAED,WAAK,MAAL,GAAc,KAAd;;AAEA,UAAI,KAAJ,EAAW;AACT,aAAK,IAAL,GAAY,qBAAI,KAAJ,EAAW,GAAX,CAAe,WAAf,CAAZ;AACA,YAAI,MAAM,KAAK,IAAL,CAAU,EAAV,CAAa,QAAb,EAAuB,UAAC,CAAD,EAAO;AACtC,gBAAK,WAAL,CAAiB,OAAjB,CAAyB,QAAzB,EAAmC,CAAnC;AACD,SAFS,CAAV;AAGA,aAAK,eAAL,GAAuB,GAAvB;AACD;AACF;;AAED;;;;;;;;;;;;;;;AAcA;;;;;oCAKgB,S,EAAW;AACzB;AACA,aAAO,KAAK,MAAL,CAAY,EAAZ,EACL,KAAK,UAAL,GAAkB,KAAK,UAAvB,GAAoC,IAD/B,EAEL,YAAY,SAAZ,GAAwB,IAFnB,CAAP;AAGD;;AAED;;;;;;;;;;;;;;;wBAYI,Y,EAAc,S,EAAW;AAC3B,UAAI,KAAK,IAAT,EACE,KAAK,IAAL,CAAU,GAAV,CAAc,YAAd,EAA4B,KAAK,eAAL,CAAqB,SAArB,CAA5B;AACH;;AAED;;;;;;;;;;;;;0BAUM,S,EAAW;AACf,UAAI,KAAK,IAAT,EACE,KAAK,GAAL,CAAS,KAAK,CAAd,EAAiB,KAAK,eAAL,CAAqB,SAArB,CAAjB;AACH;;AAED;;;;;;;;;;;;;uBAUG,S,EAAW,Q,EAAU;AACtB,aAAO,KAAK,QAAL,CAAc,EAAd,CAAiB,SAAjB,EAA4B,QAA5B,CAAP;AACD;;AAED;;;;;;;;;;;wBAQI,S,EAAW,Q,EAAU;AACvB,aAAO,KAAK,QAAL,CAAc,GAAd,CAAkB,SAAlB,EAA6B,QAA7B,CAAP;AACD;;AAED;;;;;;;;4BAKQ;AACN,WAAK,QAAL,CAAc,kBAAd;AACA,WAAK,QAAL,CAAc,IAAd;AACD;;;wBAlFW;AACV,aAAO,KAAK,IAAL,GAAY,KAAK,IAAL,CAAU,GAAV,EAAZ,GAA8B,IAArC;AACD;;;;;;AAmFH;;;;;;;;;AASA;;;;;;;;;;;;;;;;;;;;;QCpLgB,M,GAAA,M;QAeA,W,GAAA,W;QAQA,e,GAAA,e;QAoCA,a,GAAA,a;;;
;AA3DT,SAAS,MAAT,CAAgB,MAAhB,EAAoC;AAAA,oCAAT,OAAS;AAAT,WAAS;AAAA;;AACzC,OAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,QAAQ,MAA5B,EAAoC,GAApC,EAAyC;AACvC,QAAI,MAAM,QAAQ,CAAR,CAAV;AACA,QAAI,OAAO,GAAP,KAAgB,WAAhB,IAA+B,QAAQ,IAA3C,EACE;;AAEF,SAAK,IAAI,GAAT,IAAgB,GAAhB,EAAqB;AACnB,UAAI,IAAI,cAAJ,CAAmB,GAAnB,CAAJ,EAA6B;AAC3B,eAAO,GAAP,IAAc,IAAI,GAAJ,CAAd;AACD;AACF;AACF;AACD,SAAO,MAAP;AACD;;AAEM,SAAS,WAAT,CAAqB,IAArB,EAA2B;AAChC,OAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAzB,EAAiC,GAAjC,EAAsC;AACpC,QAAI,KAAK,CAAL,KAAW,KAAK,IAAE,CAAP,CAAf,EAA0B;AACxB,YAAM,IAAI,KAAJ,CAAU,0CAAV,CAAN;AACD;AACF;AACF;;AAEM,SAAS,eAAT,CAAyB,CAAzB,EAA4B,CAA5B,EAA+B;AACpC,MAAI,MAAM,CAAV;AACA,MAAI,MAAM,CAAV;;AAEA,MAAI,CAAC,CAAL,EAAQ,IAAI,EAAJ;AACR,MAAI,CAAC,CAAL,EAAQ,IAAI,EAAJ;;AAER,MAAI,SAAS,EAAb;AACA,MAAI,SAAS,EAAb;;AAEA,cAAY,CAAZ;AACA,cAAY,CAAZ;;AAEA,SAAO,MAAM,EAAE,MAAR,IAAkB,MAAM,EAAE,MAAjC,EAAyC;AACvC,QAAI,EAAE,GAAF,MAAW,EAAE,GAAF,CAAf,EAAuB;AACrB;AACA;AACD,KAHD,MAGO,IAAI,EAAE,GAAF,IAAS,EAAE,GAAF,CAAb,EAAqB;AAC1B,aAAO,IAAP,CAAY,EAAE,KAAF,CAAZ;AACD,KAFM,MAEA;AACL,aAAO,IAAP,CAAY,EAAE,KAAF,CAAZ;AACD;AACF;;AAED,MAAI,MAAM,EAAE,MAAZ,EACE,SAAS,OAAO,MAAP,CAAc,EAAE,KAAF,CAAQ,GAAR,CAAd,CAAT;AACF,MAAI,MAAM,EAAE,MAAZ,EACE,SAAS,OAAO,MAAP,CAAc,EAAE,KAAF,CAAQ,GAAR,CAAd,CAAT;AACF,SAAO;AACL,aAAS,MADJ;AAEL,WAAO;AAFF,GAAP;AAID;;AAED;AACA;AACO,SAAS,aAAT,CAAuB,EAAvB,EAA2B;AAChC,MAAI,QAAQ,EAAZ;AACA,MAAI,eAAJ;AACA,OAAK,IAAI,IAAT,IAAiB,EAAjB,EAAqB;AACnB,QAAI,GAAG,cAAH,CAAkB,IAAlB,CAAJ,EACE,MAAM,IAAN,CAAW,IAAX;AACF,QAAI,QAAO,GAAG,IAAH,CAAP,MAAqB,QAArB,IAAiC,OAAO,GAAG,IAAH,EAAS,MAAhB,KAA4B,WAAjE,EAA8E;AAC5E,YAAM,IAAI,KAAJ,CAAU,2BAAV,CAAN;AACD,KAFD,MAEO,IAAI,OAAO,MAAP,KAAmB,WAAnB,IAAkC,WAAW,GAAG,IAAH,EAAS,MAA1D,EAAkE;AACvE,YAAM,IAAI,KAAJ,CAAU,8CAAV,CAAN;AACD;AACD,aAAS,GAAG,IAAH,EAAS,MAAlB;AACD;AACD,MAAI,UAAU,EAAd;AACA,MAAI,aAAJ;AACA,OAAK,IAAI,MAAM,CAAf,EAAkB,MAAM,MAAxB,EAAgC,KAAhC,EAAuC;AACrC,WAAO,EAAP;AACA,SAAK,IAAI,MAAM,CAAf,EAAkB,MAAM,MAAM,MAA9B,EAAsC,KAAtC,EAA6C;AAC3C,WAAK,MAAM,GAAN,CAAL,IAAmB,GAAG,MAAM,GAAN,CAAH,EAAe,GAAf,CAAnB;AACD;AA
CD,YAAQ,IAAR,CAAa,IAAb;AACD;AACD,SAAO,OAAP;AACD;;AAED;;;;;;;IAMa,mB,WAAA,mB;AACX,+BAAY,OAAZ,EAAqB;AAAA;;AACnB,SAAK,QAAL,GAAgB,OAAhB;AACA,SAAK,KAAL,GAAa,EAAb;AACD;;;;uBAEE,S,EAAW,Q,EAAU;AACtB,UAAI,MAAM,KAAK,QAAL,CAAc,EAAd,CAAiB,SAAjB,EAA4B,QAA5B,CAAV;AACA,WAAK,KAAL,CAAW,GAAX,IAAkB,SAAlB;AACA,aAAO,GAAP;AACD;;;wBAEG,S,EAAW,Q,EAAU;AACvB,UAAI,MAAM,KAAK,QAAL,CAAc,GAAd,CAAkB,SAAlB,EAA6B,QAA7B,CAAV;AACA,UAAI,GAAJ,EAAS;AACP,eAAO,KAAK,KAAL,CAAW,GAAX,CAAP;AACD;AACD,aAAO,GAAP;AACD;;;yCAEoB;AAAA;;AACnB,UAAI,eAAe,KAAK,KAAxB;AACA,WAAK,KAAL,GAAa,EAAb;AACA,aAAO,IAAP,CAAY,YAAZ,EAA0B,OAA1B,CAAkC,UAAC,GAAD,EAAS;AACzC,cAAK,QAAL,CAAc,GAAd,CAAkB,aAAa,GAAb,CAAlB,EAAqC,GAArC;AACD,OAFD;AAGD;;;;;;;;;;;;;;;;;;ACpHH;;;;;;;;IAEqB,G;AACnB,eAAY,KAAZ,EAAmB,IAAnB,EAAyB,YAAa,KAAtC,EAA6C;AAAA;;AAC3C,SAAK,MAAL,GAAc,KAAd;AACA,SAAK,KAAL,GAAa,IAAb;AACA,SAAK,MAAL,GAAc,KAAd;AACA,SAAK,OAAL,GAAe,sBAAf;AACD;;;;0BAEK;AACJ,aAAO,KAAK,MAAZ;AACD;;;wBAEG,K,EAAO,YAAa,K,EAAO;AAC7B,UAAI,KAAK,MAAL,KAAgB,KAApB,EAA2B;AACzB;AACA;AACD;AACD,UAAI,WAAW,KAAK,MAApB;AACA,WAAK,MAAL,GAAc,KAAd;AACA;AACA,UAAI,MAAM,EAAV;AACA,UAAI,SAAS,QAAO,KAAP,yCAAO,KAAP,OAAkB,QAA/B,EAAyC;AACvC,aAAK,IAAI,CAAT,IAAc,KAAd,EAAqB;AACnB,cAAI,MAAM,cAAN,CAAqB,CAArB,CAAJ,EACE,IAAI,CAAJ,IAAS,MAAM,CAAN,CAAT;AACH;AACF;AACD,UAAI,QAAJ,GAAe,QAAf;AACA,UAAI,KAAJ,GAAY,KAAZ;AACA,WAAK,OAAL,CAAa,OAAb,CAAqB,QAArB,EAA+B,GAA/B,EAAoC,IAApC;;AAEA;AACA;AACA,UAAI,OAAO,KAAP,IAAgB,OAAO,KAAP,CAAa,aAAjC,EAAgD;AAC9C,eAAO,KAAP,CAAa,aAAb,CACE,mBACG,KAAK,MAAL,CAAY,IAAZ,KAAqB,IAArB,GAA4B,KAAK,MAAL,CAAY,IAAZ,GAAmB,GAA/C,GAAqD,EADxD,IAEE,KAAK,KAHT,EAIE,OAAO,KAAP,KAAkB,WAAlB,GAAgC,IAAhC,GAAuC,KAJzC;AAMD;AACF;;;uBAEE,S,EAAW,Q,EAAU;AACtB,aAAO,KAAK,OAAL,CAAa,EAAb,CAAgB,SAAhB,EAA2B,QAA3B,CAAP;AACD;;;wBAEG,S,EAAW,Q,EAAU;AACvB,aAAO,KAAK,OAAL,CAAa,GAAb,CAAiB,SAAjB,EAA4B,QAA5B,CAAP;AACD;;;;;;kBAjDkB,G", - "file": "generated.js", - "sourceRoot": "", - "sourcesContent": [ - "(function(){function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require==\"function\"&&require;if(!u&&a)return 
a(o,!0);if(i)return i(o,!0);var f=new Error(\"Cannot find module '\"+o+\"'\");throw f.code=\"MODULE_NOT_FOUND\",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require==\"function\"&&require;for(var o=0;o {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Combine the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Close the handle. This clears this handle's contribution to the filter set,\n * and unsubscribes all event listeners.\n */\n close() {\n this._emitter.removeAllListeners();\n this.clear();\n this.setGroup(null);\n }\n\n /**\n * Clear this handle's contribution to the filter set.\n *\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n clear(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.clear(this._id);\n this._onChange(extraInfo);\n }\n\n /**\n * Set this handle's contribution to the filter set. This array should consist\n * of the keys of the rows that _should_ be displayed; any keys that are not\n * present in the array will be considered _filtered out_. Note that multiple\n * `FilterHandle` instances in the group may each contribute an array of keys,\n * and only those keys that appear in _all_ of the arrays make it through the\n * filter.\n *\n * @param {string[]} keys - Empty array, or array of keys. 
To clear the\n * filter, don't pass an empty array; instead, use the\n * {@link FilterHandle#clear} method.\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n set(keys, extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.update(this._id, keys);\n this._onChange(extraInfo);\n }\n\n /**\n * @return {string[]|null} - Either: 1) an array of keys that made it through\n * all of the `FilterHandle` instances, or, 2) `null`, which means no filter\n * is being applied (all data should be displayed).\n */\n get filteredKeys() {\n return this._filterSet ? this._filterSet.value : null;\n }\n\n /**\n * Subscribe to events on this `FilterHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {FilterHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link FilterHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancel event subscriptions created by {@link FilterHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|FilterHandle~listener} listener - Either the callback\n * function previously passed into {@link FilterHandle#on}, or the\n * string that was returned from {@link FilterHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n _onChange(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterVar.set(this._filterSet.value, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * @callback FilterHandle~listener\n * @param {Object} event - An object containing details of the event. 
For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the filter set, or `null` if no filter set is active),\n * `oldValue` (the previous value of the filter set), and `sender` (the\n * `FilterHandle` instance that made the change).\n */\n\n}\n\n/**\n * @event FilterHandle#change\n * @type {object}\n * @property {object} value - The new value of the filter set, or `null`\n * if no filter set is active.\n * @property {object} oldValue - The previous value of the filter set.\n * @property {FilterHandle} sender - The `FilterHandle` instance that\n * changed the value.\n */\n", - "import { diffSortedLists } from \"./util\";\n\nfunction naturalComparator(a, b) {\n if (a === b) {\n return 0;\n } else if (a < b) {\n return -1;\n } else if (a > b) {\n return 1;\n }\n}\n\n/**\n * @private\n */\nexport default class FilterSet {\n constructor() {\n this.reset();\n }\n\n reset() {\n // Key: handle ID, Value: array of selected keys, or null\n this._handles = {};\n // Key: key string, Value: count of handles that include it\n this._keys = {};\n this._value = null;\n this._activeHandles = 0;\n }\n\n get value() {\n return this._value;\n }\n\n update(handleId, keys) {\n if (keys !== null) {\n keys = keys.slice(0); // clone before sorting\n keys.sort(naturalComparator);\n }\n\n let {added, removed} = diffSortedLists(this._handles[handleId], keys);\n this._handles[handleId] = keys;\n\n for (let i = 0; i < added.length; i++) {\n this._keys[added[i]] = (this._keys[added[i]] || 0) + 1;\n }\n for (let i = 0; i < removed.length; i++) {\n this._keys[removed[i]]--;\n }\n\n this._updateValue(keys);\n }\n\n /**\n * @param {string[]} keys Sorted array of strings that indicate\n * a superset of possible keys.\n * @private\n */\n _updateValue(keys = this._allKeys) {\n let handleCount = Object.keys(this._handles).length;\n if (handleCount === 0) {\n this._value = null;\n } else {\n this._value = [];\n for (let i = 0; i < keys.length; i++) {\n let count = 
this._keys[keys[i]];\n if (count === handleCount) {\n this._value.push(keys[i]);\n }\n }\n }\n }\n\n clear(handleId) {\n if (typeof(this._handles[handleId]) === \"undefined\") {\n return;\n }\n\n let keys = this._handles[handleId];\n if (!keys) {\n keys = [];\n }\n\n for (let i = 0; i < keys.length; i++) {\n this._keys[keys[i]]--;\n }\n delete this._handles[handleId];\n\n this._updateValue();\n }\n\n get _allKeys() {\n let allKeys = Object.keys(this._keys);\n allKeys.sort(naturalComparator);\n return allKeys;\n }\n}\n", - "import Var from \"./var\";\n\n// Use a global so that multiple copies of crosstalk.js can be loaded and still\n// have groups behave as singletons across all copies.\nglobal.__crosstalk_groups = global.__crosstalk_groups || {};\nlet groups = global.__crosstalk_groups;\n\nexport default function group(groupName) {\n if (groupName && typeof(groupName) === \"string\") {\n if (!groups.hasOwnProperty(groupName)) {\n groups[groupName] = new Group(groupName);\n }\n return groups[groupName];\n } else if (typeof(groupName) === \"object\" && groupName._vars && groupName.var) {\n // Appears to already be a group object\n return groupName;\n } else if (Array.isArray(groupName) &&\n groupName.length == 1 &&\n typeof(groupName[0]) === \"string\") {\n return group(groupName[0]);\n } else {\n throw new Error(\"Invalid groupName argument\");\n }\n}\n\nclass Group {\n constructor(name) {\n this.name = name;\n this._vars = {};\n }\n\n var(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n if (!this._vars.hasOwnProperty(name))\n this._vars[name] = new Var(this, name);\n return this._vars[name];\n }\n\n has(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n return this._vars.hasOwnProperty(name);\n }\n}\n", - "import group from \"./group\";\nimport { SelectionHandle } from \"./selection\";\nimport { FilterHandle } from \"./filter\";\nimport { bind } from 
\"./input\";\nimport \"./input_selectize\";\nimport \"./input_checkboxgroup\";\nimport \"./input_slider\";\n\nconst defaultGroup = group(\"default\");\n\nfunction var_(name) {\n return defaultGroup.var(name);\n}\n\nfunction has(name) {\n return defaultGroup.has(name);\n}\n\nif (global.Shiny) {\n global.Shiny.addCustomMessageHandler(\"update-client-value\", function(message) {\n if (typeof(message.group) === \"string\") {\n group(message.group).var(message.name).set(message.value);\n } else {\n var_(message.name).set(message.value);\n }\n });\n}\n\nconst crosstalk = {\n group: group,\n var: var_,\n has: has,\n SelectionHandle: SelectionHandle,\n FilterHandle: FilterHandle,\n bind: bind\n};\n\n/**\n * @namespace crosstalk\n */\nexport default crosstalk;\nglobal.crosstalk = crosstalk;\n", - "let $ = global.jQuery;\n\nlet bindings = {};\n\nexport function register(reg) {\n bindings[reg.className] = reg;\n if (global.document && global.document.readyState !== \"complete\") {\n $(() => {\n bind();\n });\n } else if (global.document) {\n setTimeout(bind, 100);\n }\n}\n\nexport function bind() {\n Object.keys(bindings).forEach(function(className) {\n let binding = bindings[className];\n $(\".\" + binding.className).not(\".crosstalk-input-bound\").each(function(i, el) {\n bindInstance(binding, el);\n });\n });\n}\n\n// Escape jQuery identifier\nfunction $escape(val) {\n return val.replace(/([!\"#$%&'()*+,./:;<=>?@[\\\\\\]^`{|}~])/g, \"\\\\$1\");\n}\n\nfunction bindEl(el) {\n let $el = $(el);\n Object.keys(bindings).forEach(function(className) {\n if ($el.hasClass(className) && !$el.hasClass(\"crosstalk-input-bound\")) {\n let binding = bindings[className];\n bindInstance(binding, el);\n }\n });\n}\n\nfunction bindInstance(binding, el) {\n let jsonEl = $(el).find(\"script[type='application/json'][data-for='\" + $escape(el.id) + \"']\");\n let data = JSON.parse(jsonEl[0].innerText);\n\n let instance = binding.factory(el, data);\n $(el).data(\"crosstalk-instance\", 
instance);\n $(el).addClass(\"crosstalk-input-bound\");\n}\n\nif (global.Shiny) {\n let inputBinding = new global.Shiny.InputBinding();\n let $ = global.jQuery;\n $.extend(inputBinding, {\n find: function(scope) {\n return $(scope).find(\".crosstalk-input\");\n },\n initialize: function(el) {\n if (!$(el).hasClass(\"crosstalk-input-bound\")) {\n bindEl(el);\n }\n },\n getId: function(el) {\n return el.id;\n },\n getValue: function(el) {\n\n },\n setValue: function(el, value) {\n\n },\n receiveMessage: function(el, data) {\n\n },\n subscribe: function(el, callback) {\n $(el).data(\"crosstalk-instance\").resume();\n },\n unsubscribe: function(el) {\n $(el).data(\"crosstalk-instance\").suspend();\n }\n });\n global.Shiny.inputBindings.register(inputBinding, \"crosstalk.inputBinding\");\n}\n", - "import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: \"crosstalk-input-checkboxgroup\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n let $el = $(el);\n $el.on(\"change\", \"input[type='checkbox']\", function() {\n let checked = $el.find(\"input[type='checkbox']:checked\");\n if (checked.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n checked.each(function() {\n data.map[this.value].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n", - "import * as input from \"./input\";\nimport * as util from \"./util\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: 
\"crosstalk-input-select\",\n\n factory: function(el, data) {\n /*\n * items: {value: [...], label: [...]}\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n\n let first = [{value: \"\", label: \"(All)\"}];\n let items = util.dataframeToD3(data.items);\n let opts = {\n options: first.concat(items),\n valueField: \"value\",\n labelField: \"label\",\n searchField: \"label\"\n };\n\n let select = $(el).find(\"select\")[0];\n\n let selectize = $(select).selectize(opts)[0].selectize;\n\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n selectize.on(\"change\", function() {\n if (selectize.items.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n selectize.items.forEach(function(group) {\n data.map[group].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n", - "import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\nlet strftime = global.strftime;\n\ninput.register({\n className: \"crosstalk-input-slider\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let opts = {};\n let $el = $(el).find(\"input\");\n let dataType = $el.data(\"data-type\");\n let timeFormat = $el.data(\"time-format\");\n let round = $el.data(\"round\");\n let timeFormatter;\n\n // Set up formatting functions\n if (dataType === \"date\") {\n timeFormatter = strftime.utc();\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n\n } else if (dataType === \"datetime\") {\n let timezone = $el.data(\"timezone\");\n if 
(timezone)\n timeFormatter = strftime.timezone(timezone);\n else\n timeFormatter = strftime;\n\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n } else if (dataType === \"number\") {\n if (typeof round !== \"undefined\")\n opts.prettify = function(num) {\n let factor = Math.pow(10, round);\n return Math.round(num * factor) / factor;\n };\n }\n\n $el.ionRangeSlider(opts);\n\n function getValue() {\n let result = $el.data(\"ionRangeSlider\").result;\n\n // Function for converting numeric value from slider to appropriate type.\n let convert;\n let dataType = $el.data(\"data-type\");\n if (dataType === \"date\") {\n convert = function(val) {\n return formatDateUTC(new Date(+val));\n };\n } else if (dataType === \"datetime\") {\n convert = function(val) {\n // Convert ms to s\n return +val / 1000;\n };\n } else {\n convert = function(val) { return +val; };\n }\n\n if ($el.data(\"ionRangeSlider\").options.type === \"double\") {\n return [convert(result.from), convert(result.to)];\n } else {\n return convert(result.from);\n }\n }\n\n let lastKnownKeys = null;\n\n $el.on(\"change.crosstalkSliderInput\", function(event) {\n if (!$el.data(\"updating\") && !$el.data(\"animating\")) {\n let [from, to] = getValue();\n let keys = [];\n for (let i = 0; i < data.values.length; i++) {\n let val = data.values[i];\n if (val >= from && val <= to) {\n keys.push(data.keys[i]);\n }\n }\n keys.sort();\n ctHandle.set(keys);\n lastKnownKeys = keys;\n }\n });\n\n\n // let $el = $(el);\n // $el.on(\"change\", \"input[type=\"checkbox\"]\", function() {\n // let checked = $el.find(\"input[type=\"checkbox\"]:checked\");\n // if (checked.length === 0) {\n // ctHandle.clear();\n // } else {\n // let keys = {};\n // checked.each(function() {\n // data.map[this.value].forEach(function(key) {\n // keys[key] = true;\n // });\n // });\n // let keyArray = Object.keys(keys);\n // keyArray.sort();\n // ctHandle.set(keyArray);\n // }\n // });\n\n return {\n 
suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n\n\n// Convert a number to a string with leading zeros\nfunction padZeros(n, digits) {\n let str = n.toString();\n while (str.length < digits)\n str = \"0\" + str;\n return str;\n}\n\n// Given a Date object, return a string in yyyy-mm-dd format, using the\n// UTC date. This may be a day off from the date in the local time zone.\nfunction formatDateUTC(date) {\n if (date instanceof Date) {\n return date.getUTCFullYear() + \"-\" +\n padZeros(date.getUTCMonth()+1, 2) + \"-\" +\n padZeros(date.getUTCDate(), 2);\n\n } else {\n return null;\n }\n}\n", - "import Events from \"./events\";\nimport grp from \"./group\";\nimport * as util from \"./util\";\n\n/**\n * Use this class to read and write (and listen for changes to) the selection\n * for a Crosstalk group. This is intended to be used for linked brushing.\n *\n * If two (or more) `SelectionHandle` instances in the same webpage share the\n * same group name, they will share the same state. Setting the selection using\n * one `SelectionHandle` instance will result in the `value` property instantly\n * changing across the others, and `\"change\"` event listeners on all instances\n * (including the one that initiated the sending) will fire.\n *\n * @param {string} [group] - The name of the Crosstalk group, or if none,\n * null or undefined (or any other falsy value). This can be changed later\n * via the [SelectionHandle#setGroup](#setGroup) method.\n * @param {Object} [extraInfo] - An object whose properties will be copied to\n * the event object whenever an event is emitted.\n */\nexport class SelectionHandle {\n\n constructor(group = null, extraInfo = null) {\n this._eventRelay = new Events();\n this._emitter = new util.SubscriptionTracker(this._eventRelay);\n\n // Name of the group we're currently tracking, if any. 
Can change over time.\n this._group = null;\n // The Var we're currently tracking, if any. Can change over time.\n this._var = null;\n // The event handler subscription we currently have on var.on(\"change\").\n this._varOnChangeSub = null;\n\n this._extraInfo = util.extend({ sender: this }, extraInfo);\n\n this.setGroup(group);\n }\n\n /**\n * Changes the Crosstalk group membership of this SelectionHandle. The group\n * being switched away from (if any) will not have its selection value\n * modified as a result of calling `setGroup`, even if this handle was the\n * most recent handle to set the selection of the group.\n *\n * The group being switched to (if any) will also not have its selection value\n * modified as a result of calling `setGroup`. If you want to set the\n * selection value of the new group, call `set` explicitly.\n *\n * @param {string} group - The name of the Crosstalk group, or null (or\n * undefined) to clear the group.\n */\n setGroup(group) {\n // If group is unchanged, do nothing\n if (this._group === group)\n return;\n // Treat null, undefined, and other falsy values the same\n if (!this._group && !group)\n return;\n\n if (this._var) {\n this._var.off(\"change\", this._varOnChangeSub);\n this._var = null;\n this._varOnChangeSub = null;\n }\n\n this._group = group;\n\n if (group) {\n this._var = grp(group).var(\"selection\");\n let sub = this._var.on(\"change\", (e) => {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Retrieves the current selection for the group represented by this\n * `SelectionHandle`.\n *\n * - If no selection is active, then this value will be falsy.\n * - If a selection is active, but no data points are selected, then this\n * value will be an empty array.\n * - If a selection is active, and data points are selected, then the keys\n * of the selected data points will be present in the array.\n */\n get value() {\n return this._var ? 
this._var.get() : null;\n }\n\n /**\n * Combines the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n // Important incidental effect: shallow clone is returned\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {string[]} selectedKeys - Falsy, empty array, or array of keys (see\n * {@link SelectionHandle#value}).\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `SelectionHandle` constructor).\n */\n set(selectedKeys, extraInfo) {\n if (this._var)\n this._var.set(selectedKeys, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any that were passed\n * into the `SelectionHandle` constructor).\n */\n clear(extraInfo) {\n if (this._var)\n this.set(void 0, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Subscribes to events on this `SelectionHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {SelectionHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link SelectionHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return 
this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancels event subscriptions created by {@link SelectionHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|SelectionHandle~listener} listener - Either the callback\n * function previously passed into {@link SelectionHandle#on}, or the\n * string that was returned from {@link SelectionHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n /**\n * Shuts down the `SelectionHandle` object.\n *\n * Removes all event listeners that were added through this handle.\n */\n close() {\n this._emitter.removeAllListeners();\n this.setGroup(null);\n }\n}\n\n/**\n * @callback SelectionHandle~listener\n * @param {Object} event - An object containing details of the event. For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the selection, or `undefined` if no selection is active),\n * `oldValue` (the previous value of the selection), and `sender` (the\n * `SelectionHandle` instance that made the change).\n */\n\n/**\n * @event SelectionHandle#change\n * @type {object}\n * @property {object} value - The new value of the selection, or `undefined`\n * if no selection is active.\n * @property {object} oldValue - The previous value of the selection.\n * @property {SelectionHandle} sender - The `SelectionHandle` instance that\n * changed the value.\n */\n", - "export function extend(target, ...sources) {\n for (let i = 0; i < sources.length; i++) {\n let src = sources[i];\n if (typeof(src) === \"undefined\" || src === null)\n continue;\n\n for (let key in src) {\n if (src.hasOwnProperty(key)) {\n target[key] = src[key];\n }\n }\n }\n return target;\n}\n\nexport function checkSorted(list) {\n for (let i = 1; i < list.length; i++) {\n if (list[i] <= list[i-1]) {\n throw new Error(\"List is not sorted or contains duplicate\");\n }\n }\n}\n\nexport function diffSortedLists(a, b) {\n let i_a = 0;\n let i_b = 
0;\n\n if (!a) a = [];\n if (!b) b = [];\n\n let a_only = [];\n let b_only = [];\n\n checkSorted(a);\n checkSorted(b);\n\n while (i_a < a.length && i_b < b.length) {\n if (a[i_a] === b[i_b]) {\n i_a++;\n i_b++;\n } else if (a[i_a] < b[i_b]) {\n a_only.push(a[i_a++]);\n } else {\n b_only.push(b[i_b++]);\n }\n }\n\n if (i_a < a.length)\n a_only = a_only.concat(a.slice(i_a));\n if (i_b < b.length)\n b_only = b_only.concat(b.slice(i_b));\n return {\n removed: a_only,\n added: b_only\n };\n}\n\n// Convert from wide: { colA: [1,2,3], colB: [4,5,6], ... }\n// to long: [ {colA: 1, colB: 4}, {colA: 2, colB: 5}, ... ]\nexport function dataframeToD3(df) {\n let names = [];\n let length;\n for (let name in df) {\n if (df.hasOwnProperty(name))\n names.push(name);\n if (typeof(df[name]) !== \"object\" || typeof(df[name].length) === \"undefined\") {\n throw new Error(\"All fields must be arrays\");\n } else if (typeof(length) !== \"undefined\" && length !== df[name].length) {\n throw new Error(\"All fields must be arrays of the same length\");\n }\n length = df[name].length;\n }\n let results = [];\n let item;\n for (let row = 0; row < length; row++) {\n item = {};\n for (let col = 0; col < names.length; col++) {\n item[names[col]] = df[names[col]][row];\n }\n results.push(item);\n }\n return results;\n}\n\n/**\n * Keeps track of all event listener additions/removals and lets all active\n * listeners be removed with a single operation.\n *\n * @private\n */\nexport class SubscriptionTracker {\n constructor(emitter) {\n this._emitter = emitter;\n this._subs = {};\n }\n\n on(eventType, listener) {\n let sub = this._emitter.on(eventType, listener);\n this._subs[sub] = eventType;\n return sub;\n }\n\n off(eventType, listener) {\n let sub = this._emitter.off(eventType, listener);\n if (sub) {\n delete this._subs[sub];\n }\n return sub;\n }\n\n removeAllListeners() {\n let current_subs = this._subs;\n this._subs = {};\n Object.keys(current_subs).forEach((sub) => {\n 
this._emitter.off(current_subs[sub], sub);\n });\n }\n}\n", - "import Events from \"./events\";\n\nexport default class Var {\n constructor(group, name, /*optional*/ value) {\n this._group = group;\n this._name = name;\n this._value = value;\n this._events = new Events();\n }\n\n get() {\n return this._value;\n }\n\n set(value, /*optional*/ event) {\n if (this._value === value) {\n // Do nothing; the value hasn't changed\n return;\n }\n let oldValue = this._value;\n this._value = value;\n // Alert JavaScript listeners that the value has changed\n let evt = {};\n if (event && typeof(event) === \"object\") {\n for (let k in event) {\n if (event.hasOwnProperty(k))\n evt[k] = event[k];\n }\n }\n evt.oldValue = oldValue;\n evt.value = value;\n this._events.trigger(\"change\", evt, this);\n\n // TODO: Make this extensible, to let arbitrary back-ends know that\n // something has changed\n if (global.Shiny && global.Shiny.onInputChange) {\n global.Shiny.onInputChange(\n \".clientValue-\" +\n (this._group.name !== null ? this._group.name + \"-\" : \"\") +\n this._name,\n typeof(value) === \"undefined\" ? 
null : value\n );\n }\n }\n\n on(eventType, listener) {\n return this._events.on(eventType, listener);\n }\n\n off(eventType, listener) {\n return this._events.off(eventType, listener);\n }\n}\n" - ] -} \ No newline at end of file diff --git a/content/find/recipes/index_files/crosstalk/js/crosstalk.min.js b/content/find/recipes/index_files/crosstalk/js/crosstalk.min.js deleted file mode 100644 index b7ec0ac9..00000000 --- a/content/find/recipes/index_files/crosstalk/js/crosstalk.min.js +++ /dev/null @@ -1,2 +0,0 @@ -!function o(u,a,l){function s(n,e){if(!a[n]){if(!u[n]){var t="function"==typeof require&&require;if(!e&&t)return t(n,!0);if(f)return f(n,!0);var r=new Error("Cannot find module '"+n+"'");throw r.code="MODULE_NOT_FOUND",r}var i=a[n]={exports:{}};u[n][0].call(i.exports,function(e){var t=u[n][1][e];return s(t||e)},i,i.exports,o,u,a,l)}return a[n].exports}for(var f="function"==typeof require&&require,e=0;e?@[\\\]^`{|}~])/g,"\\$1")+"']"),r=JSON.parse(n[0].innerText),i=e.factory(t,r);o(t).data("crosstalk-instance",i),o(t).addClass("crosstalk-input-bound")}if(t.Shiny){var e=new t.Shiny.InputBinding,u=t.jQuery;u.extend(e,{find:function(e){return u(e).find(".crosstalk-input")},initialize:function(e){var t,n;u(e).hasClass("crosstalk-input-bound")||(n=o(t=e),Object.keys(r).forEach(function(e){n.hasClass(e)&&!n.hasClass("crosstalk-input-bound")&&i(r[e],t)}))},getId:function(e){return e.id},getValue:function(e){},setValue:function(e,t){},receiveMessage:function(e,t){},subscribe:function(e,t){u(e).data("crosstalk-instance").resume()},unsubscribe:function(e){u(e).data("crosstalk-instance").suspend()}}),t.Shiny.inputBindings.register(e,"crosstalk.inputBinding")}}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{}],7:[function(r,e,t){(function(e){"use strict";var t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return 
t.default=e,t}}(r("./input")),n=r("./filter");var a=e.jQuery;t.register({className:"crosstalk-input-checkboxgroup",factory:function(e,r){var i=new n.FilterHandle(r.group),o=void 0,u=a(e);return u.on("change","input[type='checkbox']",function(){var e=u.find("input[type='checkbox']:checked");if(0===e.length)o=null,i.clear();else{var t={};e.each(function(){r.map[this.value].forEach(function(e){t[e]=!0})});var n=Object.keys(t);n.sort(),o=n,i.set(n)}}),{suspend:function(){i.clear()},resume:function(){o&&i.set(o)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6}],8:[function(r,e,t){(function(e){"use strict";var t=n(r("./input")),l=n(r("./util")),s=r("./filter");function n(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}var f=e.jQuery;t.register({className:"crosstalk-input-select",factory:function(e,n){var t=l.dataframeToD3(n.items),r={options:[{value:"",label:"(All)"}].concat(t),valueField:"value",labelField:"label",searchField:"label"},i=f(e).find("select")[0],o=f(i).selectize(r)[0].selectize,u=new s.FilterHandle(n.group),a=void 0;return o.on("change",function(){if(0===o.items.length)a=null,u.clear();else{var t={};o.items.forEach(function(e){n.map[e].forEach(function(e){t[e]=!0})});var e=Object.keys(t);e.sort(),a=e,u.set(e)}}),{suspend:function(){u.clear()},resume:function(){a&&u.set(a)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6,"./util":11}],9:[function(n,e,t){(function(e){"use strict";var d=function(e,t){if(Array.isArray(e))return e;if(Symbol.iterator in Object(e))return function(e,t){var n=[],r=!0,i=!1,o=void 0;try{for(var 
u,a=e[Symbol.iterator]();!(r=(u=a.next()).done)&&(n.push(u.value),!t||n.length!==t);r=!0);}catch(e){i=!0,o=e}finally{try{!r&&a.return&&a.return()}finally{if(i)throw o}}return n}(e,t);throw new TypeError("Invalid attempt to destructure non-iterable instance")},t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}}(n("./input")),a=n("./filter");var v=e.jQuery,p=e.strftime;function y(e,t){for(var n=e.toString();n.length {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Combine the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Close the handle. This clears this handle's contribution to the filter set,\n * and unsubscribes all event listeners.\n */\n close() {\n this._emitter.removeAllListeners();\n this.clear();\n this.setGroup(null);\n }\n\n /**\n * Clear this handle's contribution to the filter set.\n *\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n clear(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.clear(this._id);\n this._onChange(extraInfo);\n }\n\n /**\n * Set this handle's contribution to the filter set. This array should consist\n * of the keys of the rows that _should_ be displayed; any keys that are not\n * present in the array will be considered _filtered out_. 
Note that multiple\n * `FilterHandle` instances in the group may each contribute an array of keys,\n * and only those keys that appear in _all_ of the arrays make it through the\n * filter.\n *\n * @param {string[]} keys - Empty array, or array of keys. To clear the\n * filter, don't pass an empty array; instead, use the\n * {@link FilterHandle#clear} method.\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n set(keys, extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.update(this._id, keys);\n this._onChange(extraInfo);\n }\n\n /**\n * @return {string[]|null} - Either: 1) an array of keys that made it through\n * all of the `FilterHandle` instances, or, 2) `null`, which means no filter\n * is being applied (all data should be displayed).\n */\n get filteredKeys() {\n return this._filterSet ? 
this._filterSet.value : null;\n }\n\n /**\n * Subscribe to events on this `FilterHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {FilterHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link FilterHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancel event subscriptions created by {@link FilterHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|FilterHandle~listener} listener - Either the callback\n * function previously passed into {@link FilterHandle#on}, or the\n * string that was returned from {@link FilterHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n _onChange(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterVar.set(this._filterSet.value, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * @callback FilterHandle~listener\n * @param {Object} event - An object containing details of the event. 
For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the filter set, or `null` if no filter set is active),\n * `oldValue` (the previous value of the filter set), and `sender` (the\n * `FilterHandle` instance that made the change).\n */\n\n}\n\n/**\n * @event FilterHandle#change\n * @type {object}\n * @property {object} value - The new value of the filter set, or `null`\n * if no filter set is active.\n * @property {object} oldValue - The previous value of the filter set.\n * @property {FilterHandle} sender - The `FilterHandle` instance that\n * changed the value.\n */\n","import { diffSortedLists } from \"./util\";\n\nfunction naturalComparator(a, b) {\n if (a === b) {\n return 0;\n } else if (a < b) {\n return -1;\n } else if (a > b) {\n return 1;\n }\n}\n\n/**\n * @private\n */\nexport default class FilterSet {\n constructor() {\n this.reset();\n }\n\n reset() {\n // Key: handle ID, Value: array of selected keys, or null\n this._handles = {};\n // Key: key string, Value: count of handles that include it\n this._keys = {};\n this._value = null;\n this._activeHandles = 0;\n }\n\n get value() {\n return this._value;\n }\n\n update(handleId, keys) {\n if (keys !== null) {\n keys = keys.slice(0); // clone before sorting\n keys.sort(naturalComparator);\n }\n\n let {added, removed} = diffSortedLists(this._handles[handleId], keys);\n this._handles[handleId] = keys;\n\n for (let i = 0; i < added.length; i++) {\n this._keys[added[i]] = (this._keys[added[i]] || 0) + 1;\n }\n for (let i = 0; i < removed.length; i++) {\n this._keys[removed[i]]--;\n }\n\n this._updateValue(keys);\n }\n\n /**\n * @param {string[]} keys Sorted array of strings that indicate\n * a superset of possible keys.\n * @private\n */\n _updateValue(keys = this._allKeys) {\n let handleCount = Object.keys(this._handles).length;\n if (handleCount === 0) {\n this._value = null;\n } else {\n this._value = [];\n for (let i = 0; i < keys.length; i++) {\n let count = 
this._keys[keys[i]];\n if (count === handleCount) {\n this._value.push(keys[i]);\n }\n }\n }\n }\n\n clear(handleId) {\n if (typeof(this._handles[handleId]) === \"undefined\") {\n return;\n }\n\n let keys = this._handles[handleId];\n if (!keys) {\n keys = [];\n }\n\n for (let i = 0; i < keys.length; i++) {\n this._keys[keys[i]]--;\n }\n delete this._handles[handleId];\n\n this._updateValue();\n }\n\n get _allKeys() {\n let allKeys = Object.keys(this._keys);\n allKeys.sort(naturalComparator);\n return allKeys;\n }\n}\n","import Var from \"./var\";\n\n// Use a global so that multiple copies of crosstalk.js can be loaded and still\n// have groups behave as singletons across all copies.\nglobal.__crosstalk_groups = global.__crosstalk_groups || {};\nlet groups = global.__crosstalk_groups;\n\nexport default function group(groupName) {\n if (groupName && typeof(groupName) === \"string\") {\n if (!groups.hasOwnProperty(groupName)) {\n groups[groupName] = new Group(groupName);\n }\n return groups[groupName];\n } else if (typeof(groupName) === \"object\" && groupName._vars && groupName.var) {\n // Appears to already be a group object\n return groupName;\n } else if (Array.isArray(groupName) &&\n groupName.length == 1 &&\n typeof(groupName[0]) === \"string\") {\n return group(groupName[0]);\n } else {\n throw new Error(\"Invalid groupName argument\");\n }\n}\n\nclass Group {\n constructor(name) {\n this.name = name;\n this._vars = {};\n }\n\n var(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n if (!this._vars.hasOwnProperty(name))\n this._vars[name] = new Var(this, name);\n return this._vars[name];\n }\n\n has(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n return this._vars.hasOwnProperty(name);\n }\n}\n","import group from \"./group\";\nimport { SelectionHandle } from \"./selection\";\nimport { FilterHandle } from \"./filter\";\nimport { bind } from 
\"./input\";\nimport \"./input_selectize\";\nimport \"./input_checkboxgroup\";\nimport \"./input_slider\";\n\nconst defaultGroup = group(\"default\");\n\nfunction var_(name) {\n return defaultGroup.var(name);\n}\n\nfunction has(name) {\n return defaultGroup.has(name);\n}\n\nif (global.Shiny) {\n global.Shiny.addCustomMessageHandler(\"update-client-value\", function(message) {\n if (typeof(message.group) === \"string\") {\n group(message.group).var(message.name).set(message.value);\n } else {\n var_(message.name).set(message.value);\n }\n });\n}\n\nconst crosstalk = {\n group: group,\n var: var_,\n has: has,\n SelectionHandle: SelectionHandle,\n FilterHandle: FilterHandle,\n bind: bind\n};\n\n/**\n * @namespace crosstalk\n */\nexport default crosstalk;\nglobal.crosstalk = crosstalk;\n","let $ = global.jQuery;\n\nlet bindings = {};\n\nexport function register(reg) {\n bindings[reg.className] = reg;\n if (global.document && global.document.readyState !== \"complete\") {\n $(() => {\n bind();\n });\n } else if (global.document) {\n setTimeout(bind, 100);\n }\n}\n\nexport function bind() {\n Object.keys(bindings).forEach(function(className) {\n let binding = bindings[className];\n $(\".\" + binding.className).not(\".crosstalk-input-bound\").each(function(i, el) {\n bindInstance(binding, el);\n });\n });\n}\n\n// Escape jQuery identifier\nfunction $escape(val) {\n return val.replace(/([!\"#$%&'()*+,./:;<=>?@[\\\\\\]^`{|}~])/g, \"\\\\$1\");\n}\n\nfunction bindEl(el) {\n let $el = $(el);\n Object.keys(bindings).forEach(function(className) {\n if ($el.hasClass(className) && !$el.hasClass(\"crosstalk-input-bound\")) {\n let binding = bindings[className];\n bindInstance(binding, el);\n }\n });\n}\n\nfunction bindInstance(binding, el) {\n let jsonEl = $(el).find(\"script[type='application/json'][data-for='\" + $escape(el.id) + \"']\");\n let data = JSON.parse(jsonEl[0].innerText);\n\n let instance = binding.factory(el, data);\n $(el).data(\"crosstalk-instance\", instance);\n 
$(el).addClass(\"crosstalk-input-bound\");\n}\n\nif (global.Shiny) {\n let inputBinding = new global.Shiny.InputBinding();\n let $ = global.jQuery;\n $.extend(inputBinding, {\n find: function(scope) {\n return $(scope).find(\".crosstalk-input\");\n },\n initialize: function(el) {\n if (!$(el).hasClass(\"crosstalk-input-bound\")) {\n bindEl(el);\n }\n },\n getId: function(el) {\n return el.id;\n },\n getValue: function(el) {\n\n },\n setValue: function(el, value) {\n\n },\n receiveMessage: function(el, data) {\n\n },\n subscribe: function(el, callback) {\n $(el).data(\"crosstalk-instance\").resume();\n },\n unsubscribe: function(el) {\n $(el).data(\"crosstalk-instance\").suspend();\n }\n });\n global.Shiny.inputBindings.register(inputBinding, \"crosstalk.inputBinding\");\n}\n","import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: \"crosstalk-input-checkboxgroup\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n let $el = $(el);\n $el.on(\"change\", \"input[type='checkbox']\", function() {\n let checked = $el.find(\"input[type='checkbox']:checked\");\n if (checked.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n checked.each(function() {\n data.map[this.value].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n","import * as input from \"./input\";\nimport * as util from \"./util\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: \"crosstalk-input-select\",\n\n 
factory: function(el, data) {\n /*\n * items: {value: [...], label: [...]}\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n\n let first = [{value: \"\", label: \"(All)\"}];\n let items = util.dataframeToD3(data.items);\n let opts = {\n options: first.concat(items),\n valueField: \"value\",\n labelField: \"label\",\n searchField: \"label\"\n };\n\n let select = $(el).find(\"select\")[0];\n\n let selectize = $(select).selectize(opts)[0].selectize;\n\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n selectize.on(\"change\", function() {\n if (selectize.items.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n selectize.items.forEach(function(group) {\n data.map[group].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n","import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\nlet strftime = global.strftime;\n\ninput.register({\n className: \"crosstalk-input-slider\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let opts = {};\n let $el = $(el).find(\"input\");\n let dataType = $el.data(\"data-type\");\n let timeFormat = $el.data(\"time-format\");\n let round = $el.data(\"round\");\n let timeFormatter;\n\n // Set up formatting functions\n if (dataType === \"date\") {\n timeFormatter = strftime.utc();\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n\n } else if (dataType === \"datetime\") {\n let timezone = $el.data(\"timezone\");\n if (timezone)\n timeFormatter = 
strftime.timezone(timezone);\n else\n timeFormatter = strftime;\n\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n } else if (dataType === \"number\") {\n if (typeof round !== \"undefined\")\n opts.prettify = function(num) {\n let factor = Math.pow(10, round);\n return Math.round(num * factor) / factor;\n };\n }\n\n $el.ionRangeSlider(opts);\n\n function getValue() {\n let result = $el.data(\"ionRangeSlider\").result;\n\n // Function for converting numeric value from slider to appropriate type.\n let convert;\n let dataType = $el.data(\"data-type\");\n if (dataType === \"date\") {\n convert = function(val) {\n return formatDateUTC(new Date(+val));\n };\n } else if (dataType === \"datetime\") {\n convert = function(val) {\n // Convert ms to s\n return +val / 1000;\n };\n } else {\n convert = function(val) { return +val; };\n }\n\n if ($el.data(\"ionRangeSlider\").options.type === \"double\") {\n return [convert(result.from), convert(result.to)];\n } else {\n return convert(result.from);\n }\n }\n\n let lastKnownKeys = null;\n\n $el.on(\"change.crosstalkSliderInput\", function(event) {\n if (!$el.data(\"updating\") && !$el.data(\"animating\")) {\n let [from, to] = getValue();\n let keys = [];\n for (let i = 0; i < data.values.length; i++) {\n let val = data.values[i];\n if (val >= from && val <= to) {\n keys.push(data.keys[i]);\n }\n }\n keys.sort();\n ctHandle.set(keys);\n lastKnownKeys = keys;\n }\n });\n\n\n // let $el = $(el);\n // $el.on(\"change\", \"input[type=\"checkbox\"]\", function() {\n // let checked = $el.find(\"input[type=\"checkbox\"]:checked\");\n // if (checked.length === 0) {\n // ctHandle.clear();\n // } else {\n // let keys = {};\n // checked.each(function() {\n // data.map[this.value].forEach(function(key) {\n // keys[key] = true;\n // });\n // });\n // let keyArray = Object.keys(keys);\n // keyArray.sort();\n // ctHandle.set(keyArray);\n // }\n // });\n\n return {\n suspend: function() {\n 
ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n\n\n// Convert a number to a string with leading zeros\nfunction padZeros(n, digits) {\n let str = n.toString();\n while (str.length < digits)\n str = \"0\" + str;\n return str;\n}\n\n// Given a Date object, return a string in yyyy-mm-dd format, using the\n// UTC date. This may be a day off from the date in the local time zone.\nfunction formatDateUTC(date) {\n if (date instanceof Date) {\n return date.getUTCFullYear() + \"-\" +\n padZeros(date.getUTCMonth()+1, 2) + \"-\" +\n padZeros(date.getUTCDate(), 2);\n\n } else {\n return null;\n }\n}\n","import Events from \"./events\";\nimport grp from \"./group\";\nimport * as util from \"./util\";\n\n/**\n * Use this class to read and write (and listen for changes to) the selection\n * for a Crosstalk group. This is intended to be used for linked brushing.\n *\n * If two (or more) `SelectionHandle` instances in the same webpage share the\n * same group name, they will share the same state. Setting the selection using\n * one `SelectionHandle` instance will result in the `value` property instantly\n * changing across the others, and `\"change\"` event listeners on all instances\n * (including the one that initiated the sending) will fire.\n *\n * @param {string} [group] - The name of the Crosstalk group, or if none,\n * null or undefined (or any other falsy value). This can be changed later\n * via the [SelectionHandle#setGroup](#setGroup) method.\n * @param {Object} [extraInfo] - An object whose properties will be copied to\n * the event object whenever an event is emitted.\n */\nexport class SelectionHandle {\n\n constructor(group = null, extraInfo = null) {\n this._eventRelay = new Events();\n this._emitter = new util.SubscriptionTracker(this._eventRelay);\n\n // Name of the group we're currently tracking, if any. Can change over time.\n this._group = null;\n // The Var we're currently tracking, if any. 
Can change over time.\n this._var = null;\n // The event handler subscription we currently have on var.on(\"change\").\n this._varOnChangeSub = null;\n\n this._extraInfo = util.extend({ sender: this }, extraInfo);\n\n this.setGroup(group);\n }\n\n /**\n * Changes the Crosstalk group membership of this SelectionHandle. The group\n * being switched away from (if any) will not have its selection value\n * modified as a result of calling `setGroup`, even if this handle was the\n * most recent handle to set the selection of the group.\n *\n * The group being switched to (if any) will also not have its selection value\n * modified as a result of calling `setGroup`. If you want to set the\n * selection value of the new group, call `set` explicitly.\n *\n * @param {string} group - The name of the Crosstalk group, or null (or\n * undefined) to clear the group.\n */\n setGroup(group) {\n // If group is unchanged, do nothing\n if (this._group === group)\n return;\n // Treat null, undefined, and other falsy values the same\n if (!this._group && !group)\n return;\n\n if (this._var) {\n this._var.off(\"change\", this._varOnChangeSub);\n this._var = null;\n this._varOnChangeSub = null;\n }\n\n this._group = group;\n\n if (group) {\n this._var = grp(group).var(\"selection\");\n let sub = this._var.on(\"change\", (e) => {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Retrieves the current selection for the group represented by this\n * `SelectionHandle`.\n *\n * - If no selection is active, then this value will be falsy.\n * - If a selection is active, but no data points are selected, then this\n * value will be an empty array.\n * - If a selection is active, and data points are selected, then the keys\n * of the selected data points will be present in the array.\n */\n get value() {\n return this._var ? 
this._var.get() : null;\n }\n\n /**\n * Combines the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n // Important incidental effect: shallow clone is returned\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {string[]} selectedKeys - Falsy, empty array, or array of keys (see\n * {@link SelectionHandle#value}).\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `SelectionHandle` constructor).\n */\n set(selectedKeys, extraInfo) {\n if (this._var)\n this._var.set(selectedKeys, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any that were passed\n * into the `SelectionHandle` constructor).\n */\n clear(extraInfo) {\n if (this._var)\n this.set(void 0, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Subscribes to events on this `SelectionHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {SelectionHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link SelectionHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return 
this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancels event subscriptions created by {@link SelectionHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|SelectionHandle~listener} listener - Either the callback\n * function previously passed into {@link SelectionHandle#on}, or the\n * string that was returned from {@link SelectionHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n /**\n * Shuts down the `SelectionHandle` object.\n *\n * Removes all event listeners that were added through this handle.\n */\n close() {\n this._emitter.removeAllListeners();\n this.setGroup(null);\n }\n}\n\n/**\n * @callback SelectionHandle~listener\n * @param {Object} event - An object containing details of the event. For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the selection, or `undefined` if no selection is active),\n * `oldValue` (the previous value of the selection), and `sender` (the\n * `SelectionHandle` instance that made the change).\n */\n\n/**\n * @event SelectionHandle#change\n * @type {object}\n * @property {object} value - The new value of the selection, or `undefined`\n * if no selection is active.\n * @property {object} oldValue - The previous value of the selection.\n * @property {SelectionHandle} sender - The `SelectionHandle` instance that\n * changed the value.\n */\n","export function extend(target, ...sources) {\n for (let i = 0; i < sources.length; i++) {\n let src = sources[i];\n if (typeof(src) === \"undefined\" || src === null)\n continue;\n\n for (let key in src) {\n if (src.hasOwnProperty(key)) {\n target[key] = src[key];\n }\n }\n }\n return target;\n}\n\nexport function checkSorted(list) {\n for (let i = 1; i < list.length; i++) {\n if (list[i] <= list[i-1]) {\n throw new Error(\"List is not sorted or contains duplicate\");\n }\n }\n}\n\nexport function diffSortedLists(a, b) {\n let i_a = 0;\n let i_b = 
0;\n\n if (!a) a = [];\n if (!b) b = [];\n\n let a_only = [];\n let b_only = [];\n\n checkSorted(a);\n checkSorted(b);\n\n while (i_a < a.length && i_b < b.length) {\n if (a[i_a] === b[i_b]) {\n i_a++;\n i_b++;\n } else if (a[i_a] < b[i_b]) {\n a_only.push(a[i_a++]);\n } else {\n b_only.push(b[i_b++]);\n }\n }\n\n if (i_a < a.length)\n a_only = a_only.concat(a.slice(i_a));\n if (i_b < b.length)\n b_only = b_only.concat(b.slice(i_b));\n return {\n removed: a_only,\n added: b_only\n };\n}\n\n// Convert from wide: { colA: [1,2,3], colB: [4,5,6], ... }\n// to long: [ {colA: 1, colB: 4}, {colA: 2, colB: 5}, ... ]\nexport function dataframeToD3(df) {\n let names = [];\n let length;\n for (let name in df) {\n if (df.hasOwnProperty(name))\n names.push(name);\n if (typeof(df[name]) !== \"object\" || typeof(df[name].length) === \"undefined\") {\n throw new Error(\"All fields must be arrays\");\n } else if (typeof(length) !== \"undefined\" && length !== df[name].length) {\n throw new Error(\"All fields must be arrays of the same length\");\n }\n length = df[name].length;\n }\n let results = [];\n let item;\n for (let row = 0; row < length; row++) {\n item = {};\n for (let col = 0; col < names.length; col++) {\n item[names[col]] = df[names[col]][row];\n }\n results.push(item);\n }\n return results;\n}\n\n/**\n * Keeps track of all event listener additions/removals and lets all active\n * listeners be removed with a single operation.\n *\n * @private\n */\nexport class SubscriptionTracker {\n constructor(emitter) {\n this._emitter = emitter;\n this._subs = {};\n }\n\n on(eventType, listener) {\n let sub = this._emitter.on(eventType, listener);\n this._subs[sub] = eventType;\n return sub;\n }\n\n off(eventType, listener) {\n let sub = this._emitter.off(eventType, listener);\n if (sub) {\n delete this._subs[sub];\n }\n return sub;\n }\n\n removeAllListeners() {\n let current_subs = this._subs;\n this._subs = {};\n Object.keys(current_subs).forEach((sub) => {\n 
this._emitter.off(current_subs[sub], sub);\n });\n }\n}\n","import Events from \"./events\";\n\nexport default class Var {\n constructor(group, name, /*optional*/ value) {\n this._group = group;\n this._name = name;\n this._value = value;\n this._events = new Events();\n }\n\n get() {\n return this._value;\n }\n\n set(value, /*optional*/ event) {\n if (this._value === value) {\n // Do nothing; the value hasn't changed\n return;\n }\n let oldValue = this._value;\n this._value = value;\n // Alert JavaScript listeners that the value has changed\n let evt = {};\n if (event && typeof(event) === \"object\") {\n for (let k in event) {\n if (event.hasOwnProperty(k))\n evt[k] = event[k];\n }\n }\n evt.oldValue = oldValue;\n evt.value = value;\n this._events.trigger(\"change\", evt, this);\n\n // TODO: Make this extensible, to let arbitrary back-ends know that\n // something has changed\n if (global.Shiny && global.Shiny.onInputChange) {\n global.Shiny.onInputChange(\n \".clientValue-\" +\n (this._group.name !== null ? this._group.name + \"-\" : \"\") +\n this._name,\n typeof(value) === \"undefined\" ? null : value\n );\n }\n }\n\n on(eventType, listener) {\n return this._events.on(eventType, listener);\n }\n\n off(eventType, listener) {\n return this._events.off(eventType, listener);\n }\n}\n"]} \ No newline at end of file diff --git a/content/find/recipes/index_files/crosstalk/scss/crosstalk.scss b/content/find/recipes/index_files/crosstalk/scss/crosstalk.scss deleted file mode 100644 index 35665616..00000000 --- a/content/find/recipes/index_files/crosstalk/scss/crosstalk.scss +++ /dev/null @@ -1,75 +0,0 @@ -/* Adjust margins outwards, so column contents line up with the edges of the - parent of container-fluid. */ -.container-fluid.crosstalk-bscols { - margin-left: -30px; - margin-right: -30px; - white-space: normal; -} - -/* But don't adjust the margins outwards if we're directly under the body, - i.e. we were the top-level of something at the console. 
*/ -body > .container-fluid.crosstalk-bscols { - margin-left: auto; - margin-right: auto; -} - -.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { - display: inline-block; - padding-right: 12px; - vertical-align: top; -} - -@media only screen and (max-width:480px) { - .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { - display: block; - padding-right: inherit; - } -} - -/* Relevant BS3 styles to make filter_checkbox() look reasonable without Bootstrap */ -.crosstalk-input { - margin-bottom: 15px; /* a la .form-group */ - .control-label { - margin-bottom: 0; - vertical-align: middle; - } - input[type="checkbox"] { - margin: 4px 0 0; - margin-top: 1px; - line-height: normal; - } - .checkbox { - position: relative; - display: block; - margin-top: 10px; - margin-bottom: 10px; - } - .checkbox > label{ - padding-left: 20px; - margin-bottom: 0; - font-weight: 400; - cursor: pointer; - } - .checkbox input[type="checkbox"], - .checkbox-inline input[type="checkbox"] { - position: absolute; - margin-top: 2px; - margin-left: -20px; - } - .checkbox + .checkbox { - margin-top: -5px; - } - .checkbox-inline { - position: relative; - display: inline-block; - padding-left: 20px; - margin-bottom: 0; - font-weight: 400; - vertical-align: middle; - cursor: pointer; - } - .checkbox-inline + .checkbox-inline { - margin-top: 0; - margin-left: 10px; - } -} diff --git a/content/find/recipes/index_files/datatables-binding/datatables.js b/content/find/recipes/index_files/datatables-binding/datatables.js deleted file mode 100644 index fcee8ce2..00000000 --- a/content/find/recipes/index_files/datatables-binding/datatables.js +++ /dev/null @@ -1,1512 +0,0 @@ -(function() { - -// some helper functions: using a global object DTWidget so that it can be used -// in JS() code, e.g. 
datatable(options = list(foo = JS('code'))); unlike R's -// dynamic scoping, when 'code' is eval()'ed, JavaScript does not know objects -// from the "parent frame", e.g. JS('DTWidget') will not work unless it was made -// a global object -var DTWidget = {}; - -// 123456666.7890 -> 123,456,666.7890 -var markInterval = function(d, digits, interval, mark, decMark, precision) { - x = precision ? d.toPrecision(digits) : d.toFixed(digits); - if (!/^-?[\d.]+$/.test(x)) return x; - var xv = x.split('.'); - if (xv.length > 2) return x; // should have at most one decimal point - xv[0] = xv[0].replace(new RegExp('\\B(?=(\\d{' + interval + '})+(?!\\d))', 'g'), mark); - return xv.join(decMark); -}; - -DTWidget.formatCurrency = function(data, currency, digits, interval, mark, decMark, before, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - var res = markInterval(d, digits, interval, mark, decMark); - res = before ? (/^-/.test(res) ? 
'-' + currency + res.replace(/^-/, '') : currency + res) : - res + currency; - return res; -}; - -DTWidget.formatString = function(data, prefix, suffix) { - var d = data; - if (d === null) return ''; - return prefix + d + suffix; -}; - -DTWidget.formatPercentage = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d * 100, digits, interval, mark, decMark) + '%'; -}; - -DTWidget.formatRound = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d, digits, interval, mark, decMark); -}; - -DTWidget.formatSignif = function(data, digits, interval, mark, decMark, zeroPrint) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - if (zeroPrint !== null && d === 0.0) return zeroPrint; - return markInterval(d, digits, interval, mark, decMark, true); -}; - -DTWidget.formatDate = function(data, method, params) { - var d = data; - if (d === null) return ''; - // (new Date('2015-10-28')).toDateString() may return 2015-10-27 because the - // actual time created could be like 'Tue Oct 27 2015 19:00:00 GMT-0500 (CDT)', - // i.e. 
the date-only string is treated as UTC time instead of local time - if ((method === 'toDateString' || method === 'toLocaleDateString') && /^\d{4,}\D\d{2}\D\d{2}$/.test(d)) { - d = d.split(/\D/); - d = new Date(d[0], d[1] - 1, d[2]); - } else { - d = new Date(d); - } - return d[method].apply(d, params); -}; - -window.DTWidget = DTWidget; - -// A helper function to update the properties of existing filters -var setFilterProps = function(td, props) { - // Update enabled/disabled state - var $input = $(td).find('input').first(); - var searchable = $input.data('searchable'); - $input.prop('disabled', !searchable || props.disabled); - - // Based on the filter type, set its new values - var type = td.getAttribute('data-type'); - if (['factor', 'logical'].includes(type)) { - // Reformat the new dropdown options for use with selectize - var new_vals = props.params.options.map(function(item) { - return { text: item, value: item }; - }); - - // Find the selectize object - var dropdown = $(td).find('.selectized').eq(0)[0].selectize; - - // Note the current values - var old_vals = dropdown.getValue(); - - // Remove the existing values - dropdown.clearOptions(); - - // Add the new options - dropdown.addOption(new_vals); - - // Preserve the existing values - dropdown.setValue(old_vals); - - } else if (['number', 'integer', 'date', 'time'].includes(type)) { - // Apply internal scaling to new limits. Updating scale not yet implemented. 
- var slider = $(td).find('.noUi-target').eq(0); - var scale = Math.pow(10, Math.max(0, +slider.data('scale') || 0)); - var new_vals = [props.params.min * scale, props.params.max * scale]; - - // Note what the new limits will be just for this filter - var new_lims = new_vals.slice(); - - // Determine the current values and limits - var old_vals = slider.val().map(Number); - var old_lims = slider.noUiSlider('options').range; - old_lims = [old_lims.min, old_lims.max]; - - // Preserve the current values if filters have been applied; otherwise, apply no filtering - if (old_vals[0] != old_lims[0]) { - new_vals[0] = Math.max(old_vals[0], new_vals[0]); - } - - if (old_vals[1] != old_lims[1]) { - new_vals[1] = Math.min(old_vals[1], new_vals[1]); - } - - // Update the endpoints of the slider - slider.noUiSlider({ - start: new_vals, - range: {'min': new_lims[0], 'max': new_lims[1]} - }, true); - } -}; - -var transposeArray2D = function(a) { - return a.length === 0 ? a : HTMLWidgets.transposeArray2D(a); -}; - -var crosstalkPluginsInstalled = false; - -function maybeInstallCrosstalkPlugins() { - if (crosstalkPluginsInstalled) - return; - crosstalkPluginsInstalled = true; - - $.fn.dataTable.ext.afnFiltering.push( - function(oSettings, aData, iDataIndex) { - var ctfilter = oSettings.nTable.ctfilter; - if (ctfilter && !ctfilter[iDataIndex]) - return false; - - var ctselect = oSettings.nTable.ctselect; - if (ctselect && !ctselect[iDataIndex]) - return false; - - return true; - } - ); -} - -HTMLWidgets.widget({ - name: "datatables", - type: "output", - renderOnNullValue: true, - initialize: function(el, width, height) { - // in order that the type=number inputs return a number - $.valHooks.number = { - get: function(el) { - var value = parseFloat(el.value); - return isNaN(value) ? 
"" : value; - } - }; - $(el).html(' '); - return { - data: null, - ctfilterHandle: new crosstalk.FilterHandle(), - ctfilterSubscription: null, - ctselectHandle: new crosstalk.SelectionHandle(), - ctselectSubscription: null - }; - }, - renderValue: function(el, data, instance) { - if (el.offsetWidth === 0 || el.offsetHeight === 0) { - instance.data = data; - return; - } - instance.data = null; - var $el = $(el); - $el.empty(); - - if (data === null) { - $el.append(' '); - // clear previous Shiny inputs (if any) - for (var i in instance.clearInputs) instance.clearInputs[i](); - instance.clearInputs = {}; - return; - } - - var crosstalkOptions = data.crosstalkOptions; - if (!crosstalkOptions) crosstalkOptions = { - 'key': null, 'group': null - }; - if (crosstalkOptions.group) { - maybeInstallCrosstalkPlugins(); - instance.ctfilterHandle.setGroup(crosstalkOptions.group); - instance.ctselectHandle.setGroup(crosstalkOptions.group); - } - - // if we are in the viewer then we always want to fillContainer and - // and autoHideNavigation (unless the user has explicitly set these) - if (window.HTMLWidgets.viewerMode) { - if (!data.hasOwnProperty("fillContainer")) - data.fillContainer = true; - if (!data.hasOwnProperty("autoHideNavigation")) - data.autoHideNavigation = true; - } - - // propagate fillContainer to instance (so we have it in resize) - instance.fillContainer = data.fillContainer; - - var cells = data.data; - - if (cells instanceof Array) cells = transposeArray2D(cells); - - $el.append(data.container); - var $table = $el.find('table'); - if (data.class) $table.addClass(data.class); - if (data.caption) $table.prepend(data.caption); - - if (!data.selection) data.selection = { - mode: 'none', selected: null, target: 'row', selectable: null - }; - if (HTMLWidgets.shinyMode && data.selection.mode !== 'none' && - data.selection.target === 'row+column') { - if ($table.children('tfoot').length === 0) { - $table.append($('')); - $table.find('thead 
tr').clone().appendTo($table.find('tfoot')); - } - } - - // column filters - var filterRow; - switch (data.filter) { - case 'top': - $table.children('thead').append(data.filterHTML); - filterRow = $table.find('thead tr:last td'); - break; - case 'bottom': - if ($table.children('tfoot').length === 0) { - $table.append($('')); - } - $table.children('tfoot').prepend(data.filterHTML); - filterRow = $table.find('tfoot tr:first td'); - break; - } - - var options = { searchDelay: 1000 }; - if (cells !== null) $.extend(options, { - data: cells - }); - - // options for fillContainer - var bootstrapActive = typeof($.fn.popover) != 'undefined'; - if (instance.fillContainer) { - - // force scrollX/scrollY and turn off autoWidth - options.scrollX = true; - options.scrollY = "100px"; // can be any value, we'll adjust below - - // if we aren't paginating then move around the info/filter controls - // to save space at the bottom and rephrase the info callback - if (data.options.paging === false) { - - // we know how to do this cleanly for bootstrap, not so much - // for other themes/layouts - if (bootstrapActive) { - options.dom = "<'row'<'col-sm-4'i><'col-sm-8'f>>" + - "<'row'<'col-sm-12'tr>>"; - } - - options.fnInfoCallback = function(oSettings, iStart, iEnd, - iMax, iTotal, sPre) { - return Number(iTotal).toLocaleString() + " records"; - }; - } - } - - // auto hide navigation if requested - // Note, this only works on client-side processing mode as on server-side, - // cells (data.data) is null; In addition, we require the pageLength option - // being provided explicitly to enable this. Despite we may be able to deduce - // the default value of pageLength, it may complicate things so we'd rather - // put this responsiblity to users and warn them on the R side. - if (data.autoHideNavigation === true && data.options.paging !== false) { - // strip all nav if length >= cells - if ((cells instanceof Array) && data.options.pageLength >= cells.length) - options.dom = bootstrapActive ? 
"<'row'<'col-sm-12'tr>>" : "t"; - // alternatively lean things out for flexdashboard mobile portrait - else if (bootstrapActive && window.FlexDashboard && window.FlexDashboard.isMobilePhone()) - options.dom = "<'row'<'col-sm-12'f>>" + - "<'row'<'col-sm-12'tr>>" + - "<'row'<'col-sm-12'p>>"; - } - - $.extend(true, options, data.options || {}); - - var searchCols = options.searchCols; - if (searchCols) { - searchCols = searchCols.map(function(x) { - return x === null ? '' : x.search; - }); - // FIXME: this means I don't respect the escapeRegex setting - delete options.searchCols; - } - - // server-side processing? - var server = options.serverSide === true; - - // use the dataSrc function to pre-process JSON data returned from R - var DT_rows_all = [], DT_rows_current = []; - if (server && HTMLWidgets.shinyMode && typeof options.ajax === 'object' && - /^session\/[\da-z]+\/dataobj/.test(options.ajax.url) && !options.ajax.dataSrc) { - options.ajax.dataSrc = function(json) { - DT_rows_all = $.makeArray(json.DT_rows_all); - DT_rows_current = $.makeArray(json.DT_rows_current); - var data = json.data; - if (!colReorderEnabled()) return data; - var table = $table.DataTable(), order = table.colReorder.order(), flag = true, i, j, row; - for (i = 0; i < order.length; ++i) if (order[i] !== i) flag = false; - if (flag) return data; - for (i = 0; i < data.length; ++i) { - row = data[i].slice(); - for (j = 0; j < order.length; ++j) data[i][j] = row[order[j]]; - } - return data; - }; - } - - var thiz = this; - if (instance.fillContainer) $table.on('init.dt', function(e) { - thiz.fillAvailableHeight(el, $(el).innerHeight()); - }); - // If the page contains serveral datatables and one of which enables colReorder, - // the table.colReorder.order() function will exist but throws error when called. - // So it seems like the only way to know if colReorder is enabled or not is to - // check the options. 
- var colReorderEnabled = function() { return "colReorder" in options; }; - var table = $table.DataTable(options); - $el.data('datatable', table); - - // Unregister previous Crosstalk event subscriptions, if they exist - if (instance.ctfilterSubscription) { - instance.ctfilterHandle.off("change", instance.ctfilterSubscription); - instance.ctfilterSubscription = null; - } - if (instance.ctselectSubscription) { - instance.ctselectHandle.off("change", instance.ctselectSubscription); - instance.ctselectSubscription = null; - } - - if (!crosstalkOptions.group) { - $table[0].ctfilter = null; - $table[0].ctselect = null; - } else { - var key = crosstalkOptions.key; - function keysToMatches(keys) { - if (!keys) { - return null; - } else { - var selectedKeys = {}; - for (var i = 0; i < keys.length; i++) { - selectedKeys[keys[i]] = true; - } - var matches = {}; - for (var j = 0; j < key.length; j++) { - if (selectedKeys[key[j]]) - matches[j] = true; - } - return matches; - } - } - - function applyCrosstalkFilter(e) { - $table[0].ctfilter = keysToMatches(e.value); - table.draw(); - } - instance.ctfilterSubscription = instance.ctfilterHandle.on("change", applyCrosstalkFilter); - applyCrosstalkFilter({value: instance.ctfilterHandle.filteredKeys}); - - function applyCrosstalkSelection(e) { - if (e.sender !== instance.ctselectHandle) { - table - .rows('.' 
+ selClass, {search: 'applied'}) - .nodes() - .to$() - .removeClass(selClass); - if (selectedRows) - changeInput('rows_selected', selectedRows(), void 0, true); - } - - if (e.sender !== instance.ctselectHandle && e.value && e.value.length) { - var matches = keysToMatches(e.value); - - // persistent selection with plotly (& leaflet) - var ctOpts = crosstalk.var("plotlyCrosstalkOpts").get() || {}; - if (ctOpts.persistent === true) { - var matches = $.extend(matches, $table[0].ctselect); - } - - $table[0].ctselect = matches; - table.draw(); - } else { - if ($table[0].ctselect) { - $table[0].ctselect = null; - table.draw(); - } - } - } - instance.ctselectSubscription = instance.ctselectHandle.on("change", applyCrosstalkSelection); - // TODO: This next line doesn't seem to work when renderDataTable is used - applyCrosstalkSelection({value: instance.ctselectHandle.value}); - } - - var inArray = function(val, array) { - return $.inArray(val, $.makeArray(array)) > -1; - }; - - // search the i-th column - var searchColumn = function(i, value) { - var regex = false, ci = true; - if (options.search) { - regex = options.search.regex, - ci = options.search.caseInsensitive !== false; - } - return table.column(i).search(value, regex, !regex, ci); - }; - - if (data.filter !== 'none') { - - filterRow.each(function(i, td) { - - var $td = $(td), type = $td.data('type'), filter; - var $input = $td.children('div').first().children('input'); - var disabled = $input.prop('disabled'); - var searchable = table.settings()[0].aoColumns[i].bSearchable; - $input.prop('disabled', !searchable || disabled); - $input.data('searchable', searchable); // for updating later - $input.on('input blur', function() { - $input.next('span').toggle(Boolean($input.val())); - }); - // Bootstrap sets pointer-events to none and we won't be able to click - // the clear button - $input.next('span').css('pointer-events', 'auto').hide().click(function() { - 
$(this).hide().prev('input').val('').trigger('input').focus(); - }); - var searchCol; // search string for this column - if (searchCols && searchCols[i]) { - searchCol = searchCols[i]; - $input.val(searchCol).trigger('input'); - } - var $x = $td.children('div').last(); - - // remove the overflow: hidden attribute of the scrollHead - // (otherwise the scrolling table body obscures the filters) - // The workaround and the discussion from - // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 - // Otherwise the filter selection will not be anchored to the values - // when the columns number is many and scrollX is enabled. - var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); - var cssOverflowHead = scrollHead.css('overflow'); - var scrollBody = $(el).find('.dataTables_scrollBody'); - var cssOverflowBody = scrollBody.css('overflow'); - var scrollTable = $(el).find('.dataTables_scroll'); - var cssOverflowTable = scrollTable.css('overflow'); - if (cssOverflowHead === 'hidden') { - $x.on('show hide', function(e) { - if (e.type === 'show') { - scrollHead.css('overflow', 'visible'); - scrollBody.css('overflow', 'visible'); - scrollTable.css('overflow-x', 'scroll'); - } else { - scrollHead.css('overflow', cssOverflowHead); - scrollBody.css('overflow', cssOverflowBody); - scrollTable.css('overflow-x', cssOverflowTable); - } - }); - $x.css('z-index', 25); - } - - if (inArray(type, ['factor', 'logical'])) { - $input.on({ - click: function() { - $input.parent().hide(); $x.show().trigger('show'); filter[0].selectize.focus(); - }, - input: function() { - if ($input.val() === '') filter[0].selectize.setValue([]); - } - }); - var $input2 = $x.children('select'); - filter = $input2.selectize({ - options: $input2.data('options').map(function(v, i) { - return ({text: v, value: v}); - }), - plugins: ['remove_button'], - hideSelected: true, - onChange: function(value) { - if (value === null) value = []; // compatibility with jQuery 3.0 - 
$input.val(value.length ? JSON.stringify(value) : ''); - if (value.length) $input.trigger('input'); - $input.attr('title', $input.val()); - if (server) { - table.column(i).search(value.length ? JSON.stringify(value) : '').draw(); - return; - } - // turn off filter if nothing selected - $td.data('filter', value.length > 0); - table.draw(); // redraw table, and filters will be applied - } - }); - if (searchCol) filter[0].selectize.setValue(JSON.parse(searchCol)); - filter[0].selectize.on('blur', function() { - $x.hide().trigger('hide'); $input.parent().show(); $input.trigger('blur'); - }); - filter.next('div').css('margin-bottom', 'auto'); - } else if (type === 'character') { - var fun = function() { - searchColumn(i, $input.val()).draw(); - }; - if (server) { - fun = $.fn.dataTable.util.throttle(fun, options.searchDelay); - } - $input.on('input', fun); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - var $x0 = $x; - $x = $x0.children('div').first(); - $x0.css({ - 'background-color': '#fff', - 'border': '1px #ddd solid', - 'border-radius': '4px', - 'padding': data.vertical ? '35px 20px': '20px 20px 10px 20px' - }); - var $spans = $x0.children('span').css({ - 'margin-top': data.vertical ? 
'0' : '10px', - 'white-space': 'nowrap' - }); - var $span1 = $spans.first(), $span2 = $spans.last(); - var r1 = +$x.data('min'), r2 = +$x.data('max'); - // when the numbers are too small or have many decimal places, the - // slider may have numeric precision problems (#150) - var scale = Math.pow(10, Math.max(0, +$x.data('scale') || 0)); - r1 = Math.round(r1 * scale); r2 = Math.round(r2 * scale); - var scaleBack = function(x, scale) { - if (scale === 1) return x; - var d = Math.round(Math.log(scale) / Math.log(10)); - // to avoid problems like 3.423/100 -> 0.034230000000000003 - return (x / scale).toFixed(d); - }; - var slider_min = function() { - return filter.noUiSlider('options').range.min; - }; - var slider_max = function() { - return filter.noUiSlider('options').range.max; - }; - $input.on({ - focus: function() { - $x0.show().trigger('show'); - // first, make sure the slider div leaves at least 20px between - // the two (slider value) span's - $x0.width(Math.max(160, $span1.outerWidth() + $span2.outerWidth() + 20)); - // then, if the input is really wide or slider is vertical, - // make the slider the same width as the input - if ($x0.outerWidth() < $input.outerWidth() || data.vertical) { - $x0.outerWidth($input.outerWidth()); - } - // make sure the slider div does not reach beyond the right margin - if ($(window).width() < $x0.offset().left + $x0.width()) { - $x0.offset({ - 'left': $input.offset().left + $input.outerWidth() - $x0.outerWidth() - }); - } - }, - blur: function() { - $x0.hide().trigger('hide'); - }, - input: function() { - if ($input.val() === '') filter.val([slider_min(), slider_max()]); - }, - change: function() { - var v = $input.val().replace(/\s/g, ''); - if (v === '') return; - v = v.split('...'); - if (v.length !== 2) { - $input.parent().addClass('has-error'); - return; - } - if (v[0] === '') v[0] = slider_min(); - if (v[1] === '') v[1] = slider_max(); - $input.parent().removeClass('has-error'); - // treat date as UTC time at midnight - 
var strTime = function(x) { - var s = type === 'date' ? 'T00:00:00Z' : ''; - var t = new Date(x + s).getTime(); - // add 10 minutes to date since it does not hurt the date, and - // it helps avoid the tricky floating point arithmetic problems, - // e.g. sometimes the date may be a few milliseconds earlier - // than the midnight due to precision problems in noUiSlider - return type === 'date' ? t + 3600000 : t; - }; - if (inArray(type, ['date', 'time'])) { - v[0] = strTime(v[0]); - v[1] = strTime(v[1]); - } - if (v[0] != slider_min()) v[0] *= scale; - if (v[1] != slider_max()) v[1] *= scale; - filter.val(v); - } - }); - var formatDate = function(d, isoFmt) { - d = scaleBack(d, scale); - if (type === 'number') return d; - if (type === 'integer') return parseInt(d); - var x = new Date(+d); - var fmt = ('filterDateFmt' in data) ? data.filterDateFmt[i] : undefined; - if (fmt !== undefined && isoFmt === false) return x[fmt.method].apply(x, fmt.params); - if (type === 'date') { - var pad0 = function(x) { - return ('0' + x).substr(-2, 2); - }; - return x.getUTCFullYear() + '-' + pad0(1 + x.getUTCMonth()) - + '-' + pad0(x.getUTCDate()); - } else { - return x.toISOString(); - } - }; - var opts = type === 'date' ? { step: 60 * 60 * 1000 } : - type === 'integer' ? { step: 1 } : {}; - - opts.orientation = data.vertical ? 'vertical': 'horizontal'; - opts.direction = data.vertical ? 
'rtl': 'ltr'; - - filter = $x.noUiSlider($.extend({ - start: [r1, r2], - range: {min: r1, max: r2}, - connect: true - }, opts)); - if (scale > 1) (function() { - var t1 = r1, t2 = r2; - var val = filter.val(); - while (val[0] > r1 || val[1] < r2) { - if (val[0] > r1) { - t1 -= val[0] - r1; - } - if (val[1] < r2) { - t2 += r2 - val[1]; - } - filter = $x.noUiSlider($.extend({ - start: [t1, t2], - range: {min: t1, max: t2}, - connect: true - }, opts), true); - val = filter.val(); - } - r1 = t1; r2 = t2; - })(); - var updateSliderText = function(v1, v2) { - $span1.text(formatDate(v1, false)); $span2.text(formatDate(v2, false)); - }; - updateSliderText(r1, r2); - var updateSlider = function(e) { - var val = filter.val(); - // turn off filter if in full range - $td.data('filter', val[0] > slider_min() || val[1] < slider_max()); - var v1 = formatDate(val[0]), v2 = formatDate(val[1]), ival; - if ($td.data('filter')) { - ival = v1 + ' ... ' + v2; - $input.attr('title', ival).val(ival).trigger('input'); - } else { - $input.attr('title', '').val(''); - } - updateSliderText(val[0], val[1]); - if (e.type === 'slide') return; // no searching when sliding only - if (server) { - table.column(i).search($td.data('filter') ? ival : '').draw(); - return; - } - table.draw(); - }; - filter.on({ - set: updateSlider, - slide: updateSlider - }); - } - - // server-side processing will be handled by R (or whatever server - // language you use); the following code is only needed for client-side - // processing - if (server) { - // if a search string has been pre-set, search now - if (searchCol) searchColumn(i, searchCol).draw(); - return; - } - - var customFilter = function(settings, data, dataIndex) { - // there is no way to attach a search function to a specific table, - // and we need to make sure a global search function is not applied to - // all tables (i.e. 
a range filter in a previous table should not be - // applied to the current table); we use the settings object to - // determine if we want to perform searching on the current table, - // since settings.sTableId will be different to different tables - if (table.settings()[0] !== settings) return true; - // no filter on this column or no need to filter this column - if (typeof filter === 'undefined' || !$td.data('filter')) return true; - - var r = filter.val(), v, r0, r1; - var i_data = function(i) { - if (!colReorderEnabled()) return i; - var order = table.colReorder.order(), k; - for (k = 0; k < order.length; ++k) if (order[k] === i) return k; - return i; // in theory it will never be here... - } - v = data[i_data(i)]; - if (type === 'number' || type === 'integer') { - v = parseFloat(v); - // how to handle NaN? currently exclude these rows - if (isNaN(v)) return(false); - r0 = parseFloat(scaleBack(r[0], scale)) - r1 = parseFloat(scaleBack(r[1], scale)); - if (v >= r0 && v <= r1) return true; - } else if (type === 'date' || type === 'time') { - v = new Date(v); - r0 = new Date(r[0] / scale); r1 = new Date(r[1] / scale); - if (v >= r0 && v <= r1) return true; - } else if (type === 'factor') { - if (r.length === 0 || inArray(v, r)) return true; - } else if (type === 'logical') { - if (r.length === 0) return true; - if (inArray(v === '' ? 
'na' : v, r)) return true; - } - return false; - }; - - $.fn.dataTable.ext.search.push(customFilter); - - // search for the preset search strings if it is non-empty - if (searchCol) { - if (inArray(type, ['factor', 'logical'])) { - filter[0].selectize.setValue(JSON.parse(searchCol)); - } else if (type === 'character') { - $input.trigger('input'); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - $input.trigger('change'); - } - } - - }); - - } - - // highlight search keywords - var highlight = function() { - var body = $(table.table().body()); - // removing the old highlighting first - body.unhighlight(); - - // don't highlight the "not found" row, so we get the rows using the api - if (table.rows({ filter: 'applied' }).data().length === 0) return; - // highlight global search keywords - body.highlight($.trim(table.search()).split(/\s+/)); - // then highlight keywords from individual column filters - if (filterRow) filterRow.each(function(i, td) { - var $td = $(td), type = $td.data('type'); - if (type !== 'character') return; - var $input = $td.children('div').first().children('input'); - var column = table.column(i).nodes().to$(), - val = $.trim($input.val()); - if (type !== 'character' || val === '') return; - column.highlight(val.split(/\s+/)); - }); - }; - - if (options.searchHighlight) { - table - .on('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth', highlight) - .on('destroy', function() { - // remove event handler - table.off('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth'); - }); - - // Set the option for escaping regex characters in our search string. This will be used - // for all future matching. 
- jQuery.fn.highlight.options.escapeRegex = (!options.search || !options.search.regex); - - // initial highlight for state saved conditions and initial states - highlight(); - } - - // run the callback function on the table instance - if (typeof data.callback === 'function') data.callback(table); - - // double click to edit the cell, row, column, or all cells - if (data.editable) table.on('dblclick.dt', 'tbody td', function(e) { - // only bring up the editor when the cell itself is dbclicked, and ignore - // other dbclick events bubbled up (e.g. from the ) - if (e.target !== this) return; - var target = [], immediate = false; - switch (data.editable.target) { - case 'cell': - target = [this]; - immediate = true; // edit will take effect immediately - break; - case 'row': - target = table.cells(table.cell(this).index().row, '*').nodes(); - break; - case 'column': - target = table.cells('*', table.cell(this).index().column).nodes(); - break; - case 'all': - target = table.cells().nodes(); - break; - default: - throw 'The editable parameter must be "cell", "row", "column", or "all"'; - } - var disableCols = data.editable.disable ? 
data.editable.disable.columns : null; - var numericCols = data.editable.numeric; - var areaCols = data.editable.area; - for (var i = 0; i < target.length; i++) { - (function(cell, current) { - var $cell = $(cell), html = $cell.html(); - var _cell = table.cell(cell), value = _cell.data(), index = _cell.index().column; - var $input; - if (inArray(index, numericCols)) { - $input = $(''); - } else if (inArray(index, areaCols)) { - $input = $(''); - } else { - $input = $(''); - } - if (!immediate) { - $cell.data('input', $input).data('html', html); - $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); - } - $input.val(value); - if (inArray(index, disableCols)) { - $input.attr('readonly', '').css('filter', 'invert(25%)'); - } - $cell.empty().append($input); - if (cell === current) $input.focus(); - $input.css('width', '100%'); - - if (immediate) $input.on('blur', function(e) { - var valueNew = $input.val(); - if (valueNew != value) { - _cell.data(valueNew); - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', [cellInfo(cell)], 'DT.cellInfo', null, {priority: 'event'}); - } - // for server-side processing, users have to call replaceData() to update the table - if (!server) table.draw(false); - } else { - $cell.html(html); - } - }).on('keyup', function(e) { - // hit Escape to cancel editing - if (e.keyCode === 27) $input.trigger('blur'); - }); - - // bulk edit (row, column, or all) - if (!immediate) $input.on('keyup', function(e) { - var removeInput = function($cell, restore) { - $cell.data('input').remove(); - if (restore) $cell.html($cell.data('html')); - } - if (e.keyCode === 27) { - for (var i = 0; i < target.length; i++) { - removeInput($(target[i]), true); - } - } else if (e.keyCode === 13 && e.ctrlKey) { - // Ctrl + Enter - var cell, $cell, _cell, cellData = []; - for (var i = 0; i < target.length; i++) { - cell = target[i]; $cell = $(cell); _cell = table.cell(cell); - _cell.data($cell.data('input').val()); - HTMLWidgets.shinyMode && 
cellData.push(cellInfo(cell)); - removeInput($cell, false); - } - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', cellData, 'DT.cellInfo', null, {priority: "event"}); - } - if (!server) table.draw(false); - } - }); - })(target[i], this); - } - }); - - // interaction with shiny - if (!HTMLWidgets.shinyMode && !crosstalkOptions.group) return; - - var methods = {}; - var shinyData = {}; - - methods.updateCaption = function(caption) { - if (!caption) return; - $table.children('caption').replaceWith(caption); - } - - // register clear functions to remove input values when the table is removed - instance.clearInputs = {}; - - var changeInput = function(id, value, type, noCrosstalk, opts) { - var event = id; - id = el.id + '_' + id; - if (type) id = id + ':' + type; - // do not update if the new value is the same as old value - if (event !== 'cell_edit' && !/_clicked$/.test(event) && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) - return; - shinyData[id] = JSON.stringify(value); - if (HTMLWidgets.shinyMode && Shiny.setInputValue) { - Shiny.setInputValue(id, value, opts); - if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { - Shiny.setInputValue(id, null); - } - } - - // HACK - if (event === "rows_selected" && !noCrosstalk) { - if (crosstalkOptions.group) { - var keys = crosstalkOptions.key; - var selectedKeys = null; - if (value) { - selectedKeys = []; - for (var i = 0; i < value.length; i++) { - // The value array's contents use 1-based row numbers, so we must - // convert to 0-based before indexing into the keys array. 
- selectedKeys.push(keys[value[i] - 1]); - } - } - instance.ctselectHandle.set(selectedKeys); - } - } - }; - - var addOne = function(x) { - return x.map(function(i) { return 1 + i; }); - }; - - var unique = function(x) { - var ux = []; - $.each(x, function(i, el){ - if ($.inArray(el, ux) === -1) ux.push(el); - }); - return ux; - } - - // change the row index of a cell - var tweakCellIndex = function(cell) { - var info = cell.index(); - // some cell may not be valid. e.g, #759 - // when using the RowGroup extension, datatables will - // generate the row label and the cells are not part of - // the data thus contain no row/col info - if (info === undefined) - return {row: null, col: null}; - if (server) { - info.row = DT_rows_current[info.row]; - } else { - info.row += 1; - } - return {row: info.row, col: info.column}; - } - - var cleanSelectedValues = function() { - changeInput('rows_selected', []); - changeInput('columns_selected', []); - changeInput('cells_selected', transposeArray2D([]), 'shiny.matrix'); - } - // #828 we should clean the selection on the server-side when the table reloads - cleanSelectedValues(); - - // a flag to indicates if select extension is initialized or not - var flagSelectExt = table.settings()[0]._select !== undefined; - // the Select extension should only be used in the client mode and - // when the selection.mode is set to none - if (data.selection.mode === 'none' && !server && flagSelectExt) { - var updateRowsSelected = function() { - var rows = table.rows({selected: true}); - var selected = []; - $.each(rows.indexes().toArray(), function(i, v) { - selected.push(v + 1); - }); - changeInput('rows_selected', selected); - } - var updateColsSelected = function() { - var columns = table.columns({selected: true}); - changeInput('columns_selected', columns.indexes().toArray()); - } - var updateCellsSelected = function() { - var cells = table.cells({selected: true}); - var selected = []; - cells.every(function() { - var row = 
this.index().row; - var col = this.index().column; - selected = selected.concat([[row + 1, col]]); - }); - changeInput('cells_selected', transposeArray2D(selected), 'shiny.matrix'); - } - table.on('select deselect', function(e, dt, type, indexes) { - updateRowsSelected(); - updateColsSelected(); - updateCellsSelected(); - }) - } - - var selMode = data.selection.mode, selTarget = data.selection.target; - var selDisable = data.selection.selectable === false; - if (inArray(selMode, ['single', 'multiple'])) { - var selClass = inArray(data.style, ['bootstrap', 'bootstrap4']) ? 'active' : 'selected'; - // selected1: row indices; selected2: column indices - var initSel = function(x) { - if (x === null || typeof x === 'boolean' || selTarget === 'cell') { - return {rows: [], cols: []}; - } else if (selTarget === 'row') { - return {rows: $.makeArray(x), cols: []}; - } else if (selTarget === 'column') { - return {rows: [], cols: $.makeArray(x)}; - } else if (selTarget === 'row+column') { - return {rows: $.makeArray(x.rows), cols: $.makeArray(x.cols)}; - } - } - var selected = data.selection.selected; - var selected1 = initSel(selected).rows, selected2 = initSel(selected).cols; - // selectable should contain either all positive or all non-positive values, not both - // positive values indicate "selectable" while non-positive values means "nonselectable" - // the assertion is performed on R side. (only column indicides could be zero which indicates - // the row name) - var selectable = data.selection.selectable; - var selectable1 = initSel(selectable).rows, selectable2 = initSel(selectable).cols; - - // After users reorder the rows or filter the table, we cannot use the table index - // directly. Instead, we need this function to find out the rows between the two clicks. - // If user filter the table again between the start click and the end click, the behavior - // would be undefined, but it should not be a problem. 
- var shiftSelRowsIndex = function(start, end) { - var indexes = server ? DT_rows_all : table.rows({ search: 'applied' }).indexes().toArray(); - start = indexes.indexOf(start); end = indexes.indexOf(end); - // if start is larger than end, we need to swap - if (start > end) { - var tmp = end; end = start; start = tmp; - } - return indexes.slice(start, end + 1); - } - - var serverRowIndex = function(clientRowIndex) { - return server ? DT_rows_current[clientRowIndex] : clientRowIndex + 1; - } - - // row, column, or cell selection - var lastClickedRow; - if (inArray(selTarget, ['row', 'row+column'])) { - // Get the current selected rows. It will also - // update the selected1's value based on the current row selection state - // Note we can't put this function inside selectRows() directly, - // the reason is method.selectRows() will override selected1's value but this - // function will add rows to selected1 (keep the existing selection), which is - // inconsistent with column and cell selection. - var selectedRows = function() { - var rows = table.rows('.' + selClass); - var idx = rows.indexes().toArray(); - if (!server) { - selected1 = addOne(idx); - return selected1; - } - idx = idx.map(function(i) { - return DT_rows_current[i]; - }); - selected1 = selMode === 'multiple' ? 
unique(selected1.concat(idx)) : idx; - return selected1; - } - // Change selected1's value based on selectable1, then refresh the row state - var onlyKeepSelectableRows = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected1 = []; - return; - } - if (selectable1.length === 0) return; - var nonselectable = selectable1[0] <= 0; - if (nonselectable) { - // should make selectable1 positive - selected1 = $(selected1).not(selectable1.map(function(i) { return -i; })).get(); - } else { - selected1 = $(selected1).filter(selectable1).get(); - } - } - // Change selected1's value based on selectable1, then - // refresh the row selection state according to values in selected1 - var selectRows = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableRows(); - table.$('tr.' + selClass).removeClass(selClass); - if (selected1.length === 0) return; - if (server) { - table.rows({page: 'current'}).every(function() { - if (inArray(DT_rows_current[this.index()], selected1)) { - $(this.node()).addClass(selClass); - } - }); - } else { - var selected0 = selected1.map(function(i) { return i - 1; }); - $(table.rows(selected0).nodes()).addClass(selClass); - } - } - table.on('mousedown.dt', 'tbody tr', function(e) { - var $this = $(this), thisRow = table.row(this); - if (selMode === 'multiple') { - if (e.shiftKey && lastClickedRow !== undefined) { - // select or de-select depends on the last clicked row's status - var flagSel = !$this.hasClass(selClass); - var crtClickedRow = serverRowIndex(thisRow.index()); - if (server) { - var rowsIndex = shiftSelRowsIndex(lastClickedRow, crtClickedRow); - // update current page's selClass - rowsIndex.map(function(i) { - var rowIndex = DT_rows_current.indexOf(i); - if (rowIndex >= 0) { - var row = table.row(rowIndex).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - } - }); - // update selected1 - if (flagSel) { - selected1 
= unique(selected1.concat(rowsIndex)); - } else { - selected1 = selected1.filter(function(index) { - return !inArray(index, rowsIndex); - }); - } - } else { - // js starts from 0 - shiftSelRowsIndex(lastClickedRow - 1, crtClickedRow - 1).map(function(value) { - var row = table.row(value).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - }); - } - e.preventDefault(); - } else { - $this.toggleClass(selClass); - } - } else { - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - } else { - table.$('tr.' + selClass).removeClass(selClass); - $this.addClass(selClass); - } - } - if (server && !$this.hasClass(selClass)) { - var id = DT_rows_current[thisRow.index()]; - // remove id from selected1 since its class .selected has been removed - if (inArray(id, selected1)) selected1.splice($.inArray(id, selected1), 1); - } - selectedRows(); // update selected1's value based on selClass - selectRows(false); // only keep the selectable rows - changeInput('rows_selected', selected1); - changeInput('row_last_clicked', serverRowIndex(thisRow.index()), null, null, {priority: 'event'}); - lastClickedRow = serverRowIndex(thisRow.index()); - }); - selectRows(false); // in case users have specified pre-selected rows - // restore selected rows after the table is redrawn (e.g. 
sort/search/page); - // client-side tables will preserve the selections automatically; for - // server-side tables, we have to *real* row indices are in `selected1` - changeInput('rows_selected', selected1); - if (server) table.on('draw.dt', function(e) { selectRows(false); }); - methods.selectRows = function(selected, ignoreSelectable) { - selected1 = $.makeArray(selected); - selectRows(ignoreSelectable); - changeInput('rows_selected', selected1); - } - } - - if (inArray(selTarget, ['column', 'row+column'])) { - if (selTarget === 'row+column') { - $(table.columns().footer()).css('cursor', 'pointer'); - } - // update selected2's value based on selectable2 - var onlyKeepSelectableCols = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected2 = []; - return; - } - if (selectable2.length === 0) return; - var nonselectable = selectable2[0] <= 0; - if (nonselectable) { - // need to make selectable2 positive - selected2 = $(selected2).not(selectable2.map(function(i) { return -i; })).get(); - } else { - selected2 = $(selected2).filter(selectable2).get(); - } - } - // update selected2 and then - // refresh the col selection state according to values in selected2 - var selectCols = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCols(); - // if selected2 is not a valide index (e.g., larger than the column number) - // table.columns(selected2) will fail and result in a blank table - // this is different from the table.rows(), where the out-of-range indexes - // doesn't affect at all - selected2 = $(selected2).filter(table.columns().indexes()).get(); - table.columns().nodes().flatten().to$().removeClass(selClass); - if (selected2.length > 0) - table.columns(selected2).nodes().flatten().to$().addClass(selClass); - } - var callback = function() { - var colIdx = selTarget === 'column' ? 
table.cell(this).index().column : - $.inArray(this, table.columns().footer()), - thisCol = $(table.column(colIdx).nodes()); - if (colIdx === -1) return; - if (thisCol.hasClass(selClass)) { - thisCol.removeClass(selClass); - selected2.splice($.inArray(colIdx, selected2), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - thisCol.addClass(selClass); - selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx])); - } - selectCols(false); // update selected2 based on selectable - changeInput('columns_selected', selected2); - } - if (selTarget === 'column') { - $(table.table().body()).on('click.dt', 'td', callback); - } else { - $(table.table().footer()).on('click.dt', 'tr th', callback); - } - selectCols(false); // in case users have specified pre-selected columns - changeInput('columns_selected', selected2); - if (server) table.on('draw.dt', function(e) { selectCols(false); }); - methods.selectColumns = function(selected, ignoreSelectable) { - selected2 = $.makeArray(selected); - selectCols(ignoreSelectable); - changeInput('columns_selected', selected2); - } - } - - if (selTarget === 'cell') { - var selected3 = [], selectable3 = []; - if (selected !== null) selected3 = selected; - if (selectable !== null && typeof selectable !== 'boolean') selectable3 = selectable; - var findIndex = function(ij, sel) { - for (var i = 0; i < sel.length; i++) { - if (ij[0] === sel[i][0] && ij[1] === sel[i][1]) return i; - } - return -1; - } - // Change selected3's value based on selectable3, then refresh the cell state - var onlyKeepSelectableCells = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected3 = []; - return; - } - if (selectable3.length === 0) return; - var nonselectable = selectable3[0][0] <= 0; - var out = []; - if (nonselectable) { - selected3.map(function(ij) { - // should make selectable3 positive - if (findIndex([-ij[0], -ij[1]], selectable3) === -1) { 
out.push(ij); } - }); - } else { - selected3.map(function(ij) { - if (findIndex(ij, selectable3) > -1) { out.push(ij); } - }); - } - selected3 = out; - } - // Change selected3's value based on selectable3, then - // refresh the cell selection state according to values in selected3 - var selectCells = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCells(); - table.$('td.' + selClass).removeClass(selClass); - if (selected3.length === 0) return; - if (server) { - table.cells({page: 'current'}).every(function() { - var info = tweakCellIndex(this); - if (findIndex([info.row, info.col], selected3) > -1) - $(this.node()).addClass(selClass); - }); - } else { - selected3.map(function(ij) { - $(table.cell(ij[0] - 1, ij[1]).node()).addClass(selClass); - }); - } - }; - table.on('click.dt', 'tbody td', function() { - var $this = $(this), info = tweakCellIndex(table.cell(this)); - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - selected3.splice(findIndex([info.row, info.col], selected3), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - $this.addClass(selClass); - selected3 = selMode === 'single' ? [[info.row, info.col]] : - unique(selected3.concat([[info.row, info.col]])); - } - selectCells(false); // must call this to update selected3 based on selectable3 - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - }); - selectCells(false); // in case users have specified pre-selected columns - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - - if (server) table.on('draw.dt', function(e) { selectCells(false); }); - methods.selectCells = function(selected, ignoreSelectable) { - selected3 = selected ? 
selected : []; - selectCells(ignoreSelectable); - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - } - } - } - - // expose some table info to Shiny - var updateTableInfo = function(e, settings) { - // TODO: is anyone interested in the page info? - // changeInput('page_info', table.page.info()); - var updateRowInfo = function(id, modifier) { - var idx; - if (server) { - idx = modifier.page === 'current' ? DT_rows_current : DT_rows_all; - } else { - var rows = table.rows($.extend({ - search: 'applied', - page: 'all' - }, modifier)); - idx = addOne(rows.indexes().toArray()); - } - changeInput('rows' + '_' + id, idx); - }; - updateRowInfo('current', {page: 'current'}); - updateRowInfo('all', {}); - } - table.on('draw.dt', updateTableInfo); - updateTableInfo(); - - // state info - table.on('draw.dt column-visibility.dt', function() { - changeInput('state', table.state()); - }); - changeInput('state', table.state()); - - // search info - var updateSearchInfo = function() { - changeInput('search', table.search()); - if (filterRow) changeInput('search_columns', filterRow.toArray().map(function(td) { - return $(td).find('input').first().val(); - })); - } - table.on('draw.dt', updateSearchInfo); - updateSearchInfo(); - - var cellInfo = function(thiz) { - var info = tweakCellIndex(table.cell(thiz)); - info.value = table.cell(thiz).data(); - return info; - } - // the current cell clicked on - table.on('click.dt', 'tbody td', function() { - changeInput('cell_clicked', cellInfo(this), null, null, {priority: 'event'}); - }) - changeInput('cell_clicked', {}); - - // do not trigger table selection when clicking on links unless they have classes - table.on('click.dt', 'tbody td a', function(e) { - if (this.className === '') e.stopPropagation(); - }); - - methods.addRow = function(data, rowname, resetPaging) { - var n = table.columns().indexes().length, d = n - data.length; - if (d === 1) { - data = rowname.concat(data) - } else if (d !== 0) { - 
console.log(data); - console.log(table.columns().indexes()); - throw 'New data must be of the same length as current data (' + n + ')'; - }; - table.row.add(data).draw(resetPaging); - } - - methods.updateSearch = function(keywords) { - if (keywords.global !== null) - $(table.table().container()).find('input[type=search]').first() - .val(keywords.global).trigger('input'); - var columns = keywords.columns; - if (!filterRow || columns === null) return; - filterRow.toArray().map(function(td, i) { - var v = typeof columns === 'string' ? columns : columns[i]; - if (typeof v === 'undefined') { - console.log('The search keyword for column ' + i + ' is undefined') - return; - } - $(td).find('input').first().val(v); - searchColumn(i, v); - }); - table.draw(); - } - - methods.hideCols = function(hide, reset) { - if (reset) table.columns().visible(true, false); - table.columns(hide).visible(false); - } - - methods.showCols = function(show, reset) { - if (reset) table.columns().visible(false, false); - table.columns(show).visible(true); - } - - methods.colReorder = function(order, origOrder) { - table.colReorder.order(order, origOrder); - } - - methods.selectPage = function(page) { - if (table.page.info().pages < page || page < 1) { - throw 'Selected page is out of range'; - }; - table.page(page - 1).draw(false); - } - - methods.reloadData = function(resetPaging, clearSelection) { - // empty selections first if necessary - if (methods.selectRows && inArray('row', clearSelection)) methods.selectRows([]); - if (methods.selectColumns && inArray('column', clearSelection)) methods.selectColumns([]); - if (methods.selectCells && inArray('cell', clearSelection)) methods.selectCells([]); - table.ajax.reload(null, resetPaging); - } - - // update table filters (set new limits of sliders) - methods.updateFilters = function(newProps) { - // loop through each filter in the filter row - filterRow.each(function(i, td) { - var k = i; - if (filterRow.length > newProps.length) { - if (i === 0) 
return; // first column is row names - k = i - 1; - } - // Update the filters to reflect the updated data. - // Allow "falsy" (e.g. NULL) to signify a no-op. - if (newProps[k]) { - setFilterProps(td, newProps[k]); - } - }); - }; - - table.shinyMethods = methods; - }, - resize: function(el, width, height, instance) { - if (instance.data) this.renderValue(el, instance.data, instance); - - // dynamically adjust height if fillContainer = TRUE - if (instance.fillContainer) - this.fillAvailableHeight(el, height); - - this.adjustWidth(el); - }, - - // dynamically set the scroll body to fill available height - // (used with fillContainer = TRUE) - fillAvailableHeight: function(el, availableHeight) { - - // see how much of the table is occupied by header/footer elements - // and use that to compute a target scroll body height - var dtWrapper = $(el).find('div.dataTables_wrapper'); - var dtScrollBody = $(el).find($('div.dataTables_scrollBody')); - var framingHeight = dtWrapper.innerHeight() - dtScrollBody.innerHeight(); - var scrollBodyHeight = availableHeight - framingHeight; - - // we need to set `max-height` to none as datatables library now sets this - // to a fixed height, disabling the ability to resize to fill the window, - // as it will be set to a fixed 100px under such circumstances, e.g., RStudio IDE, - // or FlexDashboard - // see https://github.com/rstudio/DT/issues/951#issuecomment-1026464509 - dtScrollBody.css('max-height', 'none'); - // set the height - dtScrollBody.height(scrollBodyHeight + 'px'); - }, - - // adjust the width of columns; remove the hard-coded widths on table and the - // scroll header when scrollX/Y are enabled - adjustWidth: function(el) { - var $el = $(el), table = $el.data('datatable'); - if (table) table.columns.adjust(); - $el.find('.dataTables_scrollHeadInner').css('width', '') - .children('table').css('margin-left', ''); - } -}); - - if (!HTMLWidgets.shinyMode) return; - - Shiny.addCustomMessageHandler('datatable-calls', 
function(data) { - var id = data.id; - var el = document.getElementById(id); - var table = el ? $(el).data('datatable') : null; - if (!table) { - console.log("Couldn't find table with id " + id); - return; - } - - var methods = table.shinyMethods, call = data.call; - if (methods[call.method]) { - methods[call.method].apply(table, call.args); - } else { - console.log("Unknown method " + call.method); - } - }); - -})(); diff --git a/content/find/recipes/index_files/datatables-css/datatables-crosstalk.css b/content/find/recipes/index_files/datatables-css/datatables-crosstalk.css deleted file mode 100644 index fb5bae84..00000000 --- a/content/find/recipes/index_files/datatables-css/datatables-crosstalk.css +++ /dev/null @@ -1,23 +0,0 @@ -.dt-crosstalk-fade { - opacity: 0.2; -} - -html body div.DTS div.dataTables_scrollBody { - background: none; -} - - -/* -Fix https://github.com/rstudio/DT/issues/563 -If the `table.display` is set to "block" (e.g., pkgdown), the browser will display -datatable objects strangely. The search panel and the page buttons will still be -in full-width but the table body will be "compact" and shorter. -In therory, having this attributes will affect `dom="t"` -with `display: block` users. But in reality, there should be no one. -We may remove the below lines in the future if the upstream agree to have this there. 
-See https://github.com/DataTables/DataTablesSrc/issues/160 -*/ - -table.dataTable { - display: table; -} diff --git a/content/find/recipes/index_files/dt-core/css/jquery.dataTables.extra.css b/content/find/recipes/index_files/dt-core/css/jquery.dataTables.extra.css deleted file mode 100644 index b2dd141f..00000000 --- a/content/find/recipes/index_files/dt-core/css/jquery.dataTables.extra.css +++ /dev/null @@ -1,28 +0,0 @@ -/* Selected rows/cells */ -table.dataTable tr.selected td, table.dataTable td.selected { - background-color: #b0bed9 !important; -} -/* In case of scrollX/Y or FixedHeader */ -.dataTables_scrollBody .dataTables_sizing { - visibility: hidden; -} - -/* The datatables' theme CSS file doesn't define -the color but with white background. It leads to an issue that -when the HTML's body color is set to 'white', the user can't -see the text since the background is white. One case happens in the -RStudio's IDE when inline viewing the DT table inside an Rmd file, -if the IDE theme is set to "Cobalt". - -See https://github.com/rstudio/DT/issues/447 for more info - -This fixes should have little side-effects because all the other elements -of the default theme use the #333 font color. - -TODO: The upstream may use relative colors for both the table background -and the color. It means the table can display well without this patch -then. At that time, we need to remove the below CSS attributes. 
-*/ -div.datatables { - color: #333; -} diff --git a/content/find/recipes/index_files/dt-core/css/jquery.dataTables.min.css b/content/find/recipes/index_files/dt-core/css/jquery.dataTables.min.css deleted file mode 100644 index 173ae3c4..00000000 --- a/content/find/recipes/index_files/dt-core/css/jquery.dataTables.min.css +++ /dev/null @@ -1 +0,0 @@ -table.dataTable td.dt-control{text-align:center;cursor:pointer}table.dataTable td.dt-control:before{height:1em;width:1em;margin-top:-9px;display:inline-block;color:white;border:.15em solid white;border-radius:1em;box-shadow:0 0 .2em #444;box-sizing:content-box;text-align:center;text-indent:0 !important;font-family:"Courier New",Courier,monospace;line-height:1em;content:"+";background-color:#31b131}table.dataTable tr.dt-hasChild td.dt-control:before{content:"-";background-color:#d33333}table.dataTable thead>tr>th.sorting,table.dataTable thead>tr>th.sorting_asc,table.dataTable thead>tr>th.sorting_desc,table.dataTable thead>tr>th.sorting_asc_disabled,table.dataTable thead>tr>th.sorting_desc_disabled,table.dataTable thead>tr>td.sorting,table.dataTable thead>tr>td.sorting_asc,table.dataTable thead>tr>td.sorting_desc,table.dataTable thead>tr>td.sorting_asc_disabled,table.dataTable thead>tr>td.sorting_desc_disabled{cursor:pointer;position:relative;padding-right:26px}table.dataTable thead>tr>th.sorting:before,table.dataTable thead>tr>th.sorting:after,table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_asc:after,table.dataTable thead>tr>th.sorting_desc:before,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>th.sorting_asc_disabled:after,table.dataTable thead>tr>th.sorting_desc_disabled:before,table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting:before,table.dataTable thead>tr>td.sorting:after,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable 
thead>tr>td.sorting_asc:after,table.dataTable thead>tr>td.sorting_desc:before,table.dataTable thead>tr>td.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_asc_disabled:after,table.dataTable thead>tr>td.sorting_desc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:after{position:absolute;display:block;opacity:.125;right:10px;line-height:9px;font-size:.9em}table.dataTable thead>tr>th.sorting:before,table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_desc:before,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>th.sorting_desc_disabled:before,table.dataTable thead>tr>td.sorting:before,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_desc:before,table.dataTable thead>tr>td.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:before{bottom:50%;content:"▴"}table.dataTable thead>tr>th.sorting:after,table.dataTable thead>tr>th.sorting_asc:after,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>th.sorting_asc_disabled:after,table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting:after,table.dataTable thead>tr>td.sorting_asc:after,table.dataTable thead>tr>td.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc_disabled:after,table.dataTable thead>tr>td.sorting_desc_disabled:after{top:50%;content:"▾"}table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_desc:after{opacity:.6}table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting_asc_disabled:before{display:none}table.dataTable thead>tr>th:active,table.dataTable 
thead>tr>td:active{outline:none}div.dataTables_scrollBody table.dataTable thead>tr>th:before,div.dataTables_scrollBody table.dataTable thead>tr>th:after,div.dataTables_scrollBody table.dataTable thead>tr>td:before,div.dataTables_scrollBody table.dataTable thead>tr>td:after{display:none}div.dataTables_processing{position:absolute;top:50%;left:50%;width:200px;margin-left:-100px;margin-top:-26px;text-align:center;padding:2px}div.dataTables_processing>div:last-child{position:relative;width:80px;height:15px;margin:1em auto}div.dataTables_processing>div:last-child>div{position:absolute;top:0;width:13px;height:13px;border-radius:50%;background:rgba(13, 110, 253, 0.9);animation-timing-function:cubic-bezier(0, 1, 1, 0)}div.dataTables_processing>div:last-child>div:nth-child(1){left:8px;animation:datatables-loader-1 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(2){left:8px;animation:datatables-loader-2 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(3){left:32px;animation:datatables-loader-2 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(4){left:56px;animation:datatables-loader-3 .6s infinite}@keyframes datatables-loader-1{0%{transform:scale(0)}100%{transform:scale(1)}}@keyframes datatables-loader-3{0%{transform:scale(1)}100%{transform:scale(0)}}@keyframes datatables-loader-2{0%{transform:translate(0, 0)}100%{transform:translate(24px, 0)}}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable td.dt-nowrap{white-space:nowrap}table.dataTable thead th,table.dataTable thead td,table.dataTable tfoot th,table.dataTable tfoot 
td{text-align:left}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable{width:100%;margin:0 auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable thead th,table.dataTable thead td{padding:10px;border-bottom:1px solid rgba(0, 0, 0, 0.3)}table.dataTable thead th:active,table.dataTable thead td:active{outline:none}table.dataTable tfoot th,table.dataTable tfoot td{padding:10px 10px 6px 10px;border-top:1px solid rgba(0, 0, 0, 0.3)}table.dataTable tbody tr{background-color:transparent}table.dataTable tbody tr.selected>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 
0.9);color:white}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border tbody th,table.dataTable.row-border tbody td,table.dataTable.display tbody th,table.dataTable.display tbody td{border-top:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.row-border tbody tr:first-child th,table.dataTable.row-border tbody tr:first-child td,table.dataTable.display tbody tr:first-child th,table.dataTable.display tbody tr:first-child td{border-top:none}table.dataTable.cell-border tbody th,table.dataTable.cell-border tbody td{border-top:1px solid rgba(0, 0, 0, 0.15);border-right:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.cell-border tbody tr th:first-child,table.dataTable.cell-border tbody tr td:first-child{border-left:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.cell-border tbody tr:first-child th,table.dataTable.cell-border tbody tr:first-child td{border-top:none}table.dataTable.stripe>tbody>tr.odd>*,table.dataTable.display>tbody>tr.odd>*{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.023)}table.dataTable.stripe>tbody>tr.odd.selected>*,table.dataTable.display>tbody>tr.odd.selected>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.923)}table.dataTable.hover>tbody>tr:hover>*,table.dataTable.display>tbody>tr:hover>*{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.035)}table.dataTable.hover>tbody>tr.selected:hover>*,table.dataTable.display>tbody>tr.selected:hover>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.935)}table.dataTable.order-column>tbody tr>.sorting_1,table.dataTable.order-column>tbody tr>.sorting_2,table.dataTable.order-column>tbody tr>.sorting_3,table.dataTable.display>tbody tr>.sorting_1,table.dataTable.display>tbody tr>.sorting_2,table.dataTable.display>tbody tr>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.019)}table.dataTable.order-column>tbody tr.selected>.sorting_1,table.dataTable.order-column>tbody tr.selected>.sorting_2,table.dataTable.order-column>tbody tr.selected>.sorting_3,table.dataTable.display>tbody 
tr.selected>.sorting_1,table.dataTable.display>tbody tr.selected>.sorting_2,table.dataTable.display>tbody tr.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.919)}table.dataTable.display>tbody>tr.odd>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.054)}table.dataTable.display>tbody>tr.odd>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.047)}table.dataTable.display>tbody>tr.odd>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.039)}table.dataTable.display>tbody>tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.954)}table.dataTable.display>tbody>tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.947)}table.dataTable.display>tbody>tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.939)}table.dataTable.display>tbody>tr.even>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.019)}table.dataTable.display>tbody>tr.even>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.011)}table.dataTable.display>tbody>tr.even>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.003)}table.dataTable.display>tbody>tr.even.selected>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 
0.919)}table.dataTable.display>tbody>tr.even.selected>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.911)}table.dataTable.display>tbody>tr.even.selected>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.903)}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.082)}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.074)}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.062)}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.982)}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.974)}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.962)}table.dataTable.no-footer{border-bottom:1px solid rgba(0, 0, 0, 0.3)}table.dataTable.compact thead th,table.dataTable.compact thead td{padding:4px 17px}table.dataTable.compact tfoot th,table.dataTable.compact tfoot td{padding:4px}table.dataTable.compact tbody th,table.dataTable.compact tbody td{padding:4px}table.dataTable th,table.dataTable td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_length select{border:1px solid 
#aaa;border-radius:3px;padding:5px;background-color:transparent;padding:4px}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;margin-left:3px}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:.25em}.dataTables_wrapper .dataTables_paginate .paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;color:#333 !important;border:1px solid transparent;border-radius:2px}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:#333 !important;border:1px solid rgba(0, 0, 0, 0.3);background-color:rgba(230, 230, 230, 0.1);background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, rgba(230, 230, 230, 0.1)), color-stop(100%, rgba(0, 0, 0, 0.1)));background:-webkit-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:-moz-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:-ms-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:-o-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:linear-gradient(to bottom, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate .paginate_button:hover{color:white !important;border:1px solid 
#111;background-color:#585858;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, #111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#2b2b2b;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:#333}.dataTables_wrapper .dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll 
div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid rgba(0, 0, 0, 0.3)}.dataTables_wrapper.no-footer div.dataTables_scrollHead table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:.5em}} diff --git a/content/find/recipes/index_files/dt-core/js/jquery.dataTables.min.js b/content/find/recipes/index_files/dt-core/js/jquery.dataTables.min.js deleted file mode 100644 index 4c73e87d..00000000 --- a/content/find/recipes/index_files/dt-core/js/jquery.dataTables.min.js +++ /dev/null @@ -1,192 +0,0 @@ -/*! - SpryMedia Ltd. - - This source file is free software, available under the following license: - MIT license - http://datatables.net/license - - This source file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the license files for details. 
- - For details please refer to: http://www.datatables.net - DataTables 1.12.1 - ©2008-2022 SpryMedia Ltd - datatables.net/license -*/ -var $jscomp=$jscomp||{};$jscomp.scope={};$jscomp.findInternal=function(l,y,A){l instanceof String&&(l=String(l));for(var q=l.length,E=0;E").css({position:"fixed",top:0,left:-1*l(y).scrollLeft(),height:1, -width:1,overflow:"hidden"}).append(l("
    ").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(l("
    ").css({width:"100%",height:10}))).appendTo("body"),d=c.children(),e=d.children();b.barWidth=d[0].offsetWidth-d[0].clientWidth;b.bScrollOversize=100===e[0].offsetWidth&&100!==d[0].clientWidth;b.bScrollbarLeft=1!==Math.round(e.offset().left);b.bBounding=c[0].getBoundingClientRect().width?!0:!1;c.remove()}l.extend(a.oBrowser,u.__browser);a.oScroll.iBarWidth=u.__browser.barWidth} -function Gb(a,b,c,d,e,h){var f=!1;if(c!==q){var g=c;f=!0}for(;d!==e;)a.hasOwnProperty(d)&&(g=f?b(g,a[d],d,a):a[d],f=!0,d+=h);return g}function cb(a,b){var c=u.defaults.column,d=a.aoColumns.length;c=l.extend({},u.models.oColumn,c,{nTh:b?b:A.createElement("th"),sTitle:c.sTitle?c.sTitle:b?b.innerHTML:"",aDataSort:c.aDataSort?c.aDataSort:[d],mData:c.mData?c.mData:d,idx:d});a.aoColumns.push(c);c=a.aoPreSearchCols;c[d]=l.extend({},u.models.oSearch,c[d]);Ia(a,d,l(b).data())}function Ia(a,b,c){b=a.aoColumns[b]; -var d=a.oClasses,e=l(b.nTh);if(!b.sWidthOrig){b.sWidthOrig=e.attr("width")||null;var h=(e.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/);h&&(b.sWidthOrig=h[1])}c!==q&&null!==c&&(Eb(c),P(u.defaults.column,c,!0),c.mDataProp===q||c.mData||(c.mData=c.mDataProp),c.sType&&(b._sManualType=c.sType),c.className&&!c.sClass&&(c.sClass=c.className),c.sClass&&e.addClass(c.sClass),h=b.sClass,l.extend(b,c),Y(b,c,"sWidth","sWidthOrig"),h!==b.sClass&&(b.sClass=h+" "+b.sClass),c.iDataSort!==q&&(b.aDataSort=[c.iDataSort]), -Y(b,c,"aDataSort"));var f=b.mData,g=ma(f),k=b.mRender?ma(b.mRender):null;c=function(m){return"string"===typeof m&&-1!==m.indexOf("@")};b._bAttrSrc=l.isPlainObject(f)&&(c(f.sort)||c(f.type)||c(f.filter));b._setter=null;b.fnGetData=function(m,n,p){var t=g(m,n,q,p);return k&&n?k(t,n,m,p):t};b.fnSetData=function(m,n,p){return ha(f)(m,n,p)};"number"!==typeof f&&(a._rowReadObject=!0);a.oFeatures.bSort||(b.bSortable=!1,e.addClass(d.sSortableNone));a=-1!==l.inArray("asc",b.asSorting);c=-1!==l.inArray("desc", 
-b.asSorting);b.bSortable&&(a||c)?a&&!c?(b.sSortingClass=d.sSortableAsc,b.sSortingClassJUI=d.sSortJUIAscAllowed):!a&&c?(b.sSortingClass=d.sSortableDesc,b.sSortingClassJUI=d.sSortJUIDescAllowed):(b.sSortingClass=d.sSortable,b.sSortingClassJUI=d.sSortJUI):(b.sSortingClass=d.sSortableNone,b.sSortingClassJUI="")}function sa(a){if(!1!==a.oFeatures.bAutoWidth){var b=a.aoColumns;db(a);for(var c=0,d=b.length;cm[n])d(g.length+m[n],k);else if("string"===typeof m[n]){var p=0;for(f=g.length;pb&&a[e]--; -1!=d&&c===q&&a.splice(d,1)}function va(a,b,c,d){var e=a.aoData[b],h,f=function(k,m){for(;k.childNodes.length;)k.removeChild(k.firstChild);k.innerHTML=T(a,b,m,"display")};if("dom"!==c&&(c&&"auto"!==c||"dom"!==e.src)){var g=e.anCells;if(g)if(d!==q)f(g[d],d);else for(c=0,h=g.length;c").appendTo(d));var k=0;for(b=g.length;k=a.fnRecordsDisplay()?0:d,a.iInitDisplayStart=-1);c=F(a,"aoPreDrawCallback","preDraw",[a]);if(-1!==l.inArray(!1,c))V(a,!1);else{c=[];var e=0;d=a.asStripeClasses;var h=d.length,f=a.oLanguage,g="ssp"==Q(a),k=a.aiDisplay,m=a._iDisplayStart,n=a.fnDisplayEnd();a.bDrawing=!0;if(a.bDeferLoading)a.bDeferLoading=!1,a.iDraw++,V(a,!1);else if(!g)a.iDraw++;else if(!a.bDestroying&&!b){Kb(a);return}if(0!==k.length)for(b=g?a.aoData.length:n,f=g?0:m;f",{"class":h?d[0]:""}).append(l("",{valign:"top",colSpan:na(a),"class":a.oClasses.sRowEmpty}).html(e))[0];F(a,"aoHeaderCallback","header",[l(a.nTHead).children("tr")[0], -ib(a),m,n,k]);F(a,"aoFooterCallback","footer",[l(a.nTFoot).children("tr")[0],ib(a),m,n,k]);d=l(a.nTBody);d.children().detach();d.append(l(c));F(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function ka(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&Lb(a);d?ya(a,a.oPreviousSearch):a.aiDisplay=a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;ja(a);a._drawHold=!1}function Mb(a){var b=a.oClasses,c=l(a.nTable);c=l("
    ").insertBefore(c);var d=a.oFeatures, -e=l("
    ",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=a.nTable.nextSibling;for(var h=a.sDom.split(""),f,g,k,m,n,p,t=0;t")[0];m=h[t+1];if("'"==m||'"'==m){n="";for(p=2;h[t+p]!=m;)n+=h[t+p],p++;"H"==n?n=b.sJUIHeader:"F"==n&&(n=b.sJUIFooter);-1!=n.indexOf(".")?(m=n.split("."),k.id=m[0].substr(1,m[0].length-1),k.className=m[1]):"#"==n.charAt(0)?k.id=n.substr(1, -n.length-1):k.className=n;t+=p}e.append(k);e=l(k)}else if(">"==g)e=e.parent();else if("l"==g&&d.bPaginate&&d.bLengthChange)f=Nb(a);else if("f"==g&&d.bFilter)f=Ob(a);else if("r"==g&&d.bProcessing)f=Pb(a);else if("t"==g)f=Qb(a);else if("i"==g&&d.bInfo)f=Rb(a);else if("p"==g&&d.bPaginate)f=Sb(a);else if(0!==u.ext.feature.length)for(k=u.ext.feature,p=0,m=k.length;p',g=d.sSearch;g=g.match(/_INPUT_/)?g.replace("_INPUT_",f):g+f;b=l("
    ",{id:h.f?null:c+"_filter","class":b.sFilter}).append(l("
    ").addClass(b.sLength);a.aanFeatures.l||(k[0].id=c+"_length");k.children().append(a.oLanguage.sLengthMenu.replace("_MENU_",e[0].outerHTML));l("select",k).val(a._iDisplayLength).on("change.DT",function(m){pb(a,l(this).val());ja(a)});l(a.nTable).on("length.dt.DT",function(m,n,p){a===n&&l("select",k).val(p)});return k[0]}function Sb(a){var b=a.sPaginationType,c=u.ext.pager[b],d="function"===typeof c,e=function(f){ja(f)};b=l("
    ").addClass(a.oClasses.sPaging+ -b)[0];var h=a.aanFeatures;d||c.fnInit(a,b,e);h.p||(b.id=a.sTableId+"_paginate",a.aoDrawCallback.push({fn:function(f){if(d){var g=f._iDisplayStart,k=f._iDisplayLength,m=f.fnRecordsDisplay(),n=-1===k;g=n?0:Math.ceil(g/k);k=n?1:Math.ceil(m/k);m=c(g,k);var p;n=0;for(p=h.p.length;nh&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==b?d+e",{id:a.aanFeatures.r?null:a.sTableId+"_processing","class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).append("
    ").insertBefore(a.nTable)[0]}function V(a, -b){a.oFeatures.bProcessing&&l(a.aanFeatures.r).css("display",b?"block":"none");F(a,null,"processing",[a,b])}function Qb(a){var b=l(a.nTable),c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX,e=c.sY,h=a.oClasses,f=b.children("caption"),g=f.length?f[0]._captionSide:null,k=l(b[0].cloneNode(!1)),m=l(b[0].cloneNode(!1)),n=b.children("tfoot");n.length||(n=null);k=l("
    ",{"class":h.sScrollWrapper}).append(l("
    ",{"class":h.sScrollHead}).css({overflow:"hidden",position:"relative",border:0, -width:d?d?K(d):null:"100%"}).append(l("
    ",{"class":h.sScrollHeadInner}).css({"box-sizing":"content-box",width:c.sXInner||"100%"}).append(k.removeAttr("id").css("margin-left",0).append("top"===g?f:null).append(b.children("thead"))))).append(l("
    ",{"class":h.sScrollBody}).css({position:"relative",overflow:"auto",width:d?K(d):null}).append(b));n&&k.append(l("
    ",{"class":h.sScrollFoot}).css({overflow:"hidden",border:0,width:d?d?K(d):null:"100%"}).append(l("
    ",{"class":h.sScrollFootInner}).append(m.removeAttr("id").css("margin-left", -0).append("bottom"===g?f:null).append(b.children("tfoot")))));b=k.children();var p=b[0];h=b[1];var t=n?b[2]:null;if(d)l(h).on("scroll.DT",function(v){v=this.scrollLeft;p.scrollLeft=v;n&&(t.scrollLeft=v)});l(h).css("max-height",e);c.bCollapse||l(h).css("height",e);a.nScrollHead=p;a.nScrollBody=h;a.nScrollFoot=t;a.aoDrawCallback.push({fn:Ja,sName:"scrolling"});return k[0]}function Ja(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY;b=b.iBarWidth;var h=l(a.nScrollHead),f=h[0].style,g=h.children("div"),k= -g[0].style,m=g.children("table");g=a.nScrollBody;var n=l(g),p=g.style,t=l(a.nScrollFoot).children("div"),v=t.children("table"),x=l(a.nTHead),w=l(a.nTable),r=w[0],C=r.style,G=a.nTFoot?l(a.nTFoot):null,ba=a.oBrowser,L=ba.bScrollOversize;U(a.aoColumns,"nTh");var O=[],I=[],H=[],fa=[],Z,Ba=function(D){D=D.style;D.paddingTop="0";D.paddingBottom="0";D.borderTopWidth="0";D.borderBottomWidth="0";D.height=0};var X=g.scrollHeight>g.clientHeight;if(a.scrollBarVis!==X&&a.scrollBarVis!==q)a.scrollBarVis=X,sa(a); -else{a.scrollBarVis=X;w.children("thead, tfoot").remove();if(G){X=G.clone().prependTo(w);var ca=G.find("tr");var Ca=X.find("tr");X.find("[id]").removeAttr("id")}var Ua=x.clone().prependTo(w);x=x.find("tr");X=Ua.find("tr");Ua.find("th, td").removeAttr("tabindex");Ua.find("[id]").removeAttr("id");c||(p.width="100%",h[0].style.width="100%");l.each(Pa(a,Ua),function(D,W){Z=ta(a,D);W.style.width=a.aoColumns[Z].sWidth});G&&da(function(D){D.style.width=""},Ca);h=w.outerWidth();""===c?(C.width="100%",L&& -(w.find("tbody").height()>g.offsetHeight||"scroll"==n.css("overflow-y"))&&(C.width=K(w.outerWidth()-b)),h=w.outerWidth()):""!==d&&(C.width=K(d),h=w.outerWidth());da(Ba,X);da(function(D){var 
W=y.getComputedStyle?y.getComputedStyle(D).width:K(l(D).width());H.push(D.innerHTML);O.push(W)},X);da(function(D,W){D.style.width=O[W]},x);l(X).css("height",0);G&&(da(Ba,Ca),da(function(D){fa.push(D.innerHTML);I.push(K(l(D).css("width")))},Ca),da(function(D,W){D.style.width=I[W]},ca),l(Ca).height(0));da(function(D, -W){D.innerHTML='
    '+H[W]+"
    ";D.childNodes[0].style.height="0";D.childNodes[0].style.overflow="hidden";D.style.width=O[W]},X);G&&da(function(D,W){D.innerHTML='
    '+fa[W]+"
    ";D.childNodes[0].style.height="0";D.childNodes[0].style.overflow="hidden";D.style.width=I[W]},Ca);Math.round(w.outerWidth())g.offsetHeight||"scroll"==n.css("overflow-y")?h+b:h,L&&(g.scrollHeight>g.offsetHeight||"scroll"==n.css("overflow-y"))&& -(C.width=K(ca-b)),""!==c&&""===d||ea(a,1,"Possible column misalignment",6)):ca="100%";p.width=K(ca);f.width=K(ca);G&&(a.nScrollFoot.style.width=K(ca));!e&&L&&(p.height=K(r.offsetHeight+b));c=w.outerWidth();m[0].style.width=K(c);k.width=K(c);d=w.height()>g.clientHeight||"scroll"==n.css("overflow-y");e="padding"+(ba.bScrollbarLeft?"Left":"Right");k[e]=d?b+"px":"0px";G&&(v[0].style.width=K(c),t[0].style.width=K(c),t[0].style[e]=d?b+"px":"0px");w.children("colgroup").insertBefore(w.children("thead")); -n.trigger("scroll");!a.bSorted&&!a.bFiltered||a._drawHold||(g.scrollTop=0)}}function da(a,b,c){for(var d=0,e=0,h=b.length,f,g;e").appendTo(g.find("tbody"));g.find("thead, tfoot").remove();g.append(l(a.nTHead).clone()).append(l(a.nTFoot).clone());g.find("tfoot th, tfoot td").css("width","");m=Pa(a,g.find("thead")[0]); -for(v=0;v").css({width:w.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(a.aoData.length)for(v=0;v").css(h||e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(g).appendTo(p);h&&f?g.width(f):h? 
-(g.css("width","auto"),g.removeAttr("width"),g.width()").css("width",K(a)).appendTo(b||A.body);b=a[0].offsetWidth;a.remove();return b}function dc(a,b){var c=ec(a,b);if(0>c)return null;var d=a.aoData[c];return d.nTr?d.anCells[b]:l("").html(T(a,c,b,"display"))[0]}function ec(a,b){for(var c,d=-1,e=-1,h=0,f=a.aoData.length;hd&&(d=c.length,e=h);return e}function K(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function oa(a){var b= -[],c=a.aoColumns;var d=a.aaSortingFixed;var e=l.isPlainObject(d);var h=[];var f=function(n){n.length&&!Array.isArray(n[0])?h.push(n):l.merge(h,n)};Array.isArray(d)&&f(d);e&&d.pre&&f(d.pre);f(a.aaSorting);e&&d.post&&f(d.post);for(a=0;aG?1:0;if(0!==C)return"asc"===r.dir?C:-C}C=c[n];G=c[p];return CG?1:0}):f.sort(function(n,p){var t,v=g.length, -x=e[n]._aSortData,w=e[p]._aSortData;for(t=0;tG?1:0})}a.bSorted=!0}function gc(a){var b=a.aoColumns,c=oa(a);a=a.oLanguage.oAria;for(var d=0,e=b.length;d/g,"");var k=h.nTh;k.removeAttribute("aria-sort");h.bSortable&&(0e?e+1:3))}e=0;for(h=d.length;ee?e+1:3))}a.aLastSort=d}function fc(a,b){var c=a.aoColumns[b],d=u.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ua(a,b)));for(var h,f=u.ext.type.order[c.sType+"-pre"],g=0,k=a.aoData.length;g=e.length?[0,m[1]]:m)}));b.search!==q&&l.extend(a.oPreviousSearch,$b(b.search));if(b.columns){f=0;for(d=b.columns.length;f=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function lb(a,b){a=a.renderer;var c=u.ext.renderer[b]; -return l.isPlainObject(a)&&a[b]?c[a[b]]||c._:"string"===typeof a?c[a]||c._:c._}function Q(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function Ea(a,b){var c=ic.numbers_length,d=Math.floor(c/2);b<=c?a=pa(0,b):a<=d?(a=pa(0,c-2),a.push("ellipsis"),a.push(b-1)):(a>=b-1-d?a=pa(b-(c-2),b):(a=pa(a-d+2,a+d-1),a.push("ellipsis"),a.push(b-1)),a.splice(0,0,"ellipsis"),a.splice(0,0,0));a.DT_el="span";return a}function bb(a){l.each({num:function(b){return 
Xa(b,a)},"num-fmt":function(b){return Xa(b, -a,vb)},"html-num":function(b){return Xa(b,a,Ya)},"html-num-fmt":function(b){return Xa(b,a,Ya,vb)}},function(b,c){M.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(M.type.search[b+a]=M.type.search.html)})}function jc(a,b,c,d,e){return y.moment?a[b](e):y.luxon?a[c](e):d?a[d](e):a}function Za(a,b,c){if(y.moment){var d=y.moment.utc(a,b,c,!0);if(!d.isValid())return null}else if(y.luxon){d=b?y.luxon.DateTime.fromFormat(a,b):y.luxon.DateTime.fromISO(a);if(!d.isValid)return null;d.setLocale(c)}else b?(kc|| -alert("DataTables warning: Formatted date without Moment.js or Luxon - https://datatables.net/tn/17"),kc=!0):d=new Date(a);return d}function wb(a){return function(b,c,d,e){0===arguments.length?(d="en",b=c=null):1===arguments.length?(d="en",c=b,b=null):2===arguments.length&&(d=c,c=b,b=null);var h="datetime-"+c;u.ext.type.order[h]||(u.ext.type.detect.unshift(function(f){return f===h?h:!1}),u.ext.type.order[h+"-asc"]=function(f,g){f=f.valueOf();g=g.valueOf();return f===g?0:fg?-1:1});return function(f,g){if(null===f||f===q)"--now"===e?(f=new Date,f=new Date(Date.UTC(f.getFullYear(),f.getMonth(),f.getDate(),f.getHours(),f.getMinutes(),f.getSeconds()))):f="";if("type"===g)return h;if(""===f)return"sort"!==g?"":Za("0000-01-01 00:00:00",null,d);if(null!==c&&b===c&&"sort"!==g&&"type"!==g&&!(f instanceof Date))return f;var k=Za(f,b,d);if(null===k)return f;if("sort"===g)return k;f=null===c?jc(k,"toDate","toJSDate", -"")[a]():jc(k,"format","toFormat","toISOString",c);return"display"===g?$a(f):f}}}function lc(a){return function(){var b=[Wa(this[u.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return u.ext.internal[a].apply(this,b)}}var u=function(a,b){if(this instanceof u)return l(a).DataTable(b);b=a;this.$=function(f,g){return this.api(!0).$(f,g)};this._=function(f,g){return this.api(!0).rows(f,g).data()};this.api=function(f){return f?new B(Wa(this[M.iApiIndex])):new B(this)};this.fnAddData=function(f, -g){var 
k=this.api(!0);f=Array.isArray(f)&&(Array.isArray(f[0])||l.isPlainObject(f[0]))?k.rows.add(f):k.row.add(f);(g===q||g)&&k.draw();return f.flatten().toArray()};this.fnAdjustColumnSizing=function(f){var g=this.api(!0).columns.adjust(),k=g.settings()[0],m=k.oScroll;f===q||f?g.draw(!1):(""!==m.sX||""!==m.sY)&&Ja(k)};this.fnClearTable=function(f){var g=this.api(!0).clear();(f===q||f)&&g.draw()};this.fnClose=function(f){this.api(!0).row(f).child.hide()};this.fnDeleteRow=function(f,g,k){var m=this.api(!0); -f=m.rows(f);var n=f.settings()[0],p=n.aoData[f[0][0]];f.remove();g&&g.call(this,n,p);(k===q||k)&&m.draw();return p};this.fnDestroy=function(f){this.api(!0).destroy(f)};this.fnDraw=function(f){this.api(!0).draw(f)};this.fnFilter=function(f,g,k,m,n,p){n=this.api(!0);null===g||g===q?n.search(f,k,m,p):n.column(g).search(f,k,m,p);n.draw()};this.fnGetData=function(f,g){var k=this.api(!0);if(f!==q){var m=f.nodeName?f.nodeName.toLowerCase():"";return g!==q||"td"==m||"th"==m?k.cell(f,g).data():k.row(f).data()|| -null}return k.data().toArray()};this.fnGetNodes=function(f){var g=this.api(!0);return f!==q?g.row(f).node():g.rows().nodes().flatten().toArray()};this.fnGetPosition=function(f){var g=this.api(!0),k=f.nodeName.toUpperCase();return"TR"==k?g.row(f).index():"TD"==k||"TH"==k?(f=g.cell(f).index(),[f.row,f.columnVisible,f.column]):null};this.fnIsOpen=function(f){return this.api(!0).row(f).child.isShown()};this.fnOpen=function(f,g,k){return this.api(!0).row(f).child(g,k).show().child()[0]};this.fnPageChange= -function(f,g){f=this.api(!0).page(f);(g===q||g)&&f.draw(!1)};this.fnSetColumnVis=function(f,g,k){f=this.api(!0).column(f).visible(g);(k===q||k)&&f.columns.adjust().draw()};this.fnSettings=function(){return Wa(this[M.iApiIndex])};this.fnSort=function(f){this.api(!0).order(f).draw()};this.fnSortListener=function(f,g,k){this.api(!0).order.listener(f,g,k)};this.fnUpdate=function(f,g,k,m,n){var 
p=this.api(!0);k===q||null===k?p.row(g).data(f):p.cell(g,k).data(f);(n===q||n)&&p.columns.adjust();(m===q||m)&& -p.draw();return 0};this.fnVersionCheck=M.fnVersionCheck;var c=this,d=b===q,e=this.length;d&&(b={});this.oApi=this.internal=M.internal;for(var h in u.ext.internal)h&&(this[h]=lc(h));this.each(function(){var f={},g=1").appendTo(t));r.nTHead=H[0];var fa=t.children("tbody");0===fa.length&&(fa=l("").insertAfter(H));r.nTBody=fa[0];H=t.children("tfoot");0===H.length&&0").appendTo(t));0===H.length||0===H.children().length?t.addClass(C.sNoFooter):0/g,Dc=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,Ec=/(\/|\.|\*|\+|\?|\||\(|\)|\[|\]|\{|\}|\\|\$|\^|\-)/g,vb=/['\u00A0,$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,aa=function(a){return a&&!0!==a&&"-"!== -a?!1:!0},nc=function(a){var b=parseInt(a,10);return!isNaN(b)&&isFinite(a)?b:null},oc=function(a,b){xb[b]||(xb[b]=new RegExp(ob(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(xb[b],"."):a},yb=function(a,b,c){var d="string"===typeof a;if(aa(a))return!0;b&&d&&(a=oc(a,b));c&&d&&(a=a.replace(vb,""));return!isNaN(parseFloat(a))&&isFinite(a)},pc=function(a,b,c){return aa(a)?!0:aa(a)||"string"===typeof a?yb(a.replace(Ya,""),b,c)?!0:null:null},U=function(a,b,c){var d=[],e=0,h=a.length; -if(c!==q)for(;ea.length)){var b=a.slice().sort();for(var c=b[0], -d=1,e=b.length;d")[0],Bc=Sa.textContent!==q,Cc=/<.*?>/g,mb=u.util.throttle,tc=[],N=Array.prototype,Fc=function(a){var b,c=u.settings,d=l.map(c,function(h,f){return h.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase()){var e= -l.inArray(a,d);return-1!==e?[c[e]]:null}if(a&&"function"===typeof a.settings)return a.settings().toArray();"string"===typeof a?b=l(a):a instanceof l&&(b=a)}else return[];if(b)return b.map(function(h){e=l.inArray(this,d);return-1!==e?c[e]:null}).toArray()};var B=function(a,b){if(!(this instanceof B))return new B(a,b);var 
c=[],d=function(f){(f=Fc(f))&&c.push.apply(c,f)};if(Array.isArray(a))for(var e=0,h=a.length;ea?new B(b[a],this[a]):null},filter:function(a){var b=[];if(N.filter)b=N.filter.call(this,a,this);else for(var c=0,d=this.length;c").addClass(g),l("td",k).addClass(g).html(f)[0].colSpan=na(a),e.push(k[0]))};h(c,d);b._details&&b._details.detach();b._details=l(e);b._detailsShow&&b._details.insertAfter(b.nTr)},wc=u.util.throttle(function(a){Da(a[0])}, -500),Cb=function(a,b){var c=a.context;c.length&&(a=c[0].aoData[b!==q?b:a[0]])&&a._details&&(a._details.remove(),a._detailsShow=q,a._details=q,l(a.nTr).removeClass("dt-hasChild"),wc(c))},xc=function(a,b){var c=a.context;if(c.length&&a.length){var d=c[0].aoData[a[0]];d._details&&((d._detailsShow=b)?(d._details.insertAfter(d.nTr),l(d.nTr).addClass("dt-hasChild")):(d._details.detach(),l(d.nTr).removeClass("dt-hasChild")),F(c[0],null,"childRow",[b,a.row(a[0])]),Ic(c[0]),wc(c))}},Ic=function(a){var b=new B(a), -c=a.aoData;b.off("draw.dt.DT_details column-sizing.dt.DT_details destroy.dt.DT_details");0g){var n=l.map(d,function(p,t){return p.bVisible?t:null});return[n[n.length+g]]}return[ta(a,g)];case "name":return l.map(e,function(p,t){return p===m[1]?t:null});default:return[]}if(f.nodeName&&f._DT_CellIndex)return[f._DT_CellIndex.column];g=l(h).filter(f).map(function(){return l.inArray(this,h)}).toArray();if(g.length||!f.nodeName)return g; -g=l(f).closest("*[data-dt-column]");return g.length?[g.data("dt-column")]:[]},a,c)};z("columns()",function(a,b){a===q?a="":l.isPlainObject(a)&&(b=a,a="");b=Ab(b);var c=this.iterator("table",function(d){return Kc(d,a,b)},1);c.selector.cols=a;c.selector.opts=b;return c});J("columns().header()","column().header()",function(a,b){return this.iterator("column",function(c,d){return c.aoColumns[d].nTh},1)});J("columns().footer()","column().footer()",function(a,b){return this.iterator("column",function(c, -d){return c.aoColumns[d].nTf},1)});J("columns().data()","column().data()",function(){return 
this.iterator("column-rows",yc,1)});J("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].mData},1)});J("columns().cache()","column().cache()",function(a){return this.iterator("column-rows",function(b,c,d,e,h){return Fa(b.aoData,h,"search"===a?"_aFilterData":"_aSortData",c)},1)});J("columns().nodes()","column().nodes()",function(){return this.iterator("column-rows", -function(a,b,c,d,e){return Fa(a.aoData,e,"anCells",b)},1)});J("columns().visible()","column().visible()",function(a,b){var c=this,d=this.iterator("column",function(e,h){if(a===q)return e.aoColumns[h].bVisible;var f=e.aoColumns,g=f[h],k=e.aoData,m;if(a!==q&&g.bVisible!==a){if(a){var n=l.inArray(!0,U(f,"bVisible"),h+1);f=0;for(m=k.length;fd;return!0};u.isDataTable=u.fnIsDataTable=function(a){var b=l(a).get(0),c=!1;if(a instanceof u.Api)return!0;l.each(u.settings,function(d,e){d=e.nScrollHead?l("table",e.nScrollHead)[0]:null;var h=e.nScrollFoot? -l("table",e.nScrollFoot)[0]:null;if(e.nTable===b||d===b||h===b)c=!0});return c};u.tables=u.fnTables=function(a){var b=!1;l.isPlainObject(a)&&(b=a.api,a=a.visible);var c=l.map(u.settings,function(d){if(!a||a&&l(d.nTable).is(":visible"))return d.nTable});return b?new B(c):c};u.camelToHungarian=P;z("$()",function(a,b){b=this.rows(b).nodes();b=l(b);return l([].concat(b.filter(a).toArray(),b.find(a).toArray()))});l.each(["on","one","off"],function(a,b){z(b+"()",function(){var c=Array.prototype.slice.call(arguments); -c[0]=l.map(c[0].split(/\s/),function(e){return e.match(/\.dt\b/)?e:e+".dt"}).join(" ");var d=l(this.tables().nodes());d[b].apply(d,c);return this})});z("clear()",function(){return this.iterator("table",function(a){Ma(a)})});z("settings()",function(){return new B(this.context,this.context)});z("init()",function(){var a=this.context;return a.length?a[0].oInit:null});z("data()",function(){return this.iterator("table",function(a){return 
U(a.aoData,"_aData")}).flatten()});z("destroy()",function(a){a=a|| -!1;return this.iterator("table",function(b){var c=b.oClasses,d=b.nTable,e=b.nTBody,h=b.nTHead,f=b.nTFoot,g=l(d);e=l(e);var k=l(b.nTableWrapper),m=l.map(b.aoData,function(p){return p.nTr}),n;b.bDestroying=!0;F(b,"aoDestroyCallback","destroy",[b]);a||(new B(b)).columns().visible(!0);k.off(".DT").find(":not(tbody *)").off(".DT");l(y).off(".DT-"+b.sInstance);d!=h.parentNode&&(g.children("thead").detach(),g.append(h));f&&d!=f.parentNode&&(g.children("tfoot").detach(),g.append(f));b.aaSorting=[];b.aaSortingFixed= -[];Va(b);l(m).removeClass(b.asStripeClasses.join(" "));l("th, td",h).removeClass(c.sSortable+" "+c.sSortableAsc+" "+c.sSortableDesc+" "+c.sSortableNone);e.children().detach();e.append(m);h=b.nTableWrapper.parentNode;f=a?"remove":"detach";g[f]();k[f]();!a&&h&&(h.insertBefore(d,b.nTableReinsertBefore),g.css("width",b.sDestroyWidth).removeClass(c.sTable),(n=b.asDestroyStripes.length)&&e.children().each(function(p){l(this).addClass(b.asDestroyStripes[p%n])}));c=l.inArray(b,u.settings);-1!==c&&u.settings.splice(c, -1)})});l.each(["column","row","cell"],function(a,b){z(b+"s().every()",function(c){var d=this.selector.opts,e=this;return this.iterator(b,function(h,f,g,k,m){c.call(e[b](f,"cell"===b?g:d,"cell"===b?d:q),f,g,k,m)})})});z("i18n()",function(a,b,c){var d=this.context[0];a=ma(a)(d.oLanguage);a===q&&(a=b);c!==q&&l.isPlainObject(a)&&(a=a[c]!==q?a[c]:a._);return a.replace("%d",c)});u.version="1.12.1";u.settings=[];u.models={};u.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0,"return":!1}; 
-u.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,idx:-1};u.models.oColumn={idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,sClass:null,sContentPadding:null,sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null, -sWidth:null,sWidthOrig:null};u.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,25,50,100],aoColumns:null,aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,bSort:!0,bSortMulti:!0,bSortCellsTop:!1,bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g, -this.oLanguage.sThousands)},fnHeaderCallback:null,fnInfoCallback:null,fnInitComplete:null,fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+a.sInstance+"_"+location.pathname))}catch(b){return{}}},fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+ -a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},fnStateSaveParams:null,iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},oPaginate:{sFirst:"First",sLast:"Last",sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available 
in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries", -sInfoFiltered:"(filtered from _MAX_ total entries)",sInfoPostFix:"",sDecimal:"",sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records found"},oSearch:l.extend({},u.models.oSearch),sAjaxDataProp:"data",sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};E(u.defaults); -u.defaults.column={aDataSort:null,iDataSort:-1,asSorting:["asc","desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};E(u.defaults.column);u.models.oSettings={oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null, -bStateSave:null},oScroll:{bCollapse:null,iBarWidth:0,sX:null,sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],aoFooter:[],oPreviousSearch:{},aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[], 
-aoRowCreatedCallback:[],aoPreDrawCallback:[],aoInitComplete:[],aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",iStateDuration:0,aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,jqXHR:null,json:q,oAjaxData:q,fnServerData:null,aoServerParams:[],sServerMethod:null, -fnFormatNumber:null,aLengthMenu:null,iDraw:0,bDrawing:!1,iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==Q(this)?1*this._iRecordsTotal:this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==Q(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+ -a,d=this.aiDisplay.length,e=this.oFeatures,h=e.bPaginate;return e.bServerSide?!1===h||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!h||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};u.ext=M={buttons:{},classes:{},builder:"-source-",errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[], 
-search:{},order:{}},_unique:0,fnVersionCheck:u.fnVersionCheck,iApiIndex:0,oJUIClasses:{},sVersion:u.version};l.extend(M,{afnFiltering:M.search,aTypes:M.type.detect,ofnSearch:M.type.search,oSort:M.type.order,afnSortData:M.order,aoFeatures:M.feature,oApi:M.internal,oStdClasses:M.classes,oPagination:M.pager});l.extend(u.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty", -sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter",sInfo:"dataTables_info",sPaging:"dataTables_paginate paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_desc_disabled",sSortableDesc:"sorting_asc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner", -sScrollBody:"dataTables_scrollBody",sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"",sJUIHeader:"",sJUIFooter:""});var ic=u.ext.pager;l.extend(ic,{simple:function(a,b){return["previous","next"]},full:function(a,b){return["first","previous","next","last"]},numbers:function(a,b){return[Ea(a,b)]},simple_numbers:function(a,b){return["previous", -Ea(a,b),"next"]},full_numbers:function(a,b){return["first","previous",Ea(a,b),"next","last"]},first_last_numbers:function(a,b){return["first",Ea(a,b),"last"]},_numbers:Ea,numbers_length:7});l.extend(!0,u.ext.renderer,{pageButton:{_:function(a,b,c,d,e,h){var f=a.oClasses,g=a.oLanguage.oPaginate,k=a.oLanguage.oAria.paginate||{},m,n,p=0,t=function(x,w){var 
r,C=f.sPageButtonDisabled,G=function(I){Ta(a,I.data.action,!0)};var ba=0;for(r=w.length;ba").appendTo(x);t(O,L)}else{m=null;n=L;O=a.iTabIndex;switch(L){case "ellipsis":x.append('');break;case "first":m=g.sFirst;0===e&&(O=-1,n+=" "+C);break;case "previous":m=g.sPrevious;0===e&&(O=-1,n+=" "+C);break;case "next":m=g.sNext;if(0===h||e===h-1)O=-1,n+=" "+C;break;case "last":m=g.sLast;if(0===h||e===h-1)O=-1,n+=" "+C;break;default:m=a.fnFormatNumber(L+1),n=e===L?f.sPageButtonActive:""}null!==m&&(O=l("",{"class":f.sPageButton+" "+n,"aria-controls":a.sTableId, -"aria-label":k[L],"data-dt-idx":p,tabindex:O,id:0===c&&"string"===typeof L?a.sTableId+"_"+L:null}).html(m).appendTo(x),sb(O,{action:L},G),p++)}}};try{var v=l(b).find(A.activeElement).data("dt-idx")}catch(x){}t(l(b).empty(),d);v!==q&&l(b).find("[data-dt-idx="+v+"]").trigger("focus")}}});l.extend(u.ext.type.detect,[function(a,b){b=b.oLanguage.sDecimal;return yb(a,b)?"num"+b:null},function(a,b){if(a&&!(a instanceof Date)&&!Dc.test(a))return null;b=Date.parse(a);return null!==b&&!isNaN(b)||aa(a)?"date": -null},function(a,b){b=b.oLanguage.sDecimal;return yb(a,b,!0)?"num-fmt"+b:null},function(a,b){b=b.oLanguage.sDecimal;return pc(a,b)?"html-num"+b:null},function(a,b){b=b.oLanguage.sDecimal;return pc(a,b,!0)?"html-num-fmt"+b:null},function(a,b){return aa(a)||"string"===typeof a&&-1!==a.indexOf("<")?"html":null}]);l.extend(u.ext.type.search,{html:function(a){return aa(a)?a:"string"===typeof a?a.replace(mc," ").replace(Ya,""):""},string:function(a){return aa(a)?a:"string"===typeof a?a.replace(mc," "): -a}});var Xa=function(a,b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=oc(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};l.extend(M.type.order,{"date-pre":function(a){a=Date.parse(a);return isNaN(a)?-Infinity:a},"html-pre":function(a){return aa(a)?"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return aa(a)?"":"string"===typeof 
a?a.toLowerCase():a.toString?a.toString():""},"string-asc":function(a,b){return ab?1:0},"string-desc":function(a, -b){return ab?-1:0}});bb("");l.extend(!0,u.ext.renderer,{header:{_:function(a,b,c,d){l(a.nTable).on("order.dt.DT",function(e,h,f,g){a===h&&(e=c.idx,b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass("asc"==g[e]?d.sSortAsc:"desc"==g[e]?d.sSortDesc:c.sSortingClass))})},jqueryui:function(a,b,c,d){l("
    ").addClass(d.sSortJUIWrapper).append(b.contents()).append(l("").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);l(a.nTable).on("order.dt.DT",function(e,h,f,g){a===h&&(e=c.idx, -b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass("asc"==g[e]?d.sSortAsc:"desc"==g[e]?d.sSortDesc:c.sSortingClass),b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass("asc"==g[e]?d.sSortJUIAsc:"desc"==g[e]?d.sSortJUIDesc:c.sSortingClassJUI))})}}});var $a=function(a){Array.isArray(a)&&(a=a.join(","));return"string"===typeof a?a.replace(/&/g,"&").replace(//g,">").replace(/"/g, -"""):a},kc=!1,zc=",",Ac=".";if(Intl)try{for(var Ha=(new Intl.NumberFormat).formatToParts(100000.1),ra=0;rah?"-":"",g=parseFloat(h);if(isNaN(g))return $a(h);g=g.toFixed(c);h=Math.abs(g);g=parseInt(h,10);h=c?b+(h-g).toFixed(c).substring(2):"";0===g&&0===parseFloat(h)&&(f="");return f+(d||"")+g.toString().replace(/\B(?=(\d{3})+(?!\d))/g,a)+h+(e||"")}}},text:function(){return{display:$a,filter:$a}}}; -l.extend(u.ext.internal,{_fnExternApiFunc:lc,_fnBuildAjax:Qa,_fnAjaxUpdate:Kb,_fnAjaxParameters:Tb,_fnAjaxUpdateDraw:Ub,_fnAjaxDataSrc:za,_fnAddColumn:cb,_fnColumnOptions:Ia,_fnAdjustColumnSizing:sa,_fnVisibleToColumnIndex:ta,_fnColumnIndexToVisible:ua,_fnVisbleColumns:na,_fnGetColumns:Ka,_fnColumnTypes:eb,_fnApplyColumnDefs:Hb,_fnHungarianMap:E,_fnCamelToHungarian:P,_fnLanguageCompat:la,_fnBrowserDetect:Fb,_fnAddData:ia,_fnAddTr:La,_fnNodeToDataIndex:function(a,b){return b._DT_RowIndex!==q?b._DT_RowIndex: -null},_fnNodeToColumnIndex:function(a,b,c){return 
l.inArray(c,a.aoData[b].anCells)},_fnGetCellData:T,_fnSetCellData:Ib,_fnSplitObjNotation:hb,_fnGetObjectDataFn:ma,_fnSetObjectDataFn:ha,_fnGetDataMaster:ib,_fnClearTable:Ma,_fnDeleteIndex:Na,_fnInvalidate:va,_fnGetRowElements:gb,_fnCreateTr:fb,_fnBuildHead:Jb,_fnDrawHead:xa,_fnDraw:ja,_fnReDraw:ka,_fnAddOptionsHtml:Mb,_fnDetectHeader:wa,_fnGetUniqueThs:Pa,_fnFeatureHtmlFilter:Ob,_fnFilterComplete:ya,_fnFilterCustom:Xb,_fnFilterColumn:Wb,_fnFilter:Vb, -_fnFilterCreateSearch:nb,_fnEscapeRegex:ob,_fnFilterData:Yb,_fnFeatureHtmlInfo:Rb,_fnUpdateInfo:ac,_fnInfoMacros:bc,_fnInitialise:Aa,_fnInitComplete:Ra,_fnLengthChange:pb,_fnFeatureHtmlLength:Nb,_fnFeatureHtmlPaginate:Sb,_fnPageChange:Ta,_fnFeatureHtmlProcessing:Pb,_fnProcessingDisplay:V,_fnFeatureHtmlTable:Qb,_fnScrollDraw:Ja,_fnApplyToChildren:da,_fnCalculateColumnWidths:db,_fnThrottle:mb,_fnConvertToWidth:cc,_fnGetWidestNode:dc,_fnGetMaxLenString:ec,_fnStringToCss:K,_fnSortFlatten:oa,_fnSort:Lb, -_fnSortAria:gc,_fnSortListener:rb,_fnSortAttachListener:kb,_fnSortingClasses:Va,_fnSortData:fc,_fnSaveState:Da,_fnLoadState:hc,_fnImplementState:tb,_fnSettingsFromNode:Wa,_fnLog:ea,_fnMap:Y,_fnBindAction:sb,_fnCallbackReg:R,_fnCallbackFire:F,_fnLengthOverflow:qb,_fnRenderer:lb,_fnDataSource:Q,_fnRowAttributes:jb,_fnExtend:ub,_fnCalculateEnd:function(){}});l.fn.dataTable=u;u.$=l;l.fn.dataTableSettings=u.settings;l.fn.dataTableExt=u.ext;l.fn.DataTable=function(a){return l(this).dataTable(a).api()}; -l.each(u,function(a,b){l.fn.DataTable[a]=b});return u}); diff --git a/content/find/recipes/index_files/htmlwidgets/htmlwidgets.js b/content/find/recipes/index_files/htmlwidgets/htmlwidgets.js deleted file mode 100644 index da8b2367..00000000 --- a/content/find/recipes/index_files/htmlwidgets/htmlwidgets.js +++ /dev/null @@ -1,903 +0,0 @@ -(function() { - // If window.HTMLWidgets is already defined, then use it; otherwise create a - // new object. 
This allows preceding code to set options that affect the - // initialization process (though none currently exist). - window.HTMLWidgets = window.HTMLWidgets || {}; - - // See if we're running in a viewer pane. If not, we're in a web browser. - var viewerMode = window.HTMLWidgets.viewerMode = - /\bviewer_pane=1\b/.test(window.location); - - // See if we're running in Shiny mode. If not, it's a static document. - // Note that static widgets can appear in both Shiny and static modes, but - // obviously, Shiny widgets can only appear in Shiny apps/documents. - var shinyMode = window.HTMLWidgets.shinyMode = - typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings; - - // We can't count on jQuery being available, so we implement our own - // version if necessary. - function querySelectorAll(scope, selector) { - if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) { - return scope.find(selector); - } - if (scope.querySelectorAll) { - return scope.querySelectorAll(selector); - } - } - - function asArray(value) { - if (value === null) - return []; - if ($.isArray(value)) - return value; - return [value]; - } - - // Implement jQuery's extend - function extend(target /*, ... */) { - if (arguments.length == 1) { - return target; - } - for (var i = 1; i < arguments.length; i++) { - var source = arguments[i]; - for (var prop in source) { - if (source.hasOwnProperty(prop)) { - target[prop] = source[prop]; - } - } - } - return target; - } - - // IE8 doesn't support Array.forEach. - function forEach(values, callback, thisArg) { - if (values.forEach) { - values.forEach(callback, thisArg); - } else { - for (var i = 0; i < values.length; i++) { - callback.call(thisArg, values[i], i, values); - } - } - } - - // Replaces the specified method with the return value of funcSource. - // - // Note that funcSource should not BE the new method, it should be a function - // that RETURNS the new method. 
funcSource receives a single argument that is - // the overridden method, it can be called from the new method. The overridden - // method can be called like a regular function, it has the target permanently - // bound to it so "this" will work correctly. - function overrideMethod(target, methodName, funcSource) { - var superFunc = target[methodName] || function() {}; - var superFuncBound = function() { - return superFunc.apply(target, arguments); - }; - target[methodName] = funcSource(superFuncBound); - } - - // Add a method to delegator that, when invoked, calls - // delegatee.methodName. If there is no such method on - // the delegatee, but there was one on delegator before - // delegateMethod was called, then the original version - // is invoked instead. - // For example: - // - // var a = { - // method1: function() { console.log('a1'); } - // method2: function() { console.log('a2'); } - // }; - // var b = { - // method1: function() { console.log('b1'); } - // }; - // delegateMethod(a, b, "method1"); - // delegateMethod(a, b, "method2"); - // a.method1(); - // a.method2(); - // - // The output would be "b1", "a2". - function delegateMethod(delegator, delegatee, methodName) { - var inherited = delegator[methodName]; - delegator[methodName] = function() { - var target = delegatee; - var method = delegatee[methodName]; - - // The method doesn't exist on the delegatee. Instead, - // call the method on the delegator, if it exists. 
- if (!method) { - target = delegator; - method = inherited; - } - - if (method) { - return method.apply(target, arguments); - } - }; - } - - // Implement a vague facsimilie of jQuery's data method - function elementData(el, name, value) { - if (arguments.length == 2) { - return el["htmlwidget_data_" + name]; - } else if (arguments.length == 3) { - el["htmlwidget_data_" + name] = value; - return el; - } else { - throw new Error("Wrong number of arguments for elementData: " + - arguments.length); - } - } - - // http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex - function escapeRegExp(str) { - return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); - } - - function hasClass(el, className) { - var re = new RegExp("\\b" + escapeRegExp(className) + "\\b"); - return re.test(el.className); - } - - // elements - array (or array-like object) of HTML elements - // className - class name to test for - // include - if true, only return elements with given className; - // if false, only return elements *without* given className - function filterByClass(elements, className, include) { - var results = []; - for (var i = 0; i < elements.length; i++) { - if (hasClass(elements[i], className) == include) - results.push(elements[i]); - } - return results; - } - - function on(obj, eventName, func) { - if (obj.addEventListener) { - obj.addEventListener(eventName, func, false); - } else if (obj.attachEvent) { - obj.attachEvent(eventName, func); - } - } - - function off(obj, eventName, func) { - if (obj.removeEventListener) - obj.removeEventListener(eventName, func, false); - else if (obj.detachEvent) { - obj.detachEvent(eventName, func); - } - } - - // Translate array of values to top/right/bottom/left, as usual with - // the "padding" CSS property - // https://developer.mozilla.org/en-US/docs/Web/CSS/padding - function unpackPadding(value) { - if (typeof(value) === "number") - value = [value]; - if (value.length === 1) { - return {top: 
value[0], right: value[0], bottom: value[0], left: value[0]}; - } - if (value.length === 2) { - return {top: value[0], right: value[1], bottom: value[0], left: value[1]}; - } - if (value.length === 3) { - return {top: value[0], right: value[1], bottom: value[2], left: value[1]}; - } - if (value.length === 4) { - return {top: value[0], right: value[1], bottom: value[2], left: value[3]}; - } - } - - // Convert an unpacked padding object to a CSS value - function paddingToCss(paddingObj) { - return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px"; - } - - // Makes a number suitable for CSS - function px(x) { - if (typeof(x) === "number") - return x + "px"; - else - return x; - } - - // Retrieves runtime widget sizing information for an element. - // The return value is either null, or an object with fill, padding, - // defaultWidth, defaultHeight fields. - function sizingPolicy(el) { - var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']"); - if (!sizingEl) - return null; - var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}"); - if (viewerMode) { - return sp.viewer; - } else { - return sp.browser; - } - } - - // @param tasks Array of strings (or falsy value, in which case no-op). - // Each element must be a valid JavaScript expression that yields a - // function. Or, can be an array of objects with "code" and "data" - // properties; in this case, the "code" property should be a string - // of JS that's an expr that yields a function, and "data" should be - // an object that will be added as an additional argument when that - // function is called. - // @param target The object that will be "this" for each function - // execution. - // @param args Array of arguments to be passed to the functions. (The - // same arguments will be passed to all functions.) 
- function evalAndRun(tasks, target, args) { - if (tasks) { - forEach(tasks, function(task) { - var theseArgs = args; - if (typeof(task) === "object") { - theseArgs = theseArgs.concat([task.data]); - task = task.code; - } - var taskFunc = tryEval(task); - if (typeof(taskFunc) !== "function") { - throw new Error("Task must be a function! Source:\n" + task); - } - taskFunc.apply(target, theseArgs); - }); - } - } - - // Attempt eval() both with and without enclosing in parentheses. - // Note that enclosing coerces a function declaration into - // an expression that eval() can parse - // (otherwise, a SyntaxError is thrown) - function tryEval(code) { - var result = null; - try { - result = eval("(" + code + ")"); - } catch(error) { - if (!(error instanceof SyntaxError)) { - throw error; - } - try { - result = eval(code); - } catch(e) { - if (e instanceof SyntaxError) { - throw error; - } else { - throw e; - } - } - } - return result; - } - - function initSizing(el) { - var sizing = sizingPolicy(el); - if (!sizing) - return; - - var cel = document.getElementById("htmlwidget_container"); - if (!cel) - return; - - if (typeof(sizing.padding) !== "undefined") { - document.body.style.margin = "0"; - document.body.style.padding = paddingToCss(unpackPadding(sizing.padding)); - } - - if (sizing.fill) { - document.body.style.overflow = "hidden"; - document.body.style.width = "100%"; - document.body.style.height = "100%"; - document.documentElement.style.width = "100%"; - document.documentElement.style.height = "100%"; - if (cel) { - cel.style.position = "absolute"; - var pad = unpackPadding(sizing.padding); - cel.style.top = pad.top + "px"; - cel.style.right = pad.right + "px"; - cel.style.bottom = pad.bottom + "px"; - cel.style.left = pad.left + "px"; - el.style.width = "100%"; - el.style.height = "100%"; - } - - return { - getWidth: function() { return cel.offsetWidth; }, - getHeight: function() { return cel.offsetHeight; } - }; - - } else { - el.style.width = 
px(sizing.width); - el.style.height = px(sizing.height); - - return { - getWidth: function() { return el.offsetWidth; }, - getHeight: function() { return el.offsetHeight; } - }; - } - } - - // Default implementations for methods - var defaults = { - find: function(scope) { - return querySelectorAll(scope, "." + this.name); - }, - renderError: function(el, err) { - var $el = $(el); - - this.clearError(el); - - // Add all these error classes, as Shiny does - var errClass = "shiny-output-error"; - if (err.type !== null) { - // use the classes of the error condition as CSS class names - errClass = errClass + " " + $.map(asArray(err.type), function(type) { - return errClass + "-" + type; - }).join(" "); - } - errClass = errClass + " htmlwidgets-error"; - - // Is el inline or block? If inline or inline-block, just display:none it - // and add an inline error. - var display = $el.css("display"); - $el.data("restore-display-mode", display); - - if (display === "inline" || display === "inline-block") { - $el.hide(); - if (err.message !== "") { - var errorSpan = $("").addClass(errClass); - errorSpan.text(err.message); - $el.after(errorSpan); - } - } else if (display === "block") { - // If block, add an error just after the el, set visibility:none on the - // el, and position the error to be on top of the el. - // Mark it with a unique ID and CSS class so we can remove it later. - $el.css("visibility", "hidden"); - if (err.message !== "") { - var errorDiv = $("
    ").addClass(errClass).css("position", "absolute") - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - // setting width can push out the page size, forcing otherwise - // unnecessary scrollbars to appear and making it impossible for - // the element to shrink; so use max-width instead - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - errorDiv.text(err.message); - $el.after(errorDiv); - - // Really dumb way to keep the size/position of the error in sync with - // the parent element as the window is resized or whatever. - var intId = setInterval(function() { - if (!errorDiv[0].parentElement) { - clearInterval(intId); - return; - } - errorDiv - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - }, 500); - } - } - }, - clearError: function(el) { - var $el = $(el); - var display = $el.data("restore-display-mode"); - $el.data("restore-display-mode", null); - - if (display === "inline" || display === "inline-block") { - if (display) - $el.css("display", display); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } else if (display === "block"){ - $el.css("visibility", "inherit"); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } - }, - sizing: {} - }; - - // Called by widget bindings to register a new type of widget. The definition - // object can contain the following properties: - // - name (required) - A string indicating the binding name, which will be - // used by default as the CSS classname to look for. - // - initialize (optional) - A function(el) that will be called once per - // widget element; if a value is returned, it will be passed as the third - // value to renderValue. - // - renderValue (required) - A function(el, data, initValue) that will be - // called with data. 
Static contexts will cause this to be called once per - // element; Shiny apps will cause this to be called multiple times per - // element, as the data changes. - window.HTMLWidgets.widget = function(definition) { - if (!definition.name) { - throw new Error("Widget must have a name"); - } - if (!definition.type) { - throw new Error("Widget must have a type"); - } - // Currently we only support output widgets - if (definition.type !== "output") { - throw new Error("Unrecognized widget type '" + definition.type + "'"); - } - // TODO: Verify that .name is a valid CSS classname - - // Support new-style instance-bound definitions. Old-style class-bound - // definitions have one widget "object" per widget per type/class of - // widget; the renderValue and resize methods on such widget objects - // take el and instance arguments, because the widget object can't - // store them. New-style instance-bound definitions have one widget - // object per widget instance; the definition that's passed in doesn't - // provide renderValue or resize methods at all, just the single method - // factory(el, width, height) - // which returns an object that has renderValue(x) and resize(w, h). - // This enables a far more natural programming style for the widget - // author, who can store per-instance state using either OO-style - // instance fields or functional-style closure variables (I guess this - // is in contrast to what can only be called C-style pseudo-OO which is - // what we required before). - if (definition.factory) { - definition = createLegacyDefinitionAdapter(definition); - } - - if (!definition.renderValue) { - throw new Error("Widget must have a renderValue function"); - } - - // For static rendering (non-Shiny), use a simple widget registration - // scheme. We also use this scheme for Shiny apps/documents that also - // contain static widgets. 
- window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; - // Merge defaults into the definition; don't mutate the original definition. - var staticBinding = extend({}, defaults, definition); - overrideMethod(staticBinding, "find", function(superfunc) { - return function(scope) { - var results = superfunc(scope); - // Filter out Shiny outputs, we only want the static kind - return filterByClass(results, "html-widget-output", false); - }; - }); - window.HTMLWidgets.widgets.push(staticBinding); - - if (shinyMode) { - // Shiny is running. Register the definition with an output binding. - // The definition itself will not be the output binding, instead - // we will make an output binding object that delegates to the - // definition. This is because we foolishly used the same method - // name (renderValue) for htmlwidgets definition and Shiny bindings - // but they actually have quite different semantics (the Shiny - // bindings receive data that includes lots of metadata that it - // strips off before calling htmlwidgets renderValue). We can't - // just ignore the difference because in some widgets it's helpful - // to call this.renderValue() from inside of resize(), and if - // we're not delegating, then that call will go to the Shiny - // version instead of the htmlwidgets version. - - // Merge defaults with definition, without mutating either. - var bindingDef = extend({}, defaults, definition); - - // This object will be our actual Shiny binding. - var shinyBinding = new Shiny.OutputBinding(); - - // With a few exceptions, we'll want to simply use the bindingDef's - // version of methods if they are available, otherwise fall back to - // Shiny's defaults. NOTE: If Shiny's output bindings gain additional - // methods in the future, and we want them to be overrideable by - // HTMLWidget binding definitions, then we'll need to add them to this - // list. 
- delegateMethod(shinyBinding, bindingDef, "getId"); - delegateMethod(shinyBinding, bindingDef, "onValueChange"); - delegateMethod(shinyBinding, bindingDef, "onValueError"); - delegateMethod(shinyBinding, bindingDef, "renderError"); - delegateMethod(shinyBinding, bindingDef, "clearError"); - delegateMethod(shinyBinding, bindingDef, "showProgress"); - - // The find, renderValue, and resize are handled differently, because we - // want to actually decorate the behavior of the bindingDef methods. - - shinyBinding.find = function(scope) { - var results = bindingDef.find(scope); - - // Only return elements that are Shiny outputs, not static ones - var dynamicResults = results.filter(".html-widget-output"); - - // It's possible that whatever caused Shiny to think there might be - // new dynamic outputs, also caused there to be new static outputs. - // Since there might be lots of different htmlwidgets bindings, we - // schedule execution for later--no need to staticRender multiple - // times. - if (results.length !== dynamicResults.length) - scheduleStaticRender(); - - return dynamicResults; - }; - - // Wrap renderValue to handle initialization, which unfortunately isn't - // supported natively by Shiny at the time of this writing. 
- - shinyBinding.renderValue = function(el, data) { - Shiny.renderDependencies(data.deps); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var i = 0; data.evals && i < data.evals.length; i++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); - } - if (!bindingDef.renderOnNullValue) { - if (data.x === null) { - el.style.visibility = "hidden"; - return; - } else { - el.style.visibility = "inherit"; - } - } - if (!elementData(el, "initialized")) { - initSizing(el); - - elementData(el, "initialized", true); - if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); - elementData(el, "init_result", result); - } - } - bindingDef.renderValue(el, data.x, elementData(el, "init_result")); - evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); - }; - - // Only override resize if bindingDef implements it - if (bindingDef.resize) { - shinyBinding.resize = function(el, width, height) { - // Shiny can call resize before initialize/renderValue have been - // called, which doesn't make sense for widgets. 
- if (elementData(el, "initialized")) { - bindingDef.resize(el, width, height, elementData(el, "init_result")); - } - }; - } - - Shiny.outputBindings.register(shinyBinding, bindingDef.name); - } - }; - - var scheduleStaticRenderTimerId = null; - function scheduleStaticRender() { - if (!scheduleStaticRenderTimerId) { - scheduleStaticRenderTimerId = setTimeout(function() { - scheduleStaticRenderTimerId = null; - window.HTMLWidgets.staticRender(); - }, 1); - } - } - - // Render static widgets after the document finishes loading - // Statically render all elements that are of this widget's class - window.HTMLWidgets.staticRender = function() { - var bindings = window.HTMLWidgets.widgets || []; - forEach(bindings, function(binding) { - var matches = binding.find(document.documentElement); - forEach(matches, function(el) { - var sizeObj = initSizing(el, binding); - - if (hasClass(el, "html-widget-static-bound")) - return; - el.className = el.className + " html-widget-static-bound"; - - var initResult; - if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); - elementData(el, "init_result", initResult); - } - - if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - if (size.w === 0 && size.h === 0) - return; - if (size.w === lastSize.w && size.h === lastSize.h) - return; - lastSize = size; - binding.resize(el, size.w, size.h, initResult); - }; - - on(window, "resize", resizeHandler); - - // This is needed for cases where we're running in a Shiny - // app, but the widget itself is not a Shiny output, but - // rather a simple static widget. 
One example of this is - // an rmarkdown document that has runtime:shiny and widget - // that isn't in a render function. Shiny only knows to - // call resize handlers for Shiny outputs, not for static - // widgets, so we do it ourselves. - if (window.jQuery) { - window.jQuery(document).on( - "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", - resizeHandler - ); - window.jQuery(document).on( - "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", - resizeHandler - ); - } - - // This is needed for the specific case of ioslides, which - // flips slides between display:none and display:block. - // Ideally we would not have to have ioslide-specific code - // here, but rather have ioslides raise a generic event, - // but the rmarkdown package just went to CRAN so the - // window to getting that fixed may be long. - if (window.addEventListener) { - // It's OK to limit this to window.addEventListener - // browsers because ioslides itself only supports - // such browsers. 
- on(document, "slideenter", resizeHandler); - on(document, "slideleave", resizeHandler); - } - } - - var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); - if (scriptData) { - var data = JSON.parse(scriptData.textContent || scriptData.text); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var k = 0; data.evals && k < data.evals.length; k++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); - } - binding.renderValue(el, data.x, initResult); - evalAndRun(data.jsHooks.render, initResult, [el, data.x]); - } - }); - }); - - invokePostRenderHandlers(); - } - - - function has_jQuery3() { - if (!window.jQuery) { - return false; - } - var $version = window.jQuery.fn.jquery; - var $major_version = parseInt($version.split(".")[0]); - return $major_version >= 3; - } - - /* - / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's - / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now - / really means $(setTimeout(fn)). - / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous - / - / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny - / one tick later than it did before, which means staticRender() is - / called renderValue() earlier than (advanced) widget authors might be expecting. - / https://github.com/rstudio/shiny/issues/2630 - / - / For a concrete example, leaflet has some methods (e.g., updateBounds) - / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
- / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to - / delay execution of those methods (until Shiny methods are ready) - / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 - / - / Ideally widget authors wouldn't need to use this setTimeout() hack that - / leaflet uses to call Shiny methods on a staticRender(). In the long run, - / the logic initShiny should be broken up so that method registration happens - / right away, but binding happens later. - */ - function maybeStaticRenderLater() { - if (shinyMode && has_jQuery3()) { - window.jQuery(window.HTMLWidgets.staticRender); - } else { - window.HTMLWidgets.staticRender(); - } - } - - if (document.addEventListener) { - document.addEventListener("DOMContentLoaded", function() { - document.removeEventListener("DOMContentLoaded", arguments.callee, false); - maybeStaticRenderLater(); - }, false); - } else if (document.attachEvent) { - document.attachEvent("onreadystatechange", function() { - if (document.readyState === "complete") { - document.detachEvent("onreadystatechange", arguments.callee); - maybeStaticRenderLater(); - } - }); - } - - - window.HTMLWidgets.getAttachmentUrl = function(depname, key) { - // If no key, default to the first item - if (typeof(key) === "undefined") - key = 1; - - var link = document.getElementById(depname + "-" + key + "-attachment"); - if (!link) { - throw new Error("Attachment " + depname + "/" + key + " not found in document"); - } - return link.getAttribute("href"); - }; - - window.HTMLWidgets.dataframeToD3 = function(df) { - var names = []; - var length; - for (var name in df) { - if (df.hasOwnProperty(name)) - names.push(name); - if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof(length) !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = 
df[name].length; - } - var results = []; - var item; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; - }; - - window.HTMLWidgets.transposeArray2D = function(array) { - if (array.length === 0) return array; - var newArray = array[0].map(function(col, i) { - return array.map(function(row) { - return row[i] - }) - }); - return newArray; - }; - // Split value at splitChar, but allow splitChar to be escaped - // using escapeChar. Any other characters escaped by escapeChar - // will be included as usual (including escapeChar itself). - function splitWithEscape(value, splitChar, escapeChar) { - var results = []; - var escapeMode = false; - var currentResult = ""; - for (var pos = 0; pos < value.length; pos++) { - if (!escapeMode) { - if (value[pos] === splitChar) { - results.push(currentResult); - currentResult = ""; - } else if (value[pos] === escapeChar) { - escapeMode = true; - } else { - currentResult += value[pos]; - } - } else { - currentResult += value[pos]; - escapeMode = false; - } - } - if (currentResult !== "") { - results.push(currentResult); - } - return results; - } - // Function authored by Yihui/JJ Allaire - window.HTMLWidgets.evaluateStringMember = function(o, member) { - var parts = splitWithEscape(member, '.', '\\'); - for (var i = 0, l = parts.length; i < l; i++) { - var part = parts[i]; - // part may be a character or 'numeric' member name - if (o !== null && typeof o === "object" && part in o) { - if (i == (l - 1)) { // if we are at the end of the line then evalulate - if (typeof o[part] === "string") - o[part] = tryEval(o[part]); - } else { // otherwise continue to next embedded object - o = o[part]; - } - } - } - }; - - // Retrieve the HTMLWidget instance (i.e. the return value of an - // HTMLWidget binding's initialize() or factory() function) - // associated with an element, or null if none. 
- window.HTMLWidgets.getInstance = function(el) { - return elementData(el, "init_result"); - }; - - // Finds the first element in the scope that matches the selector, - // and returns the HTMLWidget instance (i.e. the return value of - // an HTMLWidget binding's initialize() or factory() function) - // associated with that element, if any. If no element matches the - // selector, or the first matching element has no HTMLWidget - // instance associated with it, then null is returned. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.find = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var el = scope.querySelector(selector); - if (el === null) { - return null; - } else { - return window.HTMLWidgets.getInstance(el); - } - }; - - // Finds all elements in the scope that match the selector, and - // returns the HTMLWidget instances (i.e. the return values of - // an HTMLWidget binding's initialize() or factory() function) - // associated with the elements, in an array. If elements that - // match the selector don't have an associated HTMLWidget - // instance, the returned array will contain nulls. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.findAll = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var nodes = scope.querySelectorAll(selector); - var results = []; - for (var i = 0; i < nodes.length; i++) { - results.push(window.HTMLWidgets.getInstance(nodes[i])); - } - return results; - }; - - var postRenderHandlers = []; - function invokePostRenderHandlers() { - while (postRenderHandlers.length) { - var handler = postRenderHandlers.shift(); - if (handler) { - handler(); - } - } - } - - // Register the given callback function to be invoked after the - // next time static widgets are rendered. 
- window.HTMLWidgets.addPostRenderHandler = function(callback) { - postRenderHandlers.push(callback); - }; - - // Takes a new-style instance-bound definition, and returns an - // old-style class-bound definition. This saves us from having - // to rewrite all the logic in this file to accomodate both - // types of definitions. - function createLegacyDefinitionAdapter(defn) { - var result = { - name: defn.name, - type: defn.type, - initialize: function(el, width, height) { - return defn.factory(el, width, height); - }, - renderValue: function(el, x, instance) { - return instance.renderValue(x); - }, - resize: function(el, width, height, instance) { - return instance.resize(width, height); - } - }; - - if (defn.find) - result.find = defn.find; - if (defn.renderError) - result.renderError = defn.renderError; - if (defn.clearError) - result.clearError = defn.clearError; - - return result; - } -})(); - diff --git a/content/find/recipes/index_files/jquery/jquery-3.6.0.js b/content/find/recipes/index_files/jquery/jquery-3.6.0.js deleted file mode 100644 index fc6c299b..00000000 --- a/content/find/recipes/index_files/jquery/jquery-3.6.0.js +++ /dev/null @@ -1,10881 +0,0 @@ -/*! - * jQuery JavaScript Library v3.6.0 - * https://jquery.com/ - * - * Includes Sizzle.js - * https://sizzlejs.com/ - * - * Copyright OpenJS Foundation and other contributors - * Released under the MIT license - * https://jquery.org/license - * - * Date: 2021-03-02T17:08Z - */ -( function( global, factory ) { - - "use strict"; - - if ( typeof module === "object" && typeof module.exports === "object" ) { - - // For CommonJS and CommonJS-like environments where a proper `window` - // is present, execute the factory and get jQuery. - // For environments that do not have a `window` with a `document` - // (such as Node.js), expose a factory as module.exports. - // This accentuates the need for the creation of a real `window`. - // e.g. 
var jQuery = require("jquery")(window); - // See ticket #14549 for more info. - module.exports = global.document ? - factory( global, true ) : - function( w ) { - if ( !w.document ) { - throw new Error( "jQuery requires a window with a document" ); - } - return factory( w ); - }; - } else { - factory( global ); - } - -// Pass this if window is not defined yet -} )( typeof window !== "undefined" ? window : this, function( window, noGlobal ) { - -// Edge <= 12 - 13+, Firefox <=18 - 45+, IE 10 - 11, Safari 5.1 - 9+, iOS 6 - 9.1 -// throw exceptions when non-strict code (e.g., ASP.NET 4.5) accesses strict mode -// arguments.callee.caller (trac-13335). But as of jQuery 3.0 (2016), strict mode should be common -// enough that all such attempts are guarded in a try block. -"use strict"; - -var arr = []; - -var getProto = Object.getPrototypeOf; - -var slice = arr.slice; - -var flat = arr.flat ? function( array ) { - return arr.flat.call( array ); -} : function( array ) { - return arr.concat.apply( [], array ); -}; - - -var push = arr.push; - -var indexOf = arr.indexOf; - -var class2type = {}; - -var toString = class2type.toString; - -var hasOwn = class2type.hasOwnProperty; - -var fnToString = hasOwn.toString; - -var ObjectFunctionString = fnToString.call( Object ); - -var support = {}; - -var isFunction = function isFunction( obj ) { - - // Support: Chrome <=57, Firefox <=52 - // In some browsers, typeof returns "function" for HTML elements - // (i.e., `typeof document.createElement( "object" ) === "function"`). - // We don't want to classify *any* DOM node as a function. - // Support: QtWeb <=3.8.5, WebKit <=534.34, wkhtmltopdf tool <=0.12.5 - // Plus for old WebKit, typeof returns "function" for HTML collections - // (e.g., `typeof document.getElementsByTagName("div") === "function"`). 
(gh-4756) - return typeof obj === "function" && typeof obj.nodeType !== "number" && - typeof obj.item !== "function"; - }; - - -var isWindow = function isWindow( obj ) { - return obj != null && obj === obj.window; - }; - - -var document = window.document; - - - - var preservedScriptAttributes = { - type: true, - src: true, - nonce: true, - noModule: true - }; - - function DOMEval( code, node, doc ) { - doc = doc || document; - - var i, val, - script = doc.createElement( "script" ); - - script.text = code; - if ( node ) { - for ( i in preservedScriptAttributes ) { - - // Support: Firefox 64+, Edge 18+ - // Some browsers don't support the "nonce" property on scripts. - // On the other hand, just using `getAttribute` is not enough as - // the `nonce` attribute is reset to an empty string whenever it - // becomes browsing-context connected. - // See https://github.com/whatwg/html/issues/2369 - // See https://html.spec.whatwg.org/#nonce-attributes - // The `node.getAttribute` check was added for the sake of - // `jQuery.globalEval` so that it can fake a nonce-containing node - // via an object. - val = node[ i ] || node.getAttribute && node.getAttribute( i ); - if ( val ) { - script.setAttribute( i, val ); - } - } - } - doc.head.appendChild( script ).parentNode.removeChild( script ); - } - - -function toType( obj ) { - if ( obj == null ) { - return obj + ""; - } - - // Support: Android <=2.3 only (functionish RegExp) - return typeof obj === "object" || typeof obj === "function" ? 
- class2type[ toString.call( obj ) ] || "object" : - typeof obj; -} -/* global Symbol */ -// Defining this global in .eslintrc.json would create a danger of using the global -// unguarded in another place, it seems safer to define global only for this module - - - -var - version = "3.6.0", - - // Define a local copy of jQuery - jQuery = function( selector, context ) { - - // The jQuery object is actually just the init constructor 'enhanced' - // Need init if jQuery is called (just allow error to be thrown if not included) - return new jQuery.fn.init( selector, context ); - }; - -jQuery.fn = jQuery.prototype = { - - // The current version of jQuery being used - jquery: version, - - constructor: jQuery, - - // The default length of a jQuery object is 0 - length: 0, - - toArray: function() { - return slice.call( this ); - }, - - // Get the Nth element in the matched element set OR - // Get the whole matched element set as a clean array - get: function( num ) { - - // Return all the elements in a clean array - if ( num == null ) { - return slice.call( this ); - } - - // Return just the one element from the set - return num < 0 ? this[ num + this.length ] : this[ num ]; - }, - - // Take an array of elements and push it onto the stack - // (returning the new matched element set) - pushStack: function( elems ) { - - // Build a new jQuery matched element set - var ret = jQuery.merge( this.constructor(), elems ); - - // Add the old object onto the stack (as a reference) - ret.prevObject = this; - - // Return the newly-formed element set - return ret; - }, - - // Execute a callback for every element in the matched set. 
- each: function( callback ) { - return jQuery.each( this, callback ); - }, - - map: function( callback ) { - return this.pushStack( jQuery.map( this, function( elem, i ) { - return callback.call( elem, i, elem ); - } ) ); - }, - - slice: function() { - return this.pushStack( slice.apply( this, arguments ) ); - }, - - first: function() { - return this.eq( 0 ); - }, - - last: function() { - return this.eq( -1 ); - }, - - even: function() { - return this.pushStack( jQuery.grep( this, function( _elem, i ) { - return ( i + 1 ) % 2; - } ) ); - }, - - odd: function() { - return this.pushStack( jQuery.grep( this, function( _elem, i ) { - return i % 2; - } ) ); - }, - - eq: function( i ) { - var len = this.length, - j = +i + ( i < 0 ? len : 0 ); - return this.pushStack( j >= 0 && j < len ? [ this[ j ] ] : [] ); - }, - - end: function() { - return this.prevObject || this.constructor(); - }, - - // For internal use only. - // Behaves like an Array's method, not like a jQuery method. - push: push, - sort: arr.sort, - splice: arr.splice -}; - -jQuery.extend = jQuery.fn.extend = function() { - var options, name, src, copy, copyIsArray, clone, - target = arguments[ 0 ] || {}, - i = 1, - length = arguments.length, - deep = false; - - // Handle a deep copy situation - if ( typeof target === "boolean" ) { - deep = target; - - // Skip the boolean and the target - target = arguments[ i ] || {}; - i++; - } - - // Handle case when target is a string or something (possible in deep copy) - if ( typeof target !== "object" && !isFunction( target ) ) { - target = {}; - } - - // Extend jQuery itself if only one argument is passed - if ( i === length ) { - target = this; - i--; - } - - for ( ; i < length; i++ ) { - - // Only deal with non-null/undefined values - if ( ( options = arguments[ i ] ) != null ) { - - // Extend the base object - for ( name in options ) { - copy = options[ name ]; - - // Prevent Object.prototype pollution - // Prevent never-ending loop - if ( name === "__proto__" || 
target === copy ) { - continue; - } - - // Recurse if we're merging plain objects or arrays - if ( deep && copy && ( jQuery.isPlainObject( copy ) || - ( copyIsArray = Array.isArray( copy ) ) ) ) { - src = target[ name ]; - - // Ensure proper type for the source value - if ( copyIsArray && !Array.isArray( src ) ) { - clone = []; - } else if ( !copyIsArray && !jQuery.isPlainObject( src ) ) { - clone = {}; - } else { - clone = src; - } - copyIsArray = false; - - // Never move original objects, clone them - target[ name ] = jQuery.extend( deep, clone, copy ); - - // Don't bring in undefined values - } else if ( copy !== undefined ) { - target[ name ] = copy; - } - } - } - } - - // Return the modified object - return target; -}; - -jQuery.extend( { - - // Unique for each copy of jQuery on the page - expando: "jQuery" + ( version + Math.random() ).replace( /\D/g, "" ), - - // Assume jQuery is ready without the ready module - isReady: true, - - error: function( msg ) { - throw new Error( msg ); - }, - - noop: function() {}, - - isPlainObject: function( obj ) { - var proto, Ctor; - - // Detect obvious negatives - // Use toString instead of jQuery.type to catch host objects - if ( !obj || toString.call( obj ) !== "[object Object]" ) { - return false; - } - - proto = getProto( obj ); - - // Objects with no prototype (e.g., `Object.create( null )`) are plain - if ( !proto ) { - return true; - } - - // Objects with prototype are plain iff they were constructed by a global Object function - Ctor = hasOwn.call( proto, "constructor" ) && proto.constructor; - return typeof Ctor === "function" && fnToString.call( Ctor ) === ObjectFunctionString; - }, - - isEmptyObject: function( obj ) { - var name; - - for ( name in obj ) { - return false; - } - return true; - }, - - // Evaluates a script in a provided context; falls back to the global one - // if not specified. 
- globalEval: function( code, options, doc ) { - DOMEval( code, { nonce: options && options.nonce }, doc ); - }, - - each: function( obj, callback ) { - var length, i = 0; - - if ( isArrayLike( obj ) ) { - length = obj.length; - for ( ; i < length; i++ ) { - if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { - break; - } - } - } else { - for ( i in obj ) { - if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { - break; - } - } - } - - return obj; - }, - - // results is for internal usage only - makeArray: function( arr, results ) { - var ret = results || []; - - if ( arr != null ) { - if ( isArrayLike( Object( arr ) ) ) { - jQuery.merge( ret, - typeof arr === "string" ? - [ arr ] : arr - ); - } else { - push.call( ret, arr ); - } - } - - return ret; - }, - - inArray: function( elem, arr, i ) { - return arr == null ? -1 : indexOf.call( arr, elem, i ); - }, - - // Support: Android <=4.0 only, PhantomJS 1 only - // push.apply(_, arraylike) throws on ancient WebKit - merge: function( first, second ) { - var len = +second.length, - j = 0, - i = first.length; - - for ( ; j < len; j++ ) { - first[ i++ ] = second[ j ]; - } - - first.length = i; - - return first; - }, - - grep: function( elems, callback, invert ) { - var callbackInverse, - matches = [], - i = 0, - length = elems.length, - callbackExpect = !invert; - - // Go through the array, only saving the items - // that pass the validator function - for ( ; i < length; i++ ) { - callbackInverse = !callback( elems[ i ], i ); - if ( callbackInverse !== callbackExpect ) { - matches.push( elems[ i ] ); - } - } - - return matches; - }, - - // arg is for internal usage only - map: function( elems, callback, arg ) { - var length, value, - i = 0, - ret = []; - - // Go through the array, translating each of the items to their new values - if ( isArrayLike( elems ) ) { - length = elems.length; - for ( ; i < length; i++ ) { - value = callback( elems[ i ], i, arg ); - - if ( value != null ) { - ret.push( value ); - } - 
} - - // Go through every key on the object, - } else { - for ( i in elems ) { - value = callback( elems[ i ], i, arg ); - - if ( value != null ) { - ret.push( value ); - } - } - } - - // Flatten any nested arrays - return flat( ret ); - }, - - // A global GUID counter for objects - guid: 1, - - // jQuery.support is not used in Core but other projects attach their - // properties to it so it needs to exist. - support: support -} ); - -if ( typeof Symbol === "function" ) { - jQuery.fn[ Symbol.iterator ] = arr[ Symbol.iterator ]; -} - -// Populate the class2type map -jQuery.each( "Boolean Number String Function Array Date RegExp Object Error Symbol".split( " " ), - function( _i, name ) { - class2type[ "[object " + name + "]" ] = name.toLowerCase(); - } ); - -function isArrayLike( obj ) { - - // Support: real iOS 8.2 only (not reproducible in simulator) - // `in` check used to prevent JIT error (gh-2145) - // hasOwn isn't used here due to false negatives - // regarding Nodelist length in IE - var length = !!obj && "length" in obj && obj.length, - type = toType( obj ); - - if ( isFunction( obj ) || isWindow( obj ) ) { - return false; - } - - return type === "array" || length === 0 || - typeof length === "number" && length > 0 && ( length - 1 ) in obj; -} -var Sizzle = -/*! 
- * Sizzle CSS Selector Engine v2.3.6 - * https://sizzlejs.com/ - * - * Copyright JS Foundation and other contributors - * Released under the MIT license - * https://js.foundation/ - * - * Date: 2021-02-16 - */ -( function( window ) { -var i, - support, - Expr, - getText, - isXML, - tokenize, - compile, - select, - outermostContext, - sortInput, - hasDuplicate, - - // Local document vars - setDocument, - document, - docElem, - documentIsHTML, - rbuggyQSA, - rbuggyMatches, - matches, - contains, - - // Instance-specific data - expando = "sizzle" + 1 * new Date(), - preferredDoc = window.document, - dirruns = 0, - done = 0, - classCache = createCache(), - tokenCache = createCache(), - compilerCache = createCache(), - nonnativeSelectorCache = createCache(), - sortOrder = function( a, b ) { - if ( a === b ) { - hasDuplicate = true; - } - return 0; - }, - - // Instance methods - hasOwn = ( {} ).hasOwnProperty, - arr = [], - pop = arr.pop, - pushNative = arr.push, - push = arr.push, - slice = arr.slice, - - // Use a stripped-down indexOf as it's faster than native - // https://jsperf.com/thor-indexof-vs-for/5 - indexOf = function( list, elem ) { - var i = 0, - len = list.length; - for ( ; i < len; i++ ) { - if ( list[ i ] === elem ) { - return i; - } - } - return -1; - }, - - booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|" + - "ismap|loop|multiple|open|readonly|required|scoped", - - // Regular expressions - - // http://www.w3.org/TR/css3-selectors/#whitespace - whitespace = "[\\x20\\t\\r\\n\\f]", - - // https://www.w3.org/TR/css-syntax-3/#ident-token-diagram - identifier = "(?:\\\\[\\da-fA-F]{1,6}" + whitespace + - "?|\\\\[^\\r\\n\\f]|[\\w-]|[^\0-\\x7f])+", - - // Attribute selectors: http://www.w3.org/TR/selectors/#attribute-selectors - attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace + - - // Operator (capture 2) - "*([*^$|!~]?=)" + whitespace + - - // "Attribute values must be CSS identifiers [capture 5] 
- // or strings [capture 3 or capture 4]" - "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" + - whitespace + "*\\]", - - pseudos = ":(" + identifier + ")(?:\\((" + - - // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments: - // 1. quoted (capture 3; capture 4 or capture 5) - "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" + - - // 2. simple (capture 6) - "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" + - - // 3. anything else (capture 2) - ".*" + - ")\\)|)", - - // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter - rwhitespace = new RegExp( whitespace + "+", "g" ), - rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + - whitespace + "+$", "g" ), - - rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), - rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + - "*" ), - rdescend = new RegExp( whitespace + "|>" ), - - rpseudo = new RegExp( pseudos ), - ridentifier = new RegExp( "^" + identifier + "$" ), - - matchExpr = { - "ID": new RegExp( "^#(" + identifier + ")" ), - "CLASS": new RegExp( "^\\.(" + identifier + ")" ), - "TAG": new RegExp( "^(" + identifier + "|[*])" ), - "ATTR": new RegExp( "^" + attributes ), - "PSEUDO": new RegExp( "^" + pseudos ), - "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + - whitespace + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + - whitespace + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), - "bool": new RegExp( "^(?:" + booleans + ")$", "i" ), - - // For use in libraries implementing .is() - // We use this for POS matching in `select` - "needsContext": new RegExp( "^" + whitespace + - "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace + - "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) - }, - - rhtml = /HTML$/i, - rinputs = /^(?:input|select|textarea|button)$/i, - 
rheader = /^h\d$/i, - - rnative = /^[^{]+\{\s*\[native \w/, - - // Easily-parseable/retrievable ID or TAG or CLASS selectors - rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/, - - rsibling = /[+~]/, - - // CSS escapes - // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters - runescape = new RegExp( "\\\\[\\da-fA-F]{1,6}" + whitespace + "?|\\\\([^\\r\\n\\f])", "g" ), - funescape = function( escape, nonHex ) { - var high = "0x" + escape.slice( 1 ) - 0x10000; - - return nonHex ? - - // Strip the backslash prefix from a non-hex escape sequence - nonHex : - - // Replace a hexadecimal escape sequence with the encoded Unicode code point - // Support: IE <=11+ - // For values outside the Basic Multilingual Plane (BMP), manually construct a - // surrogate pair - high < 0 ? - String.fromCharCode( high + 0x10000 ) : - String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 ); - }, - - // CSS string/identifier serialization - // https://drafts.csswg.org/cssom/#common-serializing-idioms - rcssescape = /([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g, - fcssescape = function( ch, asCodePoint ) { - if ( asCodePoint ) { - - // U+0000 NULL becomes U+FFFD REPLACEMENT CHARACTER - if ( ch === "\0" ) { - return "\uFFFD"; - } - - // Control characters and (dependent upon position) numbers get escaped as code points - return ch.slice( 0, -1 ) + "\\" + - ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; - } - - // Other potentially-special ASCII characters get backslash-escaped - return "\\" + ch; - }, - - // Used for iframes - // See setDocument() - // Removing the function wrapper causes a "Permission Denied" - // error in IE - unloadHandler = function() { - setDocument(); - }, - - inDisabledFieldset = addCombinator( - function( elem ) { - return elem.disabled === true && elem.nodeName.toLowerCase() === "fieldset"; - }, - { dir: "parentNode", next: "legend" } - ); - -// Optimize for push.apply( _, NodeList ) -try { - push.apply( - ( arr = slice.call( 
preferredDoc.childNodes ) ), - preferredDoc.childNodes - ); - - // Support: Android<4.0 - // Detect silently failing push.apply - // eslint-disable-next-line no-unused-expressions - arr[ preferredDoc.childNodes.length ].nodeType; -} catch ( e ) { - push = { apply: arr.length ? - - // Leverage slice if possible - function( target, els ) { - pushNative.apply( target, slice.call( els ) ); - } : - - // Support: IE<9 - // Otherwise append directly - function( target, els ) { - var j = target.length, - i = 0; - - // Can't trust NodeList.length - while ( ( target[ j++ ] = els[ i++ ] ) ) {} - target.length = j - 1; - } - }; -} - -function Sizzle( selector, context, results, seed ) { - var m, i, elem, nid, match, groups, newSelector, - newContext = context && context.ownerDocument, - - // nodeType defaults to 9, since context defaults to document - nodeType = context ? context.nodeType : 9; - - results = results || []; - - // Return early from calls with invalid selector or context - if ( typeof selector !== "string" || !selector || - nodeType !== 1 && nodeType !== 9 && nodeType !== 11 ) { - - return results; - } - - // Try to shortcut find operations (as opposed to filters) in HTML documents - if ( !seed ) { - setDocument( context ); - context = context || document; - - if ( documentIsHTML ) { - - // If the selector is sufficiently simple, try using a "get*By*" DOM method - // (excepting DocumentFragment context, where the methods don't exist) - if ( nodeType !== 11 && ( match = rquickExpr.exec( selector ) ) ) { - - // ID selector - if ( ( m = match[ 1 ] ) ) { - - // Document context - if ( nodeType === 9 ) { - if ( ( elem = context.getElementById( m ) ) ) { - - // Support: IE, Opera, Webkit - // TODO: identify versions - // getElementById can match elements by name instead of ID - if ( elem.id === m ) { - results.push( elem ); - return results; - } - } else { - return results; - } - - // Element context - } else { - - // Support: IE, Opera, Webkit - // TODO: identify 
versions - // getElementById can match elements by name instead of ID - if ( newContext && ( elem = newContext.getElementById( m ) ) && - contains( context, elem ) && - elem.id === m ) { - - results.push( elem ); - return results; - } - } - - // Type selector - } else if ( match[ 2 ] ) { - push.apply( results, context.getElementsByTagName( selector ) ); - return results; - - // Class selector - } else if ( ( m = match[ 3 ] ) && support.getElementsByClassName && - context.getElementsByClassName ) { - - push.apply( results, context.getElementsByClassName( m ) ); - return results; - } - } - - // Take advantage of querySelectorAll - if ( support.qsa && - !nonnativeSelectorCache[ selector + " " ] && - ( !rbuggyQSA || !rbuggyQSA.test( selector ) ) && - - // Support: IE 8 only - // Exclude object elements - ( nodeType !== 1 || context.nodeName.toLowerCase() !== "object" ) ) { - - newSelector = selector; - newContext = context; - - // qSA considers elements outside a scoping root when evaluating child or - // descendant combinators, which is not what we want. - // In such cases, we work around the behavior by prefixing every selector in the - // list with an ID selector referencing the scope context. - // The technique has to be used as well when a leading combinator is used - // as such selectors are not recognized by querySelectorAll. - // Thanks to Andrew Dupont for this technique. - if ( nodeType === 1 && - ( rdescend.test( selector ) || rcombinators.test( selector ) ) ) { - - // Expand context for sibling selectors - newContext = rsibling.test( selector ) && testContext( context.parentNode ) || - context; - - // We can use :scope instead of the ID hack if the browser - // supports it & if we're not changing the context. 
- if ( newContext !== context || !support.scope ) { - - // Capture the context ID, setting it first if necessary - if ( ( nid = context.getAttribute( "id" ) ) ) { - nid = nid.replace( rcssescape, fcssescape ); - } else { - context.setAttribute( "id", ( nid = expando ) ); - } - } - - // Prefix every selector in the list - groups = tokenize( selector ); - i = groups.length; - while ( i-- ) { - groups[ i ] = ( nid ? "#" + nid : ":scope" ) + " " + - toSelector( groups[ i ] ); - } - newSelector = groups.join( "," ); - } - - try { - push.apply( results, - newContext.querySelectorAll( newSelector ) - ); - return results; - } catch ( qsaError ) { - nonnativeSelectorCache( selector, true ); - } finally { - if ( nid === expando ) { - context.removeAttribute( "id" ); - } - } - } - } - } - - // All others - return select( selector.replace( rtrim, "$1" ), context, results, seed ); -} - -/** - * Create key-value caches of limited size - * @returns {function(string, object)} Returns the Object data after storing it on itself with - * property name the (space-suffixed) string and (if the cache is larger than Expr.cacheLength) - * deleting the oldest entry - */ -function createCache() { - var keys = []; - - function cache( key, value ) { - - // Use (key + " ") to avoid collision with native prototype properties (see Issue #157) - if ( keys.push( key + " " ) > Expr.cacheLength ) { - - // Only keep the most recent entries - delete cache[ keys.shift() ]; - } - return ( cache[ key + " " ] = value ); - } - return cache; -} - -/** - * Mark a function for special use by Sizzle - * @param {Function} fn The function to mark - */ -function markFunction( fn ) { - fn[ expando ] = true; - return fn; -} - -/** - * Support testing using an element - * @param {Function} fn Passed the created element and returns a boolean result - */ -function assert( fn ) { - var el = document.createElement( "fieldset" ); - - try { - return !!fn( el ); - } catch ( e ) { - return false; - } finally { - - // Remove 
from its parent by default - if ( el.parentNode ) { - el.parentNode.removeChild( el ); - } - - // release memory in IE - el = null; - } -} - -/** - * Adds the same handler for all of the specified attrs - * @param {String} attrs Pipe-separated list of attributes - * @param {Function} handler The method that will be applied - */ -function addHandle( attrs, handler ) { - var arr = attrs.split( "|" ), - i = arr.length; - - while ( i-- ) { - Expr.attrHandle[ arr[ i ] ] = handler; - } -} - -/** - * Checks document order of two siblings - * @param {Element} a - * @param {Element} b - * @returns {Number} Returns less than 0 if a precedes b, greater than 0 if a follows b - */ -function siblingCheck( a, b ) { - var cur = b && a, - diff = cur && a.nodeType === 1 && b.nodeType === 1 && - a.sourceIndex - b.sourceIndex; - - // Use IE sourceIndex if available on both nodes - if ( diff ) { - return diff; - } - - // Check if b follows a - if ( cur ) { - while ( ( cur = cur.nextSibling ) ) { - if ( cur === b ) { - return -1; - } - } - } - - return a ? 
1 : -1; -} - -/** - * Returns a function to use in pseudos for input types - * @param {String} type - */ -function createInputPseudo( type ) { - return function( elem ) { - var name = elem.nodeName.toLowerCase(); - return name === "input" && elem.type === type; - }; -} - -/** - * Returns a function to use in pseudos for buttons - * @param {String} type - */ -function createButtonPseudo( type ) { - return function( elem ) { - var name = elem.nodeName.toLowerCase(); - return ( name === "input" || name === "button" ) && elem.type === type; - }; -} - -/** - * Returns a function to use in pseudos for :enabled/:disabled - * @param {Boolean} disabled true for :disabled; false for :enabled - */ -function createDisabledPseudo( disabled ) { - - // Known :disabled false positives: fieldset[disabled] > legend:nth-of-type(n+2) :can-disable - return function( elem ) { - - // Only certain elements can match :enabled or :disabled - // https://html.spec.whatwg.org/multipage/scripting.html#selector-enabled - // https://html.spec.whatwg.org/multipage/scripting.html#selector-disabled - if ( "form" in elem ) { - - // Check for inherited disabledness on relevant non-disabled elements: - // * listed form-associated elements in a disabled fieldset - // https://html.spec.whatwg.org/multipage/forms.html#category-listed - // https://html.spec.whatwg.org/multipage/forms.html#concept-fe-disabled - // * option elements in a disabled optgroup - // https://html.spec.whatwg.org/multipage/forms.html#concept-option-disabled - // All such elements have a "form" property. 
- if ( elem.parentNode && elem.disabled === false ) { - - // Option elements defer to a parent optgroup if present - if ( "label" in elem ) { - if ( "label" in elem.parentNode ) { - return elem.parentNode.disabled === disabled; - } else { - return elem.disabled === disabled; - } - } - - // Support: IE 6 - 11 - // Use the isDisabled shortcut property to check for disabled fieldset ancestors - return elem.isDisabled === disabled || - - // Where there is no isDisabled, check manually - /* jshint -W018 */ - elem.isDisabled !== !disabled && - inDisabledFieldset( elem ) === disabled; - } - - return elem.disabled === disabled; - - // Try to winnow out elements that can't be disabled before trusting the disabled property. - // Some victims get caught in our net (label, legend, menu, track), but it shouldn't - // even exist on them, let alone have a boolean value. - } else if ( "label" in elem ) { - return elem.disabled === disabled; - } - - // Remaining elements are neither :enabled nor :disabled - return false; - }; -} - -/** - * Returns a function to use in pseudos for positionals - * @param {Function} fn - */ -function createPositionalPseudo( fn ) { - return markFunction( function( argument ) { - argument = +argument; - return markFunction( function( seed, matches ) { - var j, - matchIndexes = fn( [], seed.length, argument ), - i = matchIndexes.length; - - // Match elements found at the specified indexes - while ( i-- ) { - if ( seed[ ( j = matchIndexes[ i ] ) ] ) { - seed[ j ] = !( matches[ j ] = seed[ j ] ); - } - } - } ); - } ); -} - -/** - * Checks a node for validity as a Sizzle context - * @param {Element|Object=} context - * @returns {Element|Object|Boolean} The input node if acceptable, otherwise a falsy value - */ -function testContext( context ) { - return context && typeof context.getElementsByTagName !== "undefined" && context; -} - -// Expose support vars for convenience -support = Sizzle.support = {}; - -/** - * Detects XML nodes - * @param 
{Element|Object} elem An element or a document - * @returns {Boolean} True iff elem is a non-HTML XML node - */ -isXML = Sizzle.isXML = function( elem ) { - var namespace = elem && elem.namespaceURI, - docElem = elem && ( elem.ownerDocument || elem ).documentElement; - - // Support: IE <=8 - // Assume HTML when documentElement doesn't yet exist, such as inside loading iframes - // https://bugs.jquery.com/ticket/4833 - return !rhtml.test( namespace || docElem && docElem.nodeName || "HTML" ); -}; - -/** - * Sets document-related variables once based on the current document - * @param {Element|Object} [doc] An element or document object to use to set the document - * @returns {Object} Returns the current document - */ -setDocument = Sizzle.setDocument = function( node ) { - var hasCompare, subWindow, - doc = node ? node.ownerDocument || node : preferredDoc; - - // Return early if doc is invalid or already selected - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - // eslint-disable-next-line eqeqeq - if ( doc == document || doc.nodeType !== 9 || !doc.documentElement ) { - return document; - } - - // Update global variables - document = doc; - docElem = document.documentElement; - documentIsHTML = !isXML( document ); - - // Support: IE 9 - 11+, Edge 12 - 18+ - // Accessing iframe documents after unload throws "permission denied" errors (jQuery #13936) - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. 
- // eslint-disable-next-line eqeqeq - if ( preferredDoc != document && - ( subWindow = document.defaultView ) && subWindow.top !== subWindow ) { - - // Support: IE 11, Edge - if ( subWindow.addEventListener ) { - subWindow.addEventListener( "unload", unloadHandler, false ); - - // Support: IE 9 - 10 only - } else if ( subWindow.attachEvent ) { - subWindow.attachEvent( "onunload", unloadHandler ); - } - } - - // Support: IE 8 - 11+, Edge 12 - 18+, Chrome <=16 - 25 only, Firefox <=3.6 - 31 only, - // Safari 4 - 5 only, Opera <=11.6 - 12.x only - // IE/Edge & older browsers don't support the :scope pseudo-class. - // Support: Safari 6.0 only - // Safari 6.0 supports :scope but it's an alias of :root there. - support.scope = assert( function( el ) { - docElem.appendChild( el ).appendChild( document.createElement( "div" ) ); - return typeof el.querySelectorAll !== "undefined" && - !el.querySelectorAll( ":scope fieldset div" ).length; - } ); - - /* Attributes - ---------------------------------------------------------------------- */ - - // Support: IE<8 - // Verify that getAttribute really returns attributes and not properties - // (excepting IE8 booleans) - support.attributes = assert( function( el ) { - el.className = "i"; - return !el.getAttribute( "className" ); - } ); - - /* getElement(s)By* - ---------------------------------------------------------------------- */ - - // Check if getElementsByTagName("*") returns only elements - support.getElementsByTagName = assert( function( el ) { - el.appendChild( document.createComment( "" ) ); - return !el.getElementsByTagName( "*" ).length; - } ); - - // Support: IE<9 - support.getElementsByClassName = rnative.test( document.getElementsByClassName ); - - // Support: IE<10 - // Check if getElementById returns elements by name - // The broken getElementById methods don't pick up programmatically-set names, - // so use a roundabout getElementsByName test - support.getById = assert( function( el ) { - docElem.appendChild( el 
).id = expando; - return !document.getElementsByName || !document.getElementsByName( expando ).length; - } ); - - // ID filter and find - if ( support.getById ) { - Expr.filter[ "ID" ] = function( id ) { - var attrId = id.replace( runescape, funescape ); - return function( elem ) { - return elem.getAttribute( "id" ) === attrId; - }; - }; - Expr.find[ "ID" ] = function( id, context ) { - if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { - var elem = context.getElementById( id ); - return elem ? [ elem ] : []; - } - }; - } else { - Expr.filter[ "ID" ] = function( id ) { - var attrId = id.replace( runescape, funescape ); - return function( elem ) { - var node = typeof elem.getAttributeNode !== "undefined" && - elem.getAttributeNode( "id" ); - return node && node.value === attrId; - }; - }; - - // Support: IE 6 - 7 only - // getElementById is not reliable as a find shortcut - Expr.find[ "ID" ] = function( id, context ) { - if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { - var node, i, elems, - elem = context.getElementById( id ); - - if ( elem ) { - - // Verify the id attribute - node = elem.getAttributeNode( "id" ); - if ( node && node.value === id ) { - return [ elem ]; - } - - // Fall back on getElementsByName - elems = context.getElementsByName( id ); - i = 0; - while ( ( elem = elems[ i++ ] ) ) { - node = elem.getAttributeNode( "id" ); - if ( node && node.value === id ) { - return [ elem ]; - } - } - } - - return []; - } - }; - } - - // Tag - Expr.find[ "TAG" ] = support.getElementsByTagName ? 
- function( tag, context ) { - if ( typeof context.getElementsByTagName !== "undefined" ) { - return context.getElementsByTagName( tag ); - - // DocumentFragment nodes don't have gEBTN - } else if ( support.qsa ) { - return context.querySelectorAll( tag ); - } - } : - - function( tag, context ) { - var elem, - tmp = [], - i = 0, - - // By happy coincidence, a (broken) gEBTN appears on DocumentFragment nodes too - results = context.getElementsByTagName( tag ); - - // Filter out possible comments - if ( tag === "*" ) { - while ( ( elem = results[ i++ ] ) ) { - if ( elem.nodeType === 1 ) { - tmp.push( elem ); - } - } - - return tmp; - } - return results; - }; - - // Class - Expr.find[ "CLASS" ] = support.getElementsByClassName && function( className, context ) { - if ( typeof context.getElementsByClassName !== "undefined" && documentIsHTML ) { - return context.getElementsByClassName( className ); - } - }; - - /* QSA/matchesSelector - ---------------------------------------------------------------------- */ - - // QSA and matchesSelector support - - // matchesSelector(:active) reports false when true (IE9/Opera 11.5) - rbuggyMatches = []; - - // qSa(:focus) reports false when true (Chrome 21) - // We allow this because of a bug in IE8/9 that throws an error - // whenever `document.activeElement` is accessed on an iframe - // So, we allow :focus to pass through QSA all the time to avoid the IE error - // See https://bugs.jquery.com/ticket/13378 - rbuggyQSA = []; - - if ( ( support.qsa = rnative.test( document.querySelectorAll ) ) ) { - - // Build QSA regex - // Regex strategy adopted from Diego Perini - assert( function( el ) { - - var input; - - // Select is set to empty string on purpose - // This is to test IE's treatment of not explicitly - // setting a boolean content attribute, - // since its presence should be enough - // https://bugs.jquery.com/ticket/12359 - docElem.appendChild( el ).innerHTML = "" + - ""; - - // Support: IE8, Opera 11-12.16 - // Nothing should 
be selected when empty strings follow ^= or $= or *= - // The test attribute must be unknown in Opera but "safe" for WinRT - // https://msdn.microsoft.com/en-us/library/ie/hh465388.aspx#attribute_section - if ( el.querySelectorAll( "[msallowcapture^='']" ).length ) { - rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:''|\"\")" ); - } - - // Support: IE8 - // Boolean attributes and "value" are not treated correctly - if ( !el.querySelectorAll( "[selected]" ).length ) { - rbuggyQSA.push( "\\[" + whitespace + "*(?:value|" + booleans + ")" ); - } - - // Support: Chrome<29, Android<4.4, Safari<7.0+, iOS<7.0+, PhantomJS<1.9.8+ - if ( !el.querySelectorAll( "[id~=" + expando + "-]" ).length ) { - rbuggyQSA.push( "~=" ); - } - - // Support: IE 11+, Edge 15 - 18+ - // IE 11/Edge don't find elements on a `[name='']` query in some cases. - // Adding a temporary attribute to the document before the selection works - // around the issue. - // Interestingly, IE 10 & older don't seem to have the issue. - input = document.createElement( "input" ); - input.setAttribute( "name", "" ); - el.appendChild( input ); - if ( !el.querySelectorAll( "[name='']" ).length ) { - rbuggyQSA.push( "\\[" + whitespace + "*name" + whitespace + "*=" + - whitespace + "*(?:''|\"\")" ); - } - - // Webkit/Opera - :checked should return selected option elements - // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked - // IE8 throws error here and will not see later tests - if ( !el.querySelectorAll( ":checked" ).length ) { - rbuggyQSA.push( ":checked" ); - } - - // Support: Safari 8+, iOS 8+ - // https://bugs.webkit.org/show_bug.cgi?id=136851 - // In-page `selector#id sibling-combinator selector` fails - if ( !el.querySelectorAll( "a#" + expando + "+*" ).length ) { - rbuggyQSA.push( ".#.+[+~]" ); - } - - // Support: Firefox <=3.6 - 5 only - // Old Firefox doesn't throw on a badly-escaped identifier. 
- el.querySelectorAll( "\\\f" ); - rbuggyQSA.push( "[\\r\\n\\f]" ); - } ); - - assert( function( el ) { - el.innerHTML = "" + - ""; - - // Support: Windows 8 Native Apps - // The type and name attributes are restricted during .innerHTML assignment - var input = document.createElement( "input" ); - input.setAttribute( "type", "hidden" ); - el.appendChild( input ).setAttribute( "name", "D" ); - - // Support: IE8 - // Enforce case-sensitivity of name attribute - if ( el.querySelectorAll( "[name=d]" ).length ) { - rbuggyQSA.push( "name" + whitespace + "*[*^$|!~]?=" ); - } - - // FF 3.5 - :enabled/:disabled and hidden elements (hidden elements are still enabled) - // IE8 throws error here and will not see later tests - if ( el.querySelectorAll( ":enabled" ).length !== 2 ) { - rbuggyQSA.push( ":enabled", ":disabled" ); - } - - // Support: IE9-11+ - // IE's :disabled selector does not pick up the children of disabled fieldsets - docElem.appendChild( el ).disabled = true; - if ( el.querySelectorAll( ":disabled" ).length !== 2 ) { - rbuggyQSA.push( ":enabled", ":disabled" ); - } - - // Support: Opera 10 - 11 only - // Opera 10-11 does not throw on post-comma invalid pseudos - el.querySelectorAll( "*,:x" ); - rbuggyQSA.push( ",.*:" ); - } ); - } - - if ( ( support.matchesSelector = rnative.test( ( matches = docElem.matches || - docElem.webkitMatchesSelector || - docElem.mozMatchesSelector || - docElem.oMatchesSelector || - docElem.msMatchesSelector ) ) ) ) { - - assert( function( el ) { - - // Check to see if it's possible to do matchesSelector - // on a disconnected node (IE 9) - support.disconnectedMatch = matches.call( el, "*" ); - - // This should fail with an exception - // Gecko does not error, returns false instead - matches.call( el, "[s!='']:x" ); - rbuggyMatches.push( "!=", pseudos ); - } ); - } - - rbuggyQSA = rbuggyQSA.length && new RegExp( rbuggyQSA.join( "|" ) ); - rbuggyMatches = rbuggyMatches.length && new RegExp( rbuggyMatches.join( "|" ) ); - - /* Contains 
- ---------------------------------------------------------------------- */ - hasCompare = rnative.test( docElem.compareDocumentPosition ); - - // Element contains another - // Purposefully self-exclusive - // As in, an element does not contain itself - contains = hasCompare || rnative.test( docElem.contains ) ? - function( a, b ) { - var adown = a.nodeType === 9 ? a.documentElement : a, - bup = b && b.parentNode; - return a === bup || !!( bup && bup.nodeType === 1 && ( - adown.contains ? - adown.contains( bup ) : - a.compareDocumentPosition && a.compareDocumentPosition( bup ) & 16 - ) ); - } : - function( a, b ) { - if ( b ) { - while ( ( b = b.parentNode ) ) { - if ( b === a ) { - return true; - } - } - } - return false; - }; - - /* Sorting - ---------------------------------------------------------------------- */ - - // Document order sorting - sortOrder = hasCompare ? - function( a, b ) { - - // Flag for duplicate removal - if ( a === b ) { - hasDuplicate = true; - return 0; - } - - // Sort on method existence if only one input has compareDocumentPosition - var compare = !a.compareDocumentPosition - !b.compareDocumentPosition; - if ( compare ) { - return compare; - } - - // Calculate position if both inputs belong to the same document - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - // eslint-disable-next-line eqeqeq - compare = ( a.ownerDocument || a ) == ( b.ownerDocument || b ) ? - a.compareDocumentPosition( b ) : - - // Otherwise we know they are disconnected - 1; - - // Disconnected nodes - if ( compare & 1 || - ( !support.sortDetached && b.compareDocumentPosition( a ) === compare ) ) { - - // Choose the first element that is related to our preferred document - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. 
- // eslint-disable-next-line eqeqeq - if ( a == document || a.ownerDocument == preferredDoc && - contains( preferredDoc, a ) ) { - return -1; - } - - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - // eslint-disable-next-line eqeqeq - if ( b == document || b.ownerDocument == preferredDoc && - contains( preferredDoc, b ) ) { - return 1; - } - - // Maintain original order - return sortInput ? - ( indexOf( sortInput, a ) - indexOf( sortInput, b ) ) : - 0; - } - - return compare & 4 ? -1 : 1; - } : - function( a, b ) { - - // Exit early if the nodes are identical - if ( a === b ) { - hasDuplicate = true; - return 0; - } - - var cur, - i = 0, - aup = a.parentNode, - bup = b.parentNode, - ap = [ a ], - bp = [ b ]; - - // Parentless nodes are either documents or disconnected - if ( !aup || !bup ) { - - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - /* eslint-disable eqeqeq */ - return a == document ? -1 : - b == document ? 1 : - /* eslint-enable eqeqeq */ - aup ? -1 : - bup ? 1 : - sortInput ? - ( indexOf( sortInput, a ) - indexOf( sortInput, b ) ) : - 0; - - // If the nodes are siblings, we can do a quick check - } else if ( aup === bup ) { - return siblingCheck( a, b ); - } - - // Otherwise we need full lists of their ancestors for comparison - cur = a; - while ( ( cur = cur.parentNode ) ) { - ap.unshift( cur ); - } - cur = b; - while ( ( cur = cur.parentNode ) ) { - bp.unshift( cur ); - } - - // Walk down the tree looking for a discrepancy - while ( ap[ i ] === bp[ i ] ) { - i++; - } - - return i ? 
- - // Do a sibling check if the nodes have a common ancestor - siblingCheck( ap[ i ], bp[ i ] ) : - - // Otherwise nodes in our document sort first - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - /* eslint-disable eqeqeq */ - ap[ i ] == preferredDoc ? -1 : - bp[ i ] == preferredDoc ? 1 : - /* eslint-enable eqeqeq */ - 0; - }; - - return document; -}; - -Sizzle.matches = function( expr, elements ) { - return Sizzle( expr, null, null, elements ); -}; - -Sizzle.matchesSelector = function( elem, expr ) { - setDocument( elem ); - - if ( support.matchesSelector && documentIsHTML && - !nonnativeSelectorCache[ expr + " " ] && - ( !rbuggyMatches || !rbuggyMatches.test( expr ) ) && - ( !rbuggyQSA || !rbuggyQSA.test( expr ) ) ) { - - try { - var ret = matches.call( elem, expr ); - - // IE 9's matchesSelector returns false on disconnected nodes - if ( ret || support.disconnectedMatch || - - // As well, disconnected nodes are said to be in a document - // fragment in IE 9 - elem.document && elem.document.nodeType !== 11 ) { - return ret; - } - } catch ( e ) { - nonnativeSelectorCache( expr, true ); - } - } - - return Sizzle( expr, document, null, [ elem ] ).length > 0; -}; - -Sizzle.contains = function( context, elem ) { - - // Set document vars if needed - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - // eslint-disable-next-line eqeqeq - if ( ( context.ownerDocument || context ) != document ) { - setDocument( context ); - } - return contains( context, elem ); -}; - -Sizzle.attr = function( elem, name ) { - - // Set document vars if needed - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. 
- // eslint-disable-next-line eqeqeq - if ( ( elem.ownerDocument || elem ) != document ) { - setDocument( elem ); - } - - var fn = Expr.attrHandle[ name.toLowerCase() ], - - // Don't get fooled by Object.prototype properties (jQuery #13807) - val = fn && hasOwn.call( Expr.attrHandle, name.toLowerCase() ) ? - fn( elem, name, !documentIsHTML ) : - undefined; - - return val !== undefined ? - val : - support.attributes || !documentIsHTML ? - elem.getAttribute( name ) : - ( val = elem.getAttributeNode( name ) ) && val.specified ? - val.value : - null; -}; - -Sizzle.escape = function( sel ) { - return ( sel + "" ).replace( rcssescape, fcssescape ); -}; - -Sizzle.error = function( msg ) { - throw new Error( "Syntax error, unrecognized expression: " + msg ); -}; - -/** - * Document sorting and removing duplicates - * @param {ArrayLike} results - */ -Sizzle.uniqueSort = function( results ) { - var elem, - duplicates = [], - j = 0, - i = 0; - - // Unless we *know* we can detect duplicates, assume their presence - hasDuplicate = !support.detectDuplicates; - sortInput = !support.sortStable && results.slice( 0 ); - results.sort( sortOrder ); - - if ( hasDuplicate ) { - while ( ( elem = results[ i++ ] ) ) { - if ( elem === results[ i ] ) { - j = duplicates.push( i ); - } - } - while ( j-- ) { - results.splice( duplicates[ j ], 1 ); - } - } - - // Clear input after sorting to release objects - // See https://github.com/jquery/sizzle/pull/225 - sortInput = null; - - return results; -}; - -/** - * Utility function for retrieving the text value of an array of DOM nodes - * @param {Array|Element} elem - */ -getText = Sizzle.getText = function( elem ) { - var node, - ret = "", - i = 0, - nodeType = elem.nodeType; - - if ( !nodeType ) { - - // If no nodeType, this is expected to be an array - while ( ( node = elem[ i++ ] ) ) { - - // Do not traverse comment nodes - ret += getText( node ); - } - } else if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) { - - // Use textContent 
for elements - // innerText usage removed for consistency of new lines (jQuery #11153) - if ( typeof elem.textContent === "string" ) { - return elem.textContent; - } else { - - // Traverse its children - for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { - ret += getText( elem ); - } - } - } else if ( nodeType === 3 || nodeType === 4 ) { - return elem.nodeValue; - } - - // Do not include comment or processing instruction nodes - - return ret; -}; - -Expr = Sizzle.selectors = { - - // Can be adjusted by the user - cacheLength: 50, - - createPseudo: markFunction, - - match: matchExpr, - - attrHandle: {}, - - find: {}, - - relative: { - ">": { dir: "parentNode", first: true }, - " ": { dir: "parentNode" }, - "+": { dir: "previousSibling", first: true }, - "~": { dir: "previousSibling" } - }, - - preFilter: { - "ATTR": function( match ) { - match[ 1 ] = match[ 1 ].replace( runescape, funescape ); - - // Move the given value to match[3] whether quoted or unquoted - match[ 3 ] = ( match[ 3 ] || match[ 4 ] || - match[ 5 ] || "" ).replace( runescape, funescape ); - - if ( match[ 2 ] === "~=" ) { - match[ 3 ] = " " + match[ 3 ] + " "; - } - - return match.slice( 0, 4 ); - }, - - "CHILD": function( match ) { - - /* matches from matchExpr["CHILD"] - 1 type (only|nth|...) - 2 what (child|of-type) - 3 argument (even|odd|\d*|\d*n([+-]\d+)?|...) - 4 xn-component of xn+y argument ([+-]?\d*n|) - 5 sign of xn-component - 6 x of xn-component - 7 sign of y-component - 8 y of y-component - */ - match[ 1 ] = match[ 1 ].toLowerCase(); - - if ( match[ 1 ].slice( 0, 3 ) === "nth" ) { - - // nth-* requires argument - if ( !match[ 3 ] ) { - Sizzle.error( match[ 0 ] ); - } - - // numeric x and y parameters for Expr.filter.CHILD - // remember that false/true cast respectively to 0/1 - match[ 4 ] = +( match[ 4 ] ? 
- match[ 5 ] + ( match[ 6 ] || 1 ) : - 2 * ( match[ 3 ] === "even" || match[ 3 ] === "odd" ) ); - match[ 5 ] = +( ( match[ 7 ] + match[ 8 ] ) || match[ 3 ] === "odd" ); - - // other types prohibit arguments - } else if ( match[ 3 ] ) { - Sizzle.error( match[ 0 ] ); - } - - return match; - }, - - "PSEUDO": function( match ) { - var excess, - unquoted = !match[ 6 ] && match[ 2 ]; - - if ( matchExpr[ "CHILD" ].test( match[ 0 ] ) ) { - return null; - } - - // Accept quoted arguments as-is - if ( match[ 3 ] ) { - match[ 2 ] = match[ 4 ] || match[ 5 ] || ""; - - // Strip excess characters from unquoted arguments - } else if ( unquoted && rpseudo.test( unquoted ) && - - // Get excess from tokenize (recursively) - ( excess = tokenize( unquoted, true ) ) && - - // advance to the next closing parenthesis - ( excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length ) ) { - - // excess is a negative index - match[ 0 ] = match[ 0 ].slice( 0, excess ); - match[ 2 ] = unquoted.slice( 0, excess ); - } - - // Return only captures needed by the pseudo filter method (type and argument) - return match.slice( 0, 3 ); - } - }, - - filter: { - - "TAG": function( nodeNameSelector ) { - var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase(); - return nodeNameSelector === "*" ? 
- function() { - return true; - } : - function( elem ) { - return elem.nodeName && elem.nodeName.toLowerCase() === nodeName; - }; - }, - - "CLASS": function( className ) { - var pattern = classCache[ className + " " ]; - - return pattern || - ( pattern = new RegExp( "(^|" + whitespace + - ")" + className + "(" + whitespace + "|$)" ) ) && classCache( - className, function( elem ) { - return pattern.test( - typeof elem.className === "string" && elem.className || - typeof elem.getAttribute !== "undefined" && - elem.getAttribute( "class" ) || - "" - ); - } ); - }, - - "ATTR": function( name, operator, check ) { - return function( elem ) { - var result = Sizzle.attr( elem, name ); - - if ( result == null ) { - return operator === "!="; - } - if ( !operator ) { - return true; - } - - result += ""; - - /* eslint-disable max-len */ - - return operator === "=" ? result === check : - operator === "!=" ? result !== check : - operator === "^=" ? check && result.indexOf( check ) === 0 : - operator === "*=" ? check && result.indexOf( check ) > -1 : - operator === "$=" ? check && result.slice( -check.length ) === check : - operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 : - operator === "|=" ? result === check || result.slice( 0, check.length + 1 ) === check + "-" : - false; - /* eslint-enable max-len */ - - }; - }, - - "CHILD": function( type, what, _argument, first, last ) { - var simple = type.slice( 0, 3 ) !== "nth", - forward = type.slice( -4 ) !== "last", - ofType = what === "of-type"; - - return first === 1 && last === 0 ? - - // Shortcut for :nth-*(n) - function( elem ) { - return !!elem.parentNode; - } : - - function( elem, _context, xml ) { - var cache, uniqueCache, outerCache, node, nodeIndex, start, - dir = simple !== forward ? 
"nextSibling" : "previousSibling", - parent = elem.parentNode, - name = ofType && elem.nodeName.toLowerCase(), - useCache = !xml && !ofType, - diff = false; - - if ( parent ) { - - // :(first|last|only)-(child|of-type) - if ( simple ) { - while ( dir ) { - node = elem; - while ( ( node = node[ dir ] ) ) { - if ( ofType ? - node.nodeName.toLowerCase() === name : - node.nodeType === 1 ) { - - return false; - } - } - - // Reverse direction for :only-* (if we haven't yet done so) - start = dir = type === "only" && !start && "nextSibling"; - } - return true; - } - - start = [ forward ? parent.firstChild : parent.lastChild ]; - - // non-xml :nth-child(...) stores cache data on `parent` - if ( forward && useCache ) { - - // Seek `elem` from a previously-cached index - - // ...in a gzip-friendly way - node = parent; - outerCache = node[ expando ] || ( node[ expando ] = {} ); - - // Support: IE <9 only - // Defend against cloned attroperties (jQuery gh-1709) - uniqueCache = outerCache[ node.uniqueID ] || - ( outerCache[ node.uniqueID ] = {} ); - - cache = uniqueCache[ type ] || []; - nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ]; - diff = nodeIndex && cache[ 2 ]; - node = nodeIndex && parent.childNodes[ nodeIndex ]; - - while ( ( node = ++nodeIndex && node && node[ dir ] || - - // Fallback to seeking `elem` from the start - ( diff = nodeIndex = 0 ) || start.pop() ) ) { - - // When found, cache indexes on `parent` and break - if ( node.nodeType === 1 && ++diff && node === elem ) { - uniqueCache[ type ] = [ dirruns, nodeIndex, diff ]; - break; - } - } - - } else { - - // Use previously-cached element index if available - if ( useCache ) { - - // ...in a gzip-friendly way - node = elem; - outerCache = node[ expando ] || ( node[ expando ] = {} ); - - // Support: IE <9 only - // Defend against cloned attroperties (jQuery gh-1709) - uniqueCache = outerCache[ node.uniqueID ] || - ( outerCache[ node.uniqueID ] = {} ); - - cache = uniqueCache[ type ] || []; - nodeIndex = cache[ 0 
] === dirruns && cache[ 1 ]; - diff = nodeIndex; - } - - // xml :nth-child(...) - // or :nth-last-child(...) or :nth(-last)?-of-type(...) - if ( diff === false ) { - - // Use the same loop as above to seek `elem` from the start - while ( ( node = ++nodeIndex && node && node[ dir ] || - ( diff = nodeIndex = 0 ) || start.pop() ) ) { - - if ( ( ofType ? - node.nodeName.toLowerCase() === name : - node.nodeType === 1 ) && - ++diff ) { - - // Cache the index of each encountered element - if ( useCache ) { - outerCache = node[ expando ] || - ( node[ expando ] = {} ); - - // Support: IE <9 only - // Defend against cloned attroperties (jQuery gh-1709) - uniqueCache = outerCache[ node.uniqueID ] || - ( outerCache[ node.uniqueID ] = {} ); - - uniqueCache[ type ] = [ dirruns, diff ]; - } - - if ( node === elem ) { - break; - } - } - } - } - } - - // Incorporate the offset, then check against cycle size - diff -= last; - return diff === first || ( diff % first === 0 && diff / first >= 0 ); - } - }; - }, - - "PSEUDO": function( pseudo, argument ) { - - // pseudo-class names are case-insensitive - // http://www.w3.org/TR/selectors/#pseudo-classes - // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters - // Remember that setFilters inherits from pseudos - var args, - fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] || - Sizzle.error( "unsupported pseudo: " + pseudo ); - - // The user may use createPseudo to indicate that - // arguments are needed to create the filter function - // just as Sizzle does - if ( fn[ expando ] ) { - return fn( argument ); - } - - // But maintain support for old signatures - if ( fn.length > 1 ) { - args = [ pseudo, pseudo, "", argument ]; - return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ? 
- markFunction( function( seed, matches ) { - var idx, - matched = fn( seed, argument ), - i = matched.length; - while ( i-- ) { - idx = indexOf( seed, matched[ i ] ); - seed[ idx ] = !( matches[ idx ] = matched[ i ] ); - } - } ) : - function( elem ) { - return fn( elem, 0, args ); - }; - } - - return fn; - } - }, - - pseudos: { - - // Potentially complex pseudos - "not": markFunction( function( selector ) { - - // Trim the selector passed to compile - // to avoid treating leading and trailing - // spaces as combinators - var input = [], - results = [], - matcher = compile( selector.replace( rtrim, "$1" ) ); - - return matcher[ expando ] ? - markFunction( function( seed, matches, _context, xml ) { - var elem, - unmatched = matcher( seed, null, xml, [] ), - i = seed.length; - - // Match elements unmatched by `matcher` - while ( i-- ) { - if ( ( elem = unmatched[ i ] ) ) { - seed[ i ] = !( matches[ i ] = elem ); - } - } - } ) : - function( elem, _context, xml ) { - input[ 0 ] = elem; - matcher( input, null, xml, results ); - - // Don't keep the element (issue #299) - input[ 0 ] = null; - return !results.pop(); - }; - } ), - - "has": markFunction( function( selector ) { - return function( elem ) { - return Sizzle( selector, elem ).length > 0; - }; - } ), - - "contains": markFunction( function( text ) { - text = text.replace( runescape, funescape ); - return function( elem ) { - return ( elem.textContent || getText( elem ) ).indexOf( text ) > -1; - }; - } ), - - // "Whether an element is represented by a :lang() selector - // is based solely on the element's language value - // being equal to the identifier C, - // or beginning with the identifier C immediately followed by "-". - // The matching of C against the element's language value is performed case-insensitively. - // The identifier C does not have to be a valid language name." 
- // http://www.w3.org/TR/selectors/#lang-pseudo - "lang": markFunction( function( lang ) { - - // lang value must be a valid identifier - if ( !ridentifier.test( lang || "" ) ) { - Sizzle.error( "unsupported lang: " + lang ); - } - lang = lang.replace( runescape, funescape ).toLowerCase(); - return function( elem ) { - var elemLang; - do { - if ( ( elemLang = documentIsHTML ? - elem.lang : - elem.getAttribute( "xml:lang" ) || elem.getAttribute( "lang" ) ) ) { - - elemLang = elemLang.toLowerCase(); - return elemLang === lang || elemLang.indexOf( lang + "-" ) === 0; - } - } while ( ( elem = elem.parentNode ) && elem.nodeType === 1 ); - return false; - }; - } ), - - // Miscellaneous - "target": function( elem ) { - var hash = window.location && window.location.hash; - return hash && hash.slice( 1 ) === elem.id; - }, - - "root": function( elem ) { - return elem === docElem; - }, - - "focus": function( elem ) { - return elem === document.activeElement && - ( !document.hasFocus || document.hasFocus() ) && - !!( elem.type || elem.href || ~elem.tabIndex ); - }, - - // Boolean properties - "enabled": createDisabledPseudo( false ), - "disabled": createDisabledPseudo( true ), - - "checked": function( elem ) { - - // In CSS3, :checked should return both checked and selected elements - // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked - var nodeName = elem.nodeName.toLowerCase(); - return ( nodeName === "input" && !!elem.checked ) || - ( nodeName === "option" && !!elem.selected ); - }, - - "selected": function( elem ) { - - // Accessing this property makes selected-by-default - // options in Safari work properly - if ( elem.parentNode ) { - // eslint-disable-next-line no-unused-expressions - elem.parentNode.selectedIndex; - } - - return elem.selected === true; - }, - - // Contents - "empty": function( elem ) { - - // http://www.w3.org/TR/selectors/#empty-pseudo - // :empty is negated by element (1) or content nodes (text: 3; cdata: 4; entity ref: 5), - // but 
not by others (comment: 8; processing instruction: 7; etc.) - // nodeType < 6 works because attributes (2) do not appear as children - for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { - if ( elem.nodeType < 6 ) { - return false; - } - } - return true; - }, - - "parent": function( elem ) { - return !Expr.pseudos[ "empty" ]( elem ); - }, - - // Element/input types - "header": function( elem ) { - return rheader.test( elem.nodeName ); - }, - - "input": function( elem ) { - return rinputs.test( elem.nodeName ); - }, - - "button": function( elem ) { - var name = elem.nodeName.toLowerCase(); - return name === "input" && elem.type === "button" || name === "button"; - }, - - "text": function( elem ) { - var attr; - return elem.nodeName.toLowerCase() === "input" && - elem.type === "text" && - - // Support: IE<8 - // New HTML5 attribute values (e.g., "search") appear with elem.type === "text" - ( ( attr = elem.getAttribute( "type" ) ) == null || - attr.toLowerCase() === "text" ); - }, - - // Position-in-collection - "first": createPositionalPseudo( function() { - return [ 0 ]; - } ), - - "last": createPositionalPseudo( function( _matchIndexes, length ) { - return [ length - 1 ]; - } ), - - "eq": createPositionalPseudo( function( _matchIndexes, length, argument ) { - return [ argument < 0 ? argument + length : argument ]; - } ), - - "even": createPositionalPseudo( function( matchIndexes, length ) { - var i = 0; - for ( ; i < length; i += 2 ) { - matchIndexes.push( i ); - } - return matchIndexes; - } ), - - "odd": createPositionalPseudo( function( matchIndexes, length ) { - var i = 1; - for ( ; i < length; i += 2 ) { - matchIndexes.push( i ); - } - return matchIndexes; - } ), - - "lt": createPositionalPseudo( function( matchIndexes, length, argument ) { - var i = argument < 0 ? - argument + length : - argument > length ? 
- length : - argument; - for ( ; --i >= 0; ) { - matchIndexes.push( i ); - } - return matchIndexes; - } ), - - "gt": createPositionalPseudo( function( matchIndexes, length, argument ) { - var i = argument < 0 ? argument + length : argument; - for ( ; ++i < length; ) { - matchIndexes.push( i ); - } - return matchIndexes; - } ) - } -}; - -Expr.pseudos[ "nth" ] = Expr.pseudos[ "eq" ]; - -// Add button/input type pseudos -for ( i in { radio: true, checkbox: true, file: true, password: true, image: true } ) { - Expr.pseudos[ i ] = createInputPseudo( i ); -} -for ( i in { submit: true, reset: true } ) { - Expr.pseudos[ i ] = createButtonPseudo( i ); -} - -// Easy API for creating new setFilters -function setFilters() {} -setFilters.prototype = Expr.filters = Expr.pseudos; -Expr.setFilters = new setFilters(); - -tokenize = Sizzle.tokenize = function( selector, parseOnly ) { - var matched, match, tokens, type, - soFar, groups, preFilters, - cached = tokenCache[ selector + " " ]; - - if ( cached ) { - return parseOnly ? 
0 : cached.slice( 0 ); - } - - soFar = selector; - groups = []; - preFilters = Expr.preFilter; - - while ( soFar ) { - - // Comma and first run - if ( !matched || ( match = rcomma.exec( soFar ) ) ) { - if ( match ) { - - // Don't consume trailing commas as valid - soFar = soFar.slice( match[ 0 ].length ) || soFar; - } - groups.push( ( tokens = [] ) ); - } - - matched = false; - - // Combinators - if ( ( match = rcombinators.exec( soFar ) ) ) { - matched = match.shift(); - tokens.push( { - value: matched, - - // Cast descendant combinators to space - type: match[ 0 ].replace( rtrim, " " ) - } ); - soFar = soFar.slice( matched.length ); - } - - // Filters - for ( type in Expr.filter ) { - if ( ( match = matchExpr[ type ].exec( soFar ) ) && ( !preFilters[ type ] || - ( match = preFilters[ type ]( match ) ) ) ) { - matched = match.shift(); - tokens.push( { - value: matched, - type: type, - matches: match - } ); - soFar = soFar.slice( matched.length ); - } - } - - if ( !matched ) { - break; - } - } - - // Return the length of the invalid excess - // if we're just parsing - // Otherwise, throw an error or return tokens - return parseOnly ? - soFar.length : - soFar ? - Sizzle.error( selector ) : - - // Cache the tokens - tokenCache( selector, groups ).slice( 0 ); -}; - -function toSelector( tokens ) { - var i = 0, - len = tokens.length, - selector = ""; - for ( ; i < len; i++ ) { - selector += tokens[ i ].value; - } - return selector; -} - -function addCombinator( matcher, combinator, base ) { - var dir = combinator.dir, - skip = combinator.next, - key = skip || dir, - checkNonElements = base && key === "parentNode", - doneName = done++; - - return combinator.first ? 
- - // Check against closest ancestor/preceding element - function( elem, context, xml ) { - while ( ( elem = elem[ dir ] ) ) { - if ( elem.nodeType === 1 || checkNonElements ) { - return matcher( elem, context, xml ); - } - } - return false; - } : - - // Check against all ancestor/preceding elements - function( elem, context, xml ) { - var oldCache, uniqueCache, outerCache, - newCache = [ dirruns, doneName ]; - - // We can't set arbitrary data on XML nodes, so they don't benefit from combinator caching - if ( xml ) { - while ( ( elem = elem[ dir ] ) ) { - if ( elem.nodeType === 1 || checkNonElements ) { - if ( matcher( elem, context, xml ) ) { - return true; - } - } - } - } else { - while ( ( elem = elem[ dir ] ) ) { - if ( elem.nodeType === 1 || checkNonElements ) { - outerCache = elem[ expando ] || ( elem[ expando ] = {} ); - - // Support: IE <9 only - // Defend against cloned attroperties (jQuery gh-1709) - uniqueCache = outerCache[ elem.uniqueID ] || - ( outerCache[ elem.uniqueID ] = {} ); - - if ( skip && skip === elem.nodeName.toLowerCase() ) { - elem = elem[ dir ] || elem; - } else if ( ( oldCache = uniqueCache[ key ] ) && - oldCache[ 0 ] === dirruns && oldCache[ 1 ] === doneName ) { - - // Assign to newCache so results back-propagate to previous elements - return ( newCache[ 2 ] = oldCache[ 2 ] ); - } else { - - // Reuse newcache so results back-propagate to previous elements - uniqueCache[ key ] = newCache; - - // A match means we're done; a fail means we have to keep checking - if ( ( newCache[ 2 ] = matcher( elem, context, xml ) ) ) { - return true; - } - } - } - } - } - return false; - }; -} - -function elementMatcher( matchers ) { - return matchers.length > 1 ? 
- function( elem, context, xml ) { - var i = matchers.length; - while ( i-- ) { - if ( !matchers[ i ]( elem, context, xml ) ) { - return false; - } - } - return true; - } : - matchers[ 0 ]; -} - -function multipleContexts( selector, contexts, results ) { - var i = 0, - len = contexts.length; - for ( ; i < len; i++ ) { - Sizzle( selector, contexts[ i ], results ); - } - return results; -} - -function condense( unmatched, map, filter, context, xml ) { - var elem, - newUnmatched = [], - i = 0, - len = unmatched.length, - mapped = map != null; - - for ( ; i < len; i++ ) { - if ( ( elem = unmatched[ i ] ) ) { - if ( !filter || filter( elem, context, xml ) ) { - newUnmatched.push( elem ); - if ( mapped ) { - map.push( i ); - } - } - } - } - - return newUnmatched; -} - -function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postSelector ) { - if ( postFilter && !postFilter[ expando ] ) { - postFilter = setMatcher( postFilter ); - } - if ( postFinder && !postFinder[ expando ] ) { - postFinder = setMatcher( postFinder, postSelector ); - } - return markFunction( function( seed, results, context, xml ) { - var temp, i, elem, - preMap = [], - postMap = [], - preexisting = results.length, - - // Get initial elements from seed or context - elems = seed || multipleContexts( - selector || "*", - context.nodeType ? [ context ] : context, - [] - ), - - // Prefilter to get matcher input, preserving a map for seed-results synchronization - matcherIn = preFilter && ( seed || !selector ) ? - condense( elems, preMap, preFilter, context, xml ) : - elems, - - matcherOut = matcher ? - - // If we have a postFinder, or filtered seed, or non-seed postFilter or preexisting results, - postFinder || ( seed ? preFilter : preexisting || postFilter ) ? 
- - // ...intermediate processing is necessary - [] : - - // ...otherwise use results directly - results : - matcherIn; - - // Find primary matches - if ( matcher ) { - matcher( matcherIn, matcherOut, context, xml ); - } - - // Apply postFilter - if ( postFilter ) { - temp = condense( matcherOut, postMap ); - postFilter( temp, [], context, xml ); - - // Un-match failing elements by moving them back to matcherIn - i = temp.length; - while ( i-- ) { - if ( ( elem = temp[ i ] ) ) { - matcherOut[ postMap[ i ] ] = !( matcherIn[ postMap[ i ] ] = elem ); - } - } - } - - if ( seed ) { - if ( postFinder || preFilter ) { - if ( postFinder ) { - - // Get the final matcherOut by condensing this intermediate into postFinder contexts - temp = []; - i = matcherOut.length; - while ( i-- ) { - if ( ( elem = matcherOut[ i ] ) ) { - - // Restore matcherIn since elem is not yet a final match - temp.push( ( matcherIn[ i ] = elem ) ); - } - } - postFinder( null, ( matcherOut = [] ), temp, xml ); - } - - // Move matched elements from seed to results to keep them synchronized - i = matcherOut.length; - while ( i-- ) { - if ( ( elem = matcherOut[ i ] ) && - ( temp = postFinder ? indexOf( seed, elem ) : preMap[ i ] ) > -1 ) { - - seed[ temp ] = !( results[ temp ] = elem ); - } - } - } - - // Add elements to results, through postFinder if defined - } else { - matcherOut = condense( - matcherOut === results ? - matcherOut.splice( preexisting, matcherOut.length ) : - matcherOut - ); - if ( postFinder ) { - postFinder( null, results, matcherOut, xml ); - } else { - push.apply( results, matcherOut ); - } - } - } ); -} - -function matcherFromTokens( tokens ) { - var checkContext, matcher, j, - len = tokens.length, - leadingRelative = Expr.relative[ tokens[ 0 ].type ], - implicitRelative = leadingRelative || Expr.relative[ " " ], - i = leadingRelative ? 
1 : 0, - - // The foundational matcher ensures that elements are reachable from top-level context(s) - matchContext = addCombinator( function( elem ) { - return elem === checkContext; - }, implicitRelative, true ), - matchAnyContext = addCombinator( function( elem ) { - return indexOf( checkContext, elem ) > -1; - }, implicitRelative, true ), - matchers = [ function( elem, context, xml ) { - var ret = ( !leadingRelative && ( xml || context !== outermostContext ) ) || ( - ( checkContext = context ).nodeType ? - matchContext( elem, context, xml ) : - matchAnyContext( elem, context, xml ) ); - - // Avoid hanging onto element (issue #299) - checkContext = null; - return ret; - } ]; - - for ( ; i < len; i++ ) { - if ( ( matcher = Expr.relative[ tokens[ i ].type ] ) ) { - matchers = [ addCombinator( elementMatcher( matchers ), matcher ) ]; - } else { - matcher = Expr.filter[ tokens[ i ].type ].apply( null, tokens[ i ].matches ); - - // Return special upon seeing a positional matcher - if ( matcher[ expando ] ) { - - // Find the next relative operator (if any) for proper handling - j = ++i; - for ( ; j < len; j++ ) { - if ( Expr.relative[ tokens[ j ].type ] ) { - break; - } - } - return setMatcher( - i > 1 && elementMatcher( matchers ), - i > 1 && toSelector( - - // If the preceding token was a descendant combinator, insert an implicit any-element `*` - tokens - .slice( 0, i - 1 ) - .concat( { value: tokens[ i - 2 ].type === " " ? 
"*" : "" } ) - ).replace( rtrim, "$1" ), - matcher, - i < j && matcherFromTokens( tokens.slice( i, j ) ), - j < len && matcherFromTokens( ( tokens = tokens.slice( j ) ) ), - j < len && toSelector( tokens ) - ); - } - matchers.push( matcher ); - } - } - - return elementMatcher( matchers ); -} - -function matcherFromGroupMatchers( elementMatchers, setMatchers ) { - var bySet = setMatchers.length > 0, - byElement = elementMatchers.length > 0, - superMatcher = function( seed, context, xml, results, outermost ) { - var elem, j, matcher, - matchedCount = 0, - i = "0", - unmatched = seed && [], - setMatched = [], - contextBackup = outermostContext, - - // We must always have either seed elements or outermost context - elems = seed || byElement && Expr.find[ "TAG" ]( "*", outermost ), - - // Use integer dirruns iff this is the outermost matcher - dirrunsUnique = ( dirruns += contextBackup == null ? 1 : Math.random() || 0.1 ), - len = elems.length; - - if ( outermost ) { - - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. - // eslint-disable-next-line eqeqeq - outermostContext = context == document || context || outermost; - } - - // Add elements passing elementMatchers directly to results - // Support: IE<9, Safari - // Tolerate NodeList properties (IE: "length"; Safari: ) matching elements by id - for ( ; i !== len && ( elem = elems[ i ] ) != null; i++ ) { - if ( byElement && elem ) { - j = 0; - - // Support: IE 11+, Edge 17 - 18+ - // IE/Edge sometimes throw a "Permission denied" error when strict-comparing - // two documents; shallow comparisons work. 
- // eslint-disable-next-line eqeqeq - if ( !context && elem.ownerDocument != document ) { - setDocument( elem ); - xml = !documentIsHTML; - } - while ( ( matcher = elementMatchers[ j++ ] ) ) { - if ( matcher( elem, context || document, xml ) ) { - results.push( elem ); - break; - } - } - if ( outermost ) { - dirruns = dirrunsUnique; - } - } - - // Track unmatched elements for set filters - if ( bySet ) { - - // They will have gone through all possible matchers - if ( ( elem = !matcher && elem ) ) { - matchedCount--; - } - - // Lengthen the array for every element, matched or not - if ( seed ) { - unmatched.push( elem ); - } - } - } - - // `i` is now the count of elements visited above, and adding it to `matchedCount` - // makes the latter nonnegative. - matchedCount += i; - - // Apply set filters to unmatched elements - // NOTE: This can be skipped if there are no unmatched elements (i.e., `matchedCount` - // equals `i`), unless we didn't visit _any_ elements in the above loop because we have - // no element matchers and no seed. - // Incrementing an initially-string "0" `i` allows `i` to remain a string only in that - // case, which will result in a "00" `matchedCount` that differs from `i` but is also - // numerically zero. 
- if ( bySet && i !== matchedCount ) { - j = 0; - while ( ( matcher = setMatchers[ j++ ] ) ) { - matcher( unmatched, setMatched, context, xml ); - } - - if ( seed ) { - - // Reintegrate element matches to eliminate the need for sorting - if ( matchedCount > 0 ) { - while ( i-- ) { - if ( !( unmatched[ i ] || setMatched[ i ] ) ) { - setMatched[ i ] = pop.call( results ); - } - } - } - - // Discard index placeholder values to get only actual matches - setMatched = condense( setMatched ); - } - - // Add matches to results - push.apply( results, setMatched ); - - // Seedless set matches succeeding multiple successful matchers stipulate sorting - if ( outermost && !seed && setMatched.length > 0 && - ( matchedCount + setMatchers.length ) > 1 ) { - - Sizzle.uniqueSort( results ); - } - } - - // Override manipulation of globals by nested matchers - if ( outermost ) { - dirruns = dirrunsUnique; - outermostContext = contextBackup; - } - - return unmatched; - }; - - return bySet ? - markFunction( superMatcher ) : - superMatcher; -} - -compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) { - var i, - setMatchers = [], - elementMatchers = [], - cached = compilerCache[ selector + " " ]; - - if ( !cached ) { - - // Generate a function of recursive functions that can be used to check each element - if ( !match ) { - match = tokenize( selector ); - } - i = match.length; - while ( i-- ) { - cached = matcherFromTokens( match[ i ] ); - if ( cached[ expando ] ) { - setMatchers.push( cached ); - } else { - elementMatchers.push( cached ); - } - } - - // Cache the compiled function - cached = compilerCache( - selector, - matcherFromGroupMatchers( elementMatchers, setMatchers ) - ); - - // Save selector and tokenization - cached.selector = selector; - } - return cached; -}; - -/** - * A low-level selection function that works with Sizzle's compiled - * selector functions - * @param {String|Function} selector A selector or a pre-compiled - * selector function built 
with Sizzle.compile - * @param {Element} context - * @param {Array} [results] - * @param {Array} [seed] A set of elements to match against - */ -select = Sizzle.select = function( selector, context, results, seed ) { - var i, tokens, token, type, find, - compiled = typeof selector === "function" && selector, - match = !seed && tokenize( ( selector = compiled.selector || selector ) ); - - results = results || []; - - // Try to minimize operations if there is only one selector in the list and no seed - // (the latter of which guarantees us context) - if ( match.length === 1 ) { - - // Reduce context if the leading compound selector is an ID - tokens = match[ 0 ] = match[ 0 ].slice( 0 ); - if ( tokens.length > 2 && ( token = tokens[ 0 ] ).type === "ID" && - context.nodeType === 9 && documentIsHTML && Expr.relative[ tokens[ 1 ].type ] ) { - - context = ( Expr.find[ "ID" ]( token.matches[ 0 ] - .replace( runescape, funescape ), context ) || [] )[ 0 ]; - if ( !context ) { - return results; - - // Precompiled matchers will still verify ancestry, so step up a level - } else if ( compiled ) { - context = context.parentNode; - } - - selector = selector.slice( tokens.shift().value.length ); - } - - // Fetch a seed set for right-to-left matching - i = matchExpr[ "needsContext" ].test( selector ) ? 
0 : tokens.length; - while ( i-- ) { - token = tokens[ i ]; - - // Abort if we hit a combinator - if ( Expr.relative[ ( type = token.type ) ] ) { - break; - } - if ( ( find = Expr.find[ type ] ) ) { - - // Search, expanding context for leading sibling combinators - if ( ( seed = find( - token.matches[ 0 ].replace( runescape, funescape ), - rsibling.test( tokens[ 0 ].type ) && testContext( context.parentNode ) || - context - ) ) ) { - - // If seed is empty or no tokens remain, we can return early - tokens.splice( i, 1 ); - selector = seed.length && toSelector( tokens ); - if ( !selector ) { - push.apply( results, seed ); - return results; - } - - break; - } - } - } - } - - // Compile and execute a filtering function if one is not provided - // Provide `match` to avoid retokenization if we modified the selector above - ( compiled || compile( selector, match ) )( - seed, - context, - !documentIsHTML, - results, - !context || rsibling.test( selector ) && testContext( context.parentNode ) || context - ); - return results; -}; - -// One-time assignments - -// Sort stability -support.sortStable = expando.split( "" ).sort( sortOrder ).join( "" ) === expando; - -// Support: Chrome 14-35+ -// Always assume duplicates if they aren't passed to the comparison function -support.detectDuplicates = !!hasDuplicate; - -// Initialize against the default document -setDocument(); - -// Support: Webkit<537.32 - Safari 6.0.3/Chrome 25 (fixed in Chrome 27) -// Detached nodes confoundingly follow *each other* -support.sortDetached = assert( function( el ) { - - // Should return 1, but returns 4 (following) - return el.compareDocumentPosition( document.createElement( "fieldset" ) ) & 1; -} ); - -// Support: IE<8 -// Prevent attribute/property "interpolation" -// https://msdn.microsoft.com/en-us/library/ms536429%28VS.85%29.aspx -if ( !assert( function( el ) { - el.innerHTML = ""; - return el.firstChild.getAttribute( "href" ) === "#"; -} ) ) { - addHandle( "type|href|height|width", function( 
elem, name, isXML ) { - if ( !isXML ) { - return elem.getAttribute( name, name.toLowerCase() === "type" ? 1 : 2 ); - } - } ); -} - -// Support: IE<9 -// Use defaultValue in place of getAttribute("value") -if ( !support.attributes || !assert( function( el ) { - el.innerHTML = ""; - el.firstChild.setAttribute( "value", "" ); - return el.firstChild.getAttribute( "value" ) === ""; -} ) ) { - addHandle( "value", function( elem, _name, isXML ) { - if ( !isXML && elem.nodeName.toLowerCase() === "input" ) { - return elem.defaultValue; - } - } ); -} - -// Support: IE<9 -// Use getAttributeNode to fetch booleans when getAttribute lies -if ( !assert( function( el ) { - return el.getAttribute( "disabled" ) == null; -} ) ) { - addHandle( booleans, function( elem, name, isXML ) { - var val; - if ( !isXML ) { - return elem[ name ] === true ? name.toLowerCase() : - ( val = elem.getAttributeNode( name ) ) && val.specified ? - val.value : - null; - } - } ); -} - -return Sizzle; - -} )( window ); - - - -jQuery.find = Sizzle; -jQuery.expr = Sizzle.selectors; - -// Deprecated -jQuery.expr[ ":" ] = jQuery.expr.pseudos; -jQuery.uniqueSort = jQuery.unique = Sizzle.uniqueSort; -jQuery.text = Sizzle.getText; -jQuery.isXMLDoc = Sizzle.isXML; -jQuery.contains = Sizzle.contains; -jQuery.escapeSelector = Sizzle.escape; - - - - -var dir = function( elem, dir, until ) { - var matched = [], - truncate = until !== undefined; - - while ( ( elem = elem[ dir ] ) && elem.nodeType !== 9 ) { - if ( elem.nodeType === 1 ) { - if ( truncate && jQuery( elem ).is( until ) ) { - break; - } - matched.push( elem ); - } - } - return matched; -}; - - -var siblings = function( n, elem ) { - var matched = []; - - for ( ; n; n = n.nextSibling ) { - if ( n.nodeType === 1 && n !== elem ) { - matched.push( n ); - } - } - - return matched; -}; - - -var rneedsContext = jQuery.expr.match.needsContext; - - - -function nodeName( elem, name ) { - - return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); - 
-} -var rsingleTag = ( /^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i ); - - - -// Implement the identical functionality for filter and not -function winnow( elements, qualifier, not ) { - if ( isFunction( qualifier ) ) { - return jQuery.grep( elements, function( elem, i ) { - return !!qualifier.call( elem, i, elem ) !== not; - } ); - } - - // Single element - if ( qualifier.nodeType ) { - return jQuery.grep( elements, function( elem ) { - return ( elem === qualifier ) !== not; - } ); - } - - // Arraylike of elements (jQuery, arguments, Array) - if ( typeof qualifier !== "string" ) { - return jQuery.grep( elements, function( elem ) { - return ( indexOf.call( qualifier, elem ) > -1 ) !== not; - } ); - } - - // Filtered directly for both simple and complex selectors - return jQuery.filter( qualifier, elements, not ); -} - -jQuery.filter = function( expr, elems, not ) { - var elem = elems[ 0 ]; - - if ( not ) { - expr = ":not(" + expr + ")"; - } - - if ( elems.length === 1 && elem.nodeType === 1 ) { - return jQuery.find.matchesSelector( elem, expr ) ? [ elem ] : []; - } - - return jQuery.find.matches( expr, jQuery.grep( elems, function( elem ) { - return elem.nodeType === 1; - } ) ); -}; - -jQuery.fn.extend( { - find: function( selector ) { - var i, ret, - len = this.length, - self = this; - - if ( typeof selector !== "string" ) { - return this.pushStack( jQuery( selector ).filter( function() { - for ( i = 0; i < len; i++ ) { - if ( jQuery.contains( self[ i ], this ) ) { - return true; - } - } - } ) ); - } - - ret = this.pushStack( [] ); - - for ( i = 0; i < len; i++ ) { - jQuery.find( selector, self[ i ], ret ); - } - - return len > 1 ? 
jQuery.uniqueSort( ret ) : ret; - }, - filter: function( selector ) { - return this.pushStack( winnow( this, selector || [], false ) ); - }, - not: function( selector ) { - return this.pushStack( winnow( this, selector || [], true ) ); - }, - is: function( selector ) { - return !!winnow( - this, - - // If this is a positional/relative selector, check membership in the returned set - // so $("p:first").is("p:last") won't return true for a doc with two "p". - typeof selector === "string" && rneedsContext.test( selector ) ? - jQuery( selector ) : - selector || [], - false - ).length; - } -} ); - - -// Initialize a jQuery object - - -// A central reference to the root jQuery(document) -var rootjQuery, - - // A simple way to check for HTML strings - // Prioritize #id over to avoid XSS via location.hash (#9521) - // Strict HTML recognition (#11290: must start with <) - // Shortcut simple #id case for speed - rquickExpr = /^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/, - - init = jQuery.fn.init = function( selector, context, root ) { - var match, elem; - - // HANDLE: $(""), $(null), $(undefined), $(false) - if ( !selector ) { - return this; - } - - // Method init() accepts an alternate rootjQuery - // so migrate can support jQuery.sub (gh-2101) - root = root || rootjQuery; - - // Handle HTML strings - if ( typeof selector === "string" ) { - if ( selector[ 0 ] === "<" && - selector[ selector.length - 1 ] === ">" && - selector.length >= 3 ) { - - // Assume that strings that start and end with <> are HTML and skip the regex check - match = [ null, selector, null ]; - - } else { - match = rquickExpr.exec( selector ); - } - - // Match html or make sure no context is specified for #id - if ( match && ( match[ 1 ] || !context ) ) { - - // HANDLE: $(html) -> $(array) - if ( match[ 1 ] ) { - context = context instanceof jQuery ? 
context[ 0 ] : context; - - // Option to run scripts is true for back-compat - // Intentionally let the error be thrown if parseHTML is not present - jQuery.merge( this, jQuery.parseHTML( - match[ 1 ], - context && context.nodeType ? context.ownerDocument || context : document, - true - ) ); - - // HANDLE: $(html, props) - if ( rsingleTag.test( match[ 1 ] ) && jQuery.isPlainObject( context ) ) { - for ( match in context ) { - - // Properties of context are called as methods if possible - if ( isFunction( this[ match ] ) ) { - this[ match ]( context[ match ] ); - - // ...and otherwise set as attributes - } else { - this.attr( match, context[ match ] ); - } - } - } - - return this; - - // HANDLE: $(#id) - } else { - elem = document.getElementById( match[ 2 ] ); - - if ( elem ) { - - // Inject the element directly into the jQuery object - this[ 0 ] = elem; - this.length = 1; - } - return this; - } - - // HANDLE: $(expr, $(...)) - } else if ( !context || context.jquery ) { - return ( context || root ).find( selector ); - - // HANDLE: $(expr, context) - // (which is just equivalent to: $(context).find(expr) - } else { - return this.constructor( context ).find( selector ); - } - - // HANDLE: $(DOMElement) - } else if ( selector.nodeType ) { - this[ 0 ] = selector; - this.length = 1; - return this; - - // HANDLE: $(function) - // Shortcut for document ready - } else if ( isFunction( selector ) ) { - return root.ready !== undefined ? 
- root.ready( selector ) : - - // Execute immediately if ready is not present - selector( jQuery ); - } - - return jQuery.makeArray( selector, this ); - }; - -// Give the init function the jQuery prototype for later instantiation -init.prototype = jQuery.fn; - -// Initialize central reference -rootjQuery = jQuery( document ); - - -var rparentsprev = /^(?:parents|prev(?:Until|All))/, - - // Methods guaranteed to produce a unique set when starting from a unique set - guaranteedUnique = { - children: true, - contents: true, - next: true, - prev: true - }; - -jQuery.fn.extend( { - has: function( target ) { - var targets = jQuery( target, this ), - l = targets.length; - - return this.filter( function() { - var i = 0; - for ( ; i < l; i++ ) { - if ( jQuery.contains( this, targets[ i ] ) ) { - return true; - } - } - } ); - }, - - closest: function( selectors, context ) { - var cur, - i = 0, - l = this.length, - matched = [], - targets = typeof selectors !== "string" && jQuery( selectors ); - - // Positional selectors never match, since there's no _selection_ context - if ( !rneedsContext.test( selectors ) ) { - for ( ; i < l; i++ ) { - for ( cur = this[ i ]; cur && cur !== context; cur = cur.parentNode ) { - - // Always skip document fragments - if ( cur.nodeType < 11 && ( targets ? - targets.index( cur ) > -1 : - - // Don't pass non-elements to Sizzle - cur.nodeType === 1 && - jQuery.find.matchesSelector( cur, selectors ) ) ) { - - matched.push( cur ); - break; - } - } - } - } - - return this.pushStack( matched.length > 1 ? jQuery.uniqueSort( matched ) : matched ); - }, - - // Determine the position of an element within the set - index: function( elem ) { - - // No argument, return index in parent - if ( !elem ) { - return ( this[ 0 ] && this[ 0 ].parentNode ) ? 
this.first().prevAll().length : -1; - } - - // Index in selector - if ( typeof elem === "string" ) { - return indexOf.call( jQuery( elem ), this[ 0 ] ); - } - - // Locate the position of the desired element - return indexOf.call( this, - - // If it receives a jQuery object, the first element is used - elem.jquery ? elem[ 0 ] : elem - ); - }, - - add: function( selector, context ) { - return this.pushStack( - jQuery.uniqueSort( - jQuery.merge( this.get(), jQuery( selector, context ) ) - ) - ); - }, - - addBack: function( selector ) { - return this.add( selector == null ? - this.prevObject : this.prevObject.filter( selector ) - ); - } -} ); - -function sibling( cur, dir ) { - while ( ( cur = cur[ dir ] ) && cur.nodeType !== 1 ) {} - return cur; -} - -jQuery.each( { - parent: function( elem ) { - var parent = elem.parentNode; - return parent && parent.nodeType !== 11 ? parent : null; - }, - parents: function( elem ) { - return dir( elem, "parentNode" ); - }, - parentsUntil: function( elem, _i, until ) { - return dir( elem, "parentNode", until ); - }, - next: function( elem ) { - return sibling( elem, "nextSibling" ); - }, - prev: function( elem ) { - return sibling( elem, "previousSibling" ); - }, - nextAll: function( elem ) { - return dir( elem, "nextSibling" ); - }, - prevAll: function( elem ) { - return dir( elem, "previousSibling" ); - }, - nextUntil: function( elem, _i, until ) { - return dir( elem, "nextSibling", until ); - }, - prevUntil: function( elem, _i, until ) { - return dir( elem, "previousSibling", until ); - }, - siblings: function( elem ) { - return siblings( ( elem.parentNode || {} ).firstChild, elem ); - }, - children: function( elem ) { - return siblings( elem.firstChild ); - }, - contents: function( elem ) { - if ( elem.contentDocument != null && - - // Support: IE 11+ - // elements with no `data` attribute has an object - // `contentDocument` with a `null` prototype. 
- getProto( elem.contentDocument ) ) { - - return elem.contentDocument; - } - - // Support: IE 9 - 11 only, iOS 7 only, Android Browser <=4.3 only - // Treat the template element as a regular one in browsers that - // don't support it. - if ( nodeName( elem, "template" ) ) { - elem = elem.content || elem; - } - - return jQuery.merge( [], elem.childNodes ); - } -}, function( name, fn ) { - jQuery.fn[ name ] = function( until, selector ) { - var matched = jQuery.map( this, fn, until ); - - if ( name.slice( -5 ) !== "Until" ) { - selector = until; - } - - if ( selector && typeof selector === "string" ) { - matched = jQuery.filter( selector, matched ); - } - - if ( this.length > 1 ) { - - // Remove duplicates - if ( !guaranteedUnique[ name ] ) { - jQuery.uniqueSort( matched ); - } - - // Reverse order for parents* and prev-derivatives - if ( rparentsprev.test( name ) ) { - matched.reverse(); - } - } - - return this.pushStack( matched ); - }; -} ); -var rnothtmlwhite = ( /[^\x20\t\r\n\f]+/g ); - - - -// Convert String-formatted options into Object-formatted ones -function createOptions( options ) { - var object = {}; - jQuery.each( options.match( rnothtmlwhite ) || [], function( _, flag ) { - object[ flag ] = true; - } ); - return object; -} - -/* - * Create a callback list using the following parameters: - * - * options: an optional list of space-separated options that will change how - * the callback list behaves or a more traditional option object - * - * By default a callback list will act like an event callback list and can be - * "fired" multiple times. 
- * - * Possible options: - * - * once: will ensure the callback list can only be fired once (like a Deferred) - * - * memory: will keep track of previous values and will call any callback added - * after the list has been fired right away with the latest "memorized" - * values (like a Deferred) - * - * unique: will ensure a callback can only be added once (no duplicate in the list) - * - * stopOnFalse: interrupt callings when a callback returns false - * - */ -jQuery.Callbacks = function( options ) { - - // Convert options from String-formatted to Object-formatted if needed - // (we check in cache first) - options = typeof options === "string" ? - createOptions( options ) : - jQuery.extend( {}, options ); - - var // Flag to know if list is currently firing - firing, - - // Last fire value for non-forgettable lists - memory, - - // Flag to know if list was already fired - fired, - - // Flag to prevent firing - locked, - - // Actual callback list - list = [], - - // Queue of execution data for repeatable lists - queue = [], - - // Index of currently firing callback (modified by add/remove as needed) - firingIndex = -1, - - // Fire callbacks - fire = function() { - - // Enforce single-firing - locked = locked || options.once; - - // Execute callbacks for all pending executions, - // respecting firingIndex overrides and runtime changes - fired = firing = true; - for ( ; queue.length; firingIndex = -1 ) { - memory = queue.shift(); - while ( ++firingIndex < list.length ) { - - // Run callback and check for early termination - if ( list[ firingIndex ].apply( memory[ 0 ], memory[ 1 ] ) === false && - options.stopOnFalse ) { - - // Jump to end and forget the data so .add doesn't re-fire - firingIndex = list.length; - memory = false; - } - } - } - - // Forget the data if we're done with it - if ( !options.memory ) { - memory = false; - } - - firing = false; - - // Clean up if we're done firing for good - if ( locked ) { - - // Keep an empty list if we have data for future 
add calls - if ( memory ) { - list = []; - - // Otherwise, this object is spent - } else { - list = ""; - } - } - }, - - // Actual Callbacks object - self = { - - // Add a callback or a collection of callbacks to the list - add: function() { - if ( list ) { - - // If we have memory from a past run, we should fire after adding - if ( memory && !firing ) { - firingIndex = list.length - 1; - queue.push( memory ); - } - - ( function add( args ) { - jQuery.each( args, function( _, arg ) { - if ( isFunction( arg ) ) { - if ( !options.unique || !self.has( arg ) ) { - list.push( arg ); - } - } else if ( arg && arg.length && toType( arg ) !== "string" ) { - - // Inspect recursively - add( arg ); - } - } ); - } )( arguments ); - - if ( memory && !firing ) { - fire(); - } - } - return this; - }, - - // Remove a callback from the list - remove: function() { - jQuery.each( arguments, function( _, arg ) { - var index; - while ( ( index = jQuery.inArray( arg, list, index ) ) > -1 ) { - list.splice( index, 1 ); - - // Handle firing indexes - if ( index <= firingIndex ) { - firingIndex--; - } - } - } ); - return this; - }, - - // Check if a given callback is in the list. - // If no argument is given, return whether or not list has callbacks attached. - has: function( fn ) { - return fn ? 
- jQuery.inArray( fn, list ) > -1 : - list.length > 0; - }, - - // Remove all callbacks from the list - empty: function() { - if ( list ) { - list = []; - } - return this; - }, - - // Disable .fire and .add - // Abort any current/pending executions - // Clear all callbacks and values - disable: function() { - locked = queue = []; - list = memory = ""; - return this; - }, - disabled: function() { - return !list; - }, - - // Disable .fire - // Also disable .add unless we have memory (since it would have no effect) - // Abort any pending executions - lock: function() { - locked = queue = []; - if ( !memory && !firing ) { - list = memory = ""; - } - return this; - }, - locked: function() { - return !!locked; - }, - - // Call all callbacks with the given context and arguments - fireWith: function( context, args ) { - if ( !locked ) { - args = args || []; - args = [ context, args.slice ? args.slice() : args ]; - queue.push( args ); - if ( !firing ) { - fire(); - } - } - return this; - }, - - // Call all the callbacks with the given arguments - fire: function() { - self.fireWith( this, arguments ); - return this; - }, - - // To know if the callbacks have already been called at least once - fired: function() { - return !!fired; - } - }; - - return self; -}; - - -function Identity( v ) { - return v; -} -function Thrower( ex ) { - throw ex; -} - -function adoptValue( value, resolve, reject, noValue ) { - var method; - - try { - - // Check for promise aspect first to privilege synchronous behavior - if ( value && isFunction( ( method = value.promise ) ) ) { - method.call( value ).done( resolve ).fail( reject ); - - // Other thenables - } else if ( value && isFunction( ( method = value.then ) ) ) { - method.call( value, resolve, reject ); - - // Other non-thenables - } else { - - // Control `resolve` arguments by letting Array#slice cast boolean `noValue` to integer: - // * false: [ value ].slice( 0 ) => resolve( value ) - // * true: [ value ].slice( 1 ) => resolve() - 
resolve.apply( undefined, [ value ].slice( noValue ) ); - } - - // For Promises/A+, convert exceptions into rejections - // Since jQuery.when doesn't unwrap thenables, we can skip the extra checks appearing in - // Deferred#then to conditionally suppress rejection. - } catch ( value ) { - - // Support: Android 4.0 only - // Strict mode functions invoked without .call/.apply get global-object context - reject.apply( undefined, [ value ] ); - } -} - -jQuery.extend( { - - Deferred: function( func ) { - var tuples = [ - - // action, add listener, callbacks, - // ... .then handlers, argument index, [final state] - [ "notify", "progress", jQuery.Callbacks( "memory" ), - jQuery.Callbacks( "memory" ), 2 ], - [ "resolve", "done", jQuery.Callbacks( "once memory" ), - jQuery.Callbacks( "once memory" ), 0, "resolved" ], - [ "reject", "fail", jQuery.Callbacks( "once memory" ), - jQuery.Callbacks( "once memory" ), 1, "rejected" ] - ], - state = "pending", - promise = { - state: function() { - return state; - }, - always: function() { - deferred.done( arguments ).fail( arguments ); - return this; - }, - "catch": function( fn ) { - return promise.then( null, fn ); - }, - - // Keep pipe for back-compat - pipe: function( /* fnDone, fnFail, fnProgress */ ) { - var fns = arguments; - - return jQuery.Deferred( function( newDefer ) { - jQuery.each( tuples, function( _i, tuple ) { - - // Map tuples (progress, done, fail) to arguments (done, fail, progress) - var fn = isFunction( fns[ tuple[ 4 ] ] ) && fns[ tuple[ 4 ] ]; - - // deferred.progress(function() { bind to newDefer or newDefer.notify }) - // deferred.done(function() { bind to newDefer or newDefer.resolve }) - // deferred.fail(function() { bind to newDefer or newDefer.reject }) - deferred[ tuple[ 1 ] ]( function() { - var returned = fn && fn.apply( this, arguments ); - if ( returned && isFunction( returned.promise ) ) { - returned.promise() - .progress( newDefer.notify ) - .done( newDefer.resolve ) - .fail( newDefer.reject ); - } 
else { - newDefer[ tuple[ 0 ] + "With" ]( - this, - fn ? [ returned ] : arguments - ); - } - } ); - } ); - fns = null; - } ).promise(); - }, - then: function( onFulfilled, onRejected, onProgress ) { - var maxDepth = 0; - function resolve( depth, deferred, handler, special ) { - return function() { - var that = this, - args = arguments, - mightThrow = function() { - var returned, then; - - // Support: Promises/A+ section 2.3.3.3.3 - // https://promisesaplus.com/#point-59 - // Ignore double-resolution attempts - if ( depth < maxDepth ) { - return; - } - - returned = handler.apply( that, args ); - - // Support: Promises/A+ section 2.3.1 - // https://promisesaplus.com/#point-48 - if ( returned === deferred.promise() ) { - throw new TypeError( "Thenable self-resolution" ); - } - - // Support: Promises/A+ sections 2.3.3.1, 3.5 - // https://promisesaplus.com/#point-54 - // https://promisesaplus.com/#point-75 - // Retrieve `then` only once - then = returned && - - // Support: Promises/A+ section 2.3.4 - // https://promisesaplus.com/#point-64 - // Only check objects and functions for thenability - ( typeof returned === "object" || - typeof returned === "function" ) && - returned.then; - - // Handle a returned thenable - if ( isFunction( then ) ) { - - // Special processors (notify) just wait for resolution - if ( special ) { - then.call( - returned, - resolve( maxDepth, deferred, Identity, special ), - resolve( maxDepth, deferred, Thrower, special ) - ); - - // Normal processors (resolve) also hook into progress - } else { - - // ...and disregard older resolution values - maxDepth++; - - then.call( - returned, - resolve( maxDepth, deferred, Identity, special ), - resolve( maxDepth, deferred, Thrower, special ), - resolve( maxDepth, deferred, Identity, - deferred.notifyWith ) - ); - } - - // Handle all other returned values - } else { - - // Only substitute handlers pass on context - // and multiple values (non-spec behavior) - if ( handler !== Identity ) { - that = 
undefined; - args = [ returned ]; - } - - // Process the value(s) - // Default process is resolve - ( special || deferred.resolveWith )( that, args ); - } - }, - - // Only normal processors (resolve) catch and reject exceptions - process = special ? - mightThrow : - function() { - try { - mightThrow(); - } catch ( e ) { - - if ( jQuery.Deferred.exceptionHook ) { - jQuery.Deferred.exceptionHook( e, - process.stackTrace ); - } - - // Support: Promises/A+ section 2.3.3.3.4.1 - // https://promisesaplus.com/#point-61 - // Ignore post-resolution exceptions - if ( depth + 1 >= maxDepth ) { - - // Only substitute handlers pass on context - // and multiple values (non-spec behavior) - if ( handler !== Thrower ) { - that = undefined; - args = [ e ]; - } - - deferred.rejectWith( that, args ); - } - } - }; - - // Support: Promises/A+ section 2.3.3.3.1 - // https://promisesaplus.com/#point-57 - // Re-resolve promises immediately to dodge false rejection from - // subsequent errors - if ( depth ) { - process(); - } else { - - // Call an optional hook to record the stack, in case of exception - // since it's otherwise lost when execution goes async - if ( jQuery.Deferred.getStackHook ) { - process.stackTrace = jQuery.Deferred.getStackHook(); - } - window.setTimeout( process ); - } - }; - } - - return jQuery.Deferred( function( newDefer ) { - - // progress_handlers.add( ... ) - tuples[ 0 ][ 3 ].add( - resolve( - 0, - newDefer, - isFunction( onProgress ) ? - onProgress : - Identity, - newDefer.notifyWith - ) - ); - - // fulfilled_handlers.add( ... ) - tuples[ 1 ][ 3 ].add( - resolve( - 0, - newDefer, - isFunction( onFulfilled ) ? - onFulfilled : - Identity - ) - ); - - // rejected_handlers.add( ... ) - tuples[ 2 ][ 3 ].add( - resolve( - 0, - newDefer, - isFunction( onRejected ) ? 
- onRejected : - Thrower - ) - ); - } ).promise(); - }, - - // Get a promise for this deferred - // If obj is provided, the promise aspect is added to the object - promise: function( obj ) { - return obj != null ? jQuery.extend( obj, promise ) : promise; - } - }, - deferred = {}; - - // Add list-specific methods - jQuery.each( tuples, function( i, tuple ) { - var list = tuple[ 2 ], - stateString = tuple[ 5 ]; - - // promise.progress = list.add - // promise.done = list.add - // promise.fail = list.add - promise[ tuple[ 1 ] ] = list.add; - - // Handle state - if ( stateString ) { - list.add( - function() { - - // state = "resolved" (i.e., fulfilled) - // state = "rejected" - state = stateString; - }, - - // rejected_callbacks.disable - // fulfilled_callbacks.disable - tuples[ 3 - i ][ 2 ].disable, - - // rejected_handlers.disable - // fulfilled_handlers.disable - tuples[ 3 - i ][ 3 ].disable, - - // progress_callbacks.lock - tuples[ 0 ][ 2 ].lock, - - // progress_handlers.lock - tuples[ 0 ][ 3 ].lock - ); - } - - // progress_handlers.fire - // fulfilled_handlers.fire - // rejected_handlers.fire - list.add( tuple[ 3 ].fire ); - - // deferred.notify = function() { deferred.notifyWith(...) } - // deferred.resolve = function() { deferred.resolveWith(...) } - // deferred.reject = function() { deferred.rejectWith(...) } - deferred[ tuple[ 0 ] ] = function() { - deferred[ tuple[ 0 ] + "With" ]( this === deferred ? undefined : this, arguments ); - return this; - }; - - // deferred.notifyWith = list.fireWith - // deferred.resolveWith = list.fireWith - // deferred.rejectWith = list.fireWith - deferred[ tuple[ 0 ] + "With" ] = list.fireWith; - } ); - - // Make the deferred a promise - promise.promise( deferred ); - - // Call given func if any - if ( func ) { - func.call( deferred, deferred ); - } - - // All done! 
- return deferred; - }, - - // Deferred helper - when: function( singleValue ) { - var - - // count of uncompleted subordinates - remaining = arguments.length, - - // count of unprocessed arguments - i = remaining, - - // subordinate fulfillment data - resolveContexts = Array( i ), - resolveValues = slice.call( arguments ), - - // the primary Deferred - primary = jQuery.Deferred(), - - // subordinate callback factory - updateFunc = function( i ) { - return function( value ) { - resolveContexts[ i ] = this; - resolveValues[ i ] = arguments.length > 1 ? slice.call( arguments ) : value; - if ( !( --remaining ) ) { - primary.resolveWith( resolveContexts, resolveValues ); - } - }; - }; - - // Single- and empty arguments are adopted like Promise.resolve - if ( remaining <= 1 ) { - adoptValue( singleValue, primary.done( updateFunc( i ) ).resolve, primary.reject, - !remaining ); - - // Use .then() to unwrap secondary thenables (cf. gh-3000) - if ( primary.state() === "pending" || - isFunction( resolveValues[ i ] && resolveValues[ i ].then ) ) { - - return primary.then(); - } - } - - // Multiple arguments are aggregated like Promise.all array elements - while ( i-- ) { - adoptValue( resolveValues[ i ], updateFunc( i ), primary.reject ); - } - - return primary.promise(); - } -} ); - - -// These usually indicate a programmer mistake during development, -// warn about them ASAP rather than swallowing them by default. 
-var rerrorNames = /^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/; - -jQuery.Deferred.exceptionHook = function( error, stack ) { - - // Support: IE 8 - 9 only - // Console exists when dev tools are open, which can happen at any time - if ( window.console && window.console.warn && error && rerrorNames.test( error.name ) ) { - window.console.warn( "jQuery.Deferred exception: " + error.message, error.stack, stack ); - } -}; - - - - -jQuery.readyException = function( error ) { - window.setTimeout( function() { - throw error; - } ); -}; - - - - -// The deferred used on DOM ready -var readyList = jQuery.Deferred(); - -jQuery.fn.ready = function( fn ) { - - readyList - .then( fn ) - - // Wrap jQuery.readyException in a function so that the lookup - // happens at the time of error handling instead of callback - // registration. - .catch( function( error ) { - jQuery.readyException( error ); - } ); - - return this; -}; - -jQuery.extend( { - - // Is the DOM ready to be used? Set to true once it occurs. - isReady: false, - - // A counter to track how many items to wait for before - // the ready event fires. See #6781 - readyWait: 1, - - // Handle when the DOM is ready - ready: function( wait ) { - - // Abort if there are pending holds or we're already ready - if ( wait === true ? 
--jQuery.readyWait : jQuery.isReady ) { - return; - } - - // Remember that the DOM is ready - jQuery.isReady = true; - - // If a normal DOM Ready event fired, decrement, and wait if need be - if ( wait !== true && --jQuery.readyWait > 0 ) { - return; - } - - // If there are functions bound, to execute - readyList.resolveWith( document, [ jQuery ] ); - } -} ); - -jQuery.ready.then = readyList.then; - -// The ready event handler and self cleanup method -function completed() { - document.removeEventListener( "DOMContentLoaded", completed ); - window.removeEventListener( "load", completed ); - jQuery.ready(); -} - -// Catch cases where $(document).ready() is called -// after the browser event has already occurred. -// Support: IE <=9 - 10 only -// Older IE sometimes signals "interactive" too soon -if ( document.readyState === "complete" || - ( document.readyState !== "loading" && !document.documentElement.doScroll ) ) { - - // Handle it asynchronously to allow scripts the opportunity to delay ready - window.setTimeout( jQuery.ready ); - -} else { - - // Use the handy event callback - document.addEventListener( "DOMContentLoaded", completed ); - - // A fallback to window.onload, that will always work - window.addEventListener( "load", completed ); -} - - - - -// Multifunctional method to get and set values of a collection -// The value/s can optionally be executed if it's a function -var access = function( elems, fn, key, value, chainable, emptyGet, raw ) { - var i = 0, - len = elems.length, - bulk = key == null; - - // Sets many values - if ( toType( key ) === "object" ) { - chainable = true; - for ( i in key ) { - access( elems, fn, i, key[ i ], true, emptyGet, raw ); - } - - // Sets one value - } else if ( value !== undefined ) { - chainable = true; - - if ( !isFunction( value ) ) { - raw = true; - } - - if ( bulk ) { - - // Bulk operations run against the entire set - if ( raw ) { - fn.call( elems, value ); - fn = null; - - // ...except when executing function 
values - } else { - bulk = fn; - fn = function( elem, _key, value ) { - return bulk.call( jQuery( elem ), value ); - }; - } - } - - if ( fn ) { - for ( ; i < len; i++ ) { - fn( - elems[ i ], key, raw ? - value : - value.call( elems[ i ], i, fn( elems[ i ], key ) ) - ); - } - } - } - - if ( chainable ) { - return elems; - } - - // Gets - if ( bulk ) { - return fn.call( elems ); - } - - return len ? fn( elems[ 0 ], key ) : emptyGet; -}; - - -// Matches dashed string for camelizing -var rmsPrefix = /^-ms-/, - rdashAlpha = /-([a-z])/g; - -// Used by camelCase as callback to replace() -function fcamelCase( _all, letter ) { - return letter.toUpperCase(); -} - -// Convert dashed to camelCase; used by the css and data modules -// Support: IE <=9 - 11, Edge 12 - 15 -// Microsoft forgot to hump their vendor prefix (#9572) -function camelCase( string ) { - return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); -} -var acceptData = function( owner ) { - - // Accepts only: - // - Node - // - Node.ELEMENT_NODE - // - Node.DOCUMENT_NODE - // - Object - // - Any - return owner.nodeType === 1 || owner.nodeType === 9 || !( +owner.nodeType ); -}; - - - - -function Data() { - this.expando = jQuery.expando + Data.uid++; -} - -Data.uid = 1; - -Data.prototype = { - - cache: function( owner ) { - - // Check if the owner object already has a cache - var value = owner[ this.expando ]; - - // If not, create one - if ( !value ) { - value = {}; - - // We can accept data for non-element nodes in modern browsers, - // but we should not, see #8335. - // Always return an empty object. 
- if ( acceptData( owner ) ) { - - // If it is a node unlikely to be stringify-ed or looped over - // use plain assignment - if ( owner.nodeType ) { - owner[ this.expando ] = value; - - // Otherwise secure it in a non-enumerable property - // configurable must be true to allow the property to be - // deleted when data is removed - } else { - Object.defineProperty( owner, this.expando, { - value: value, - configurable: true - } ); - } - } - } - - return value; - }, - set: function( owner, data, value ) { - var prop, - cache = this.cache( owner ); - - // Handle: [ owner, key, value ] args - // Always use camelCase key (gh-2257) - if ( typeof data === "string" ) { - cache[ camelCase( data ) ] = value; - - // Handle: [ owner, { properties } ] args - } else { - - // Copy the properties one-by-one to the cache object - for ( prop in data ) { - cache[ camelCase( prop ) ] = data[ prop ]; - } - } - return cache; - }, - get: function( owner, key ) { - return key === undefined ? - this.cache( owner ) : - - // Always use camelCase key (gh-2257) - owner[ this.expando ] && owner[ this.expando ][ camelCase( key ) ]; - }, - access: function( owner, key, value ) { - - // In cases where either: - // - // 1. No key was specified - // 2. A string key was specified, but no value provided - // - // Take the "read" path and allow the get method to determine - // which value to return, respectively either: - // - // 1. The entire cache object - // 2. The data stored at the key - // - if ( key === undefined || - ( ( key && typeof key === "string" ) && value === undefined ) ) { - - return this.get( owner, key ); - } - - // When the key is not a string, or both a key and value - // are specified, set or extend (existing objects) with either: - // - // 1. An object of properties - // 2. 
A key and value - // - this.set( owner, key, value ); - - // Since the "set" path can have two possible entry points - // return the expected data based on which path was taken[*] - return value !== undefined ? value : key; - }, - remove: function( owner, key ) { - var i, - cache = owner[ this.expando ]; - - if ( cache === undefined ) { - return; - } - - if ( key !== undefined ) { - - // Support array or space separated string of keys - if ( Array.isArray( key ) ) { - - // If key is an array of keys... - // We always set camelCase keys, so remove that. - key = key.map( camelCase ); - } else { - key = camelCase( key ); - - // If a key with the spaces exists, use it. - // Otherwise, create an array by matching non-whitespace - key = key in cache ? - [ key ] : - ( key.match( rnothtmlwhite ) || [] ); - } - - i = key.length; - - while ( i-- ) { - delete cache[ key[ i ] ]; - } - } - - // Remove the expando if there's no more data - if ( key === undefined || jQuery.isEmptyObject( cache ) ) { - - // Support: Chrome <=35 - 45 - // Webkit & Blink performance suffers when deleting properties - // from DOM nodes, so set to undefined instead - // https://bugs.chromium.org/p/chromium/issues/detail?id=378607 (bug restricted) - if ( owner.nodeType ) { - owner[ this.expando ] = undefined; - } else { - delete owner[ this.expando ]; - } - } - }, - hasData: function( owner ) { - var cache = owner[ this.expando ]; - return cache !== undefined && !jQuery.isEmptyObject( cache ); - } -}; -var dataPriv = new Data(); - -var dataUser = new Data(); - - - -// Implementation Summary -// -// 1. Enforce API surface and semantic compatibility with 1.9.x branch -// 2. Improve the module's maintainability by reducing the storage -// paths to a single mechanism. -// 3. Use the same single mechanism to support "private" and "user" data. -// 4. _Never_ expose "private" data to user code (TODO: Drop _data, _removeData) -// 5. Avoid exposing implementation details on user objects (eg. 
expando properties) -// 6. Provide a clear path for implementation upgrade to WeakMap in 2014 - -var rbrace = /^(?:\{[\w\W]*\}|\[[\w\W]*\])$/, - rmultiDash = /[A-Z]/g; - -function getData( data ) { - if ( data === "true" ) { - return true; - } - - if ( data === "false" ) { - return false; - } - - if ( data === "null" ) { - return null; - } - - // Only convert to a number if it doesn't change the string - if ( data === +data + "" ) { - return +data; - } - - if ( rbrace.test( data ) ) { - return JSON.parse( data ); - } - - return data; -} - -function dataAttr( elem, key, data ) { - var name; - - // If nothing was found internally, try to fetch any - // data from the HTML5 data-* attribute - if ( data === undefined && elem.nodeType === 1 ) { - name = "data-" + key.replace( rmultiDash, "-$&" ).toLowerCase(); - data = elem.getAttribute( name ); - - if ( typeof data === "string" ) { - try { - data = getData( data ); - } catch ( e ) {} - - // Make sure we set the data so it isn't changed later - dataUser.set( elem, key, data ); - } else { - data = undefined; - } - } - return data; -} - -jQuery.extend( { - hasData: function( elem ) { - return dataUser.hasData( elem ) || dataPriv.hasData( elem ); - }, - - data: function( elem, name, data ) { - return dataUser.access( elem, name, data ); - }, - - removeData: function( elem, name ) { - dataUser.remove( elem, name ); - }, - - // TODO: Now that all calls to _data and _removeData have been replaced - // with direct calls to dataPriv methods, these can be deprecated. 
- _data: function( elem, name, data ) { - return dataPriv.access( elem, name, data ); - }, - - _removeData: function( elem, name ) { - dataPriv.remove( elem, name ); - } -} ); - -jQuery.fn.extend( { - data: function( key, value ) { - var i, name, data, - elem = this[ 0 ], - attrs = elem && elem.attributes; - - // Gets all values - if ( key === undefined ) { - if ( this.length ) { - data = dataUser.get( elem ); - - if ( elem.nodeType === 1 && !dataPriv.get( elem, "hasDataAttrs" ) ) { - i = attrs.length; - while ( i-- ) { - - // Support: IE 11 only - // The attrs elements can be null (#14894) - if ( attrs[ i ] ) { - name = attrs[ i ].name; - if ( name.indexOf( "data-" ) === 0 ) { - name = camelCase( name.slice( 5 ) ); - dataAttr( elem, name, data[ name ] ); - } - } - } - dataPriv.set( elem, "hasDataAttrs", true ); - } - } - - return data; - } - - // Sets multiple values - if ( typeof key === "object" ) { - return this.each( function() { - dataUser.set( this, key ); - } ); - } - - return access( this, function( value ) { - var data; - - // The calling jQuery object (element matches) is not empty - // (and therefore has an element appears at this[ 0 ]) and the - // `value` parameter was not undefined. An empty jQuery object - // will result in `undefined` for elem = this[ 0 ] which will - // throw an exception if an attempt to read a data cache is made. - if ( elem && value === undefined ) { - - // Attempt to get data from the cache - // The key will always be camelCased in Data - data = dataUser.get( elem, key ); - if ( data !== undefined ) { - return data; - } - - // Attempt to "discover" the data in - // HTML5 custom data-* attrs - data = dataAttr( elem, key ); - if ( data !== undefined ) { - return data; - } - - // We tried really hard, but the data doesn't exist. - return; - } - - // Set the data... 
- this.each( function() { - - // We always store the camelCased key - dataUser.set( this, key, value ); - } ); - }, null, value, arguments.length > 1, null, true ); - }, - - removeData: function( key ) { - return this.each( function() { - dataUser.remove( this, key ); - } ); - } -} ); - - -jQuery.extend( { - queue: function( elem, type, data ) { - var queue; - - if ( elem ) { - type = ( type || "fx" ) + "queue"; - queue = dataPriv.get( elem, type ); - - // Speed up dequeue by getting out quickly if this is just a lookup - if ( data ) { - if ( !queue || Array.isArray( data ) ) { - queue = dataPriv.access( elem, type, jQuery.makeArray( data ) ); - } else { - queue.push( data ); - } - } - return queue || []; - } - }, - - dequeue: function( elem, type ) { - type = type || "fx"; - - var queue = jQuery.queue( elem, type ), - startLength = queue.length, - fn = queue.shift(), - hooks = jQuery._queueHooks( elem, type ), - next = function() { - jQuery.dequeue( elem, type ); - }; - - // If the fx queue is dequeued, always remove the progress sentinel - if ( fn === "inprogress" ) { - fn = queue.shift(); - startLength--; - } - - if ( fn ) { - - // Add a progress sentinel to prevent the fx queue from being - // automatically dequeued - if ( type === "fx" ) { - queue.unshift( "inprogress" ); - } - - // Clear up the last queue stop function - delete hooks.stop; - fn.call( elem, next, hooks ); - } - - if ( !startLength && hooks ) { - hooks.empty.fire(); - } - }, - - // Not public - generate a queueHooks object, or return the current one - _queueHooks: function( elem, type ) { - var key = type + "queueHooks"; - return dataPriv.get( elem, key ) || dataPriv.access( elem, key, { - empty: jQuery.Callbacks( "once memory" ).add( function() { - dataPriv.remove( elem, [ type + "queue", key ] ); - } ) - } ); - } -} ); - -jQuery.fn.extend( { - queue: function( type, data ) { - var setter = 2; - - if ( typeof type !== "string" ) { - data = type; - type = "fx"; - setter--; - } - - if ( 
arguments.length < setter ) { - return jQuery.queue( this[ 0 ], type ); - } - - return data === undefined ? - this : - this.each( function() { - var queue = jQuery.queue( this, type, data ); - - // Ensure a hooks for this queue - jQuery._queueHooks( this, type ); - - if ( type === "fx" && queue[ 0 ] !== "inprogress" ) { - jQuery.dequeue( this, type ); - } - } ); - }, - dequeue: function( type ) { - return this.each( function() { - jQuery.dequeue( this, type ); - } ); - }, - clearQueue: function( type ) { - return this.queue( type || "fx", [] ); - }, - - // Get a promise resolved when queues of a certain type - // are emptied (fx is the type by default) - promise: function( type, obj ) { - var tmp, - count = 1, - defer = jQuery.Deferred(), - elements = this, - i = this.length, - resolve = function() { - if ( !( --count ) ) { - defer.resolveWith( elements, [ elements ] ); - } - }; - - if ( typeof type !== "string" ) { - obj = type; - type = undefined; - } - type = type || "fx"; - - while ( i-- ) { - tmp = dataPriv.get( elements[ i ], type + "queueHooks" ); - if ( tmp && tmp.empty ) { - count++; - tmp.empty.add( resolve ); - } - } - resolve(); - return defer.promise( obj ); - } -} ); -var pnum = ( /[+-]?(?:\d*\.|)\d+(?:[eE][+-]?\d+|)/ ).source; - -var rcssNum = new RegExp( "^(?:([+-])=|)(" + pnum + ")([a-z%]*)$", "i" ); - - -var cssExpand = [ "Top", "Right", "Bottom", "Left" ]; - -var documentElement = document.documentElement; - - - - var isAttached = function( elem ) { - return jQuery.contains( elem.ownerDocument, elem ); - }, - composed = { composed: true }; - - // Support: IE 9 - 11+, Edge 12 - 18+, iOS 10.0 - 10.2 only - // Check attachment across shadow DOM boundaries when possible (gh-3504) - // Support: iOS 10.0-10.2 only - // Early iOS 10 versions support `attachShadow` but not `getRootNode`, - // leading to errors. We need to check for `getRootNode`. 
- if ( documentElement.getRootNode ) { - isAttached = function( elem ) { - return jQuery.contains( elem.ownerDocument, elem ) || - elem.getRootNode( composed ) === elem.ownerDocument; - }; - } -var isHiddenWithinTree = function( elem, el ) { - - // isHiddenWithinTree might be called from jQuery#filter function; - // in that case, element will be second argument - elem = el || elem; - - // Inline style trumps all - return elem.style.display === "none" || - elem.style.display === "" && - - // Otherwise, check computed style - // Support: Firefox <=43 - 45 - // Disconnected elements can have computed display: none, so first confirm that elem is - // in the document. - isAttached( elem ) && - - jQuery.css( elem, "display" ) === "none"; - }; - - - -function adjustCSS( elem, prop, valueParts, tween ) { - var adjusted, scale, - maxIterations = 20, - currentValue = tween ? - function() { - return tween.cur(); - } : - function() { - return jQuery.css( elem, prop, "" ); - }, - initial = currentValue(), - unit = valueParts && valueParts[ 3 ] || ( jQuery.cssNumber[ prop ] ? "" : "px" ), - - // Starting value computation is required for potential unit mismatches - initialInUnit = elem.nodeType && - ( jQuery.cssNumber[ prop ] || unit !== "px" && +initial ) && - rcssNum.exec( jQuery.css( elem, prop ) ); - - if ( initialInUnit && initialInUnit[ 3 ] !== unit ) { - - // Support: Firefox <=54 - // Halve the iteration target value to prevent interference from CSS upper bounds (gh-2144) - initial = initial / 2; - - // Trust units reported by jQuery.css - unit = unit || initialInUnit[ 3 ]; - - // Iteratively approximate from a nonzero starting point - initialInUnit = +initial || 1; - - while ( maxIterations-- ) { - - // Evaluate and update our best guess (doubling guesses that zero out). - // Finish if the scale equals or crosses 1 (making the old*new product non-positive). 
- jQuery.style( elem, prop, initialInUnit + unit ); - if ( ( 1 - scale ) * ( 1 - ( scale = currentValue() / initial || 0.5 ) ) <= 0 ) { - maxIterations = 0; - } - initialInUnit = initialInUnit / scale; - - } - - initialInUnit = initialInUnit * 2; - jQuery.style( elem, prop, initialInUnit + unit ); - - // Make sure we update the tween properties later on - valueParts = valueParts || []; - } - - if ( valueParts ) { - initialInUnit = +initialInUnit || +initial || 0; - - // Apply relative offset (+=/-=) if specified - adjusted = valueParts[ 1 ] ? - initialInUnit + ( valueParts[ 1 ] + 1 ) * valueParts[ 2 ] : - +valueParts[ 2 ]; - if ( tween ) { - tween.unit = unit; - tween.start = initialInUnit; - tween.end = adjusted; - } - } - return adjusted; -} - - -var defaultDisplayMap = {}; - -function getDefaultDisplay( elem ) { - var temp, - doc = elem.ownerDocument, - nodeName = elem.nodeName, - display = defaultDisplayMap[ nodeName ]; - - if ( display ) { - return display; - } - - temp = doc.body.appendChild( doc.createElement( nodeName ) ); - display = jQuery.css( temp, "display" ); - - temp.parentNode.removeChild( temp ); - - if ( display === "none" ) { - display = "block"; - } - defaultDisplayMap[ nodeName ] = display; - - return display; -} - -function showHide( elements, show ) { - var display, elem, - values = [], - index = 0, - length = elements.length; - - // Determine new display value for elements that need to change - for ( ; index < length; index++ ) { - elem = elements[ index ]; - if ( !elem.style ) { - continue; - } - - display = elem.style.display; - if ( show ) { - - // Since we force visibility upon cascade-hidden elements, an immediate (and slow) - // check is required in this first loop unless we have a nonempty display value (either - // inline or about-to-be-restored) - if ( display === "none" ) { - values[ index ] = dataPriv.get( elem, "display" ) || null; - if ( !values[ index ] ) { - elem.style.display = ""; - } - } - if ( elem.style.display === "" && 
isHiddenWithinTree( elem ) ) { - values[ index ] = getDefaultDisplay( elem ); - } - } else { - if ( display !== "none" ) { - values[ index ] = "none"; - - // Remember what we're overwriting - dataPriv.set( elem, "display", display ); - } - } - } - - // Set the display of the elements in a second loop to avoid constant reflow - for ( index = 0; index < length; index++ ) { - if ( values[ index ] != null ) { - elements[ index ].style.display = values[ index ]; - } - } - - return elements; -} - -jQuery.fn.extend( { - show: function() { - return showHide( this, true ); - }, - hide: function() { - return showHide( this ); - }, - toggle: function( state ) { - if ( typeof state === "boolean" ) { - return state ? this.show() : this.hide(); - } - - return this.each( function() { - if ( isHiddenWithinTree( this ) ) { - jQuery( this ).show(); - } else { - jQuery( this ).hide(); - } - } ); - } -} ); -var rcheckableType = ( /^(?:checkbox|radio)$/i ); - -var rtagName = ( /<([a-z][^\/\0>\x20\t\r\n\f]*)/i ); - -var rscriptType = ( /^$|^module$|\/(?:java|ecma)script/i ); - - - -( function() { - var fragment = document.createDocumentFragment(), - div = fragment.appendChild( document.createElement( "div" ) ), - input = document.createElement( "input" ); - - // Support: Android 4.0 - 4.3 only - // Check state lost if the name is set (#11217) - // Support: Windows Web Apps (WWA) - // `name` and `type` must use .setAttribute for WWA (#14901) - input.setAttribute( "type", "radio" ); - input.setAttribute( "checked", "checked" ); - input.setAttribute( "name", "t" ); - - div.appendChild( input ); - - // Support: Android <=4.1 only - // Older WebKit doesn't clone checked state correctly in fragments - support.checkClone = div.cloneNode( true ).cloneNode( true ).lastChild.checked; - - // Support: IE <=11 only - // Make sure textarea (and checkbox) defaultValue is properly cloned - div.innerHTML = ""; - support.noCloneChecked = !!div.cloneNode( true ).lastChild.defaultValue; - - // Support: IE 
<=9 only - // IE <=9 replaces "; - support.option = !!div.lastChild; -} )(); - - -// We have to close these tags to support XHTML (#13200) -var wrapMap = { - - // XHTML parsers do not magically insert elements in the - // same way that tag soup parsers do. So we cannot shorten - // this by omitting or other required elements. - thead: [ 1, "", "
    " ], - col: [ 2, "", "
    " ], - tr: [ 2, "", "
    " ], - td: [ 3, "", "
    " ], - - _default: [ 0, "", "" ] -}; - -wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; -wrapMap.th = wrapMap.td; - -// Support: IE <=9 only -if ( !support.option ) { - wrapMap.optgroup = wrapMap.option = [ 1, "" ]; -} - - -function getAll( context, tag ) { - - // Support: IE <=9 - 11 only - // Use typeof to avoid zero-argument method invocation on host objects (#15151) - var ret; - - if ( typeof context.getElementsByTagName !== "undefined" ) { - ret = context.getElementsByTagName( tag || "*" ); - - } else if ( typeof context.querySelectorAll !== "undefined" ) { - ret = context.querySelectorAll( tag || "*" ); - - } else { - ret = []; - } - - if ( tag === undefined || tag && nodeName( context, tag ) ) { - return jQuery.merge( [ context ], ret ); - } - - return ret; -} - - -// Mark scripts as having already been evaluated -function setGlobalEval( elems, refElements ) { - var i = 0, - l = elems.length; - - for ( ; i < l; i++ ) { - dataPriv.set( - elems[ i ], - "globalEval", - !refElements || dataPriv.get( refElements[ i ], "globalEval" ) - ); - } -} - - -var rhtml = /<|&#?\w+;/; - -function buildFragment( elems, context, scripts, selection, ignored ) { - var elem, tmp, tag, wrap, attached, j, - fragment = context.createDocumentFragment(), - nodes = [], - i = 0, - l = elems.length; - - for ( ; i < l; i++ ) { - elem = elems[ i ]; - - if ( elem || elem === 0 ) { - - // Add nodes directly - if ( toType( elem ) === "object" ) { - - // Support: Android <=4.0 only, PhantomJS 1 only - // push.apply(_, arraylike) throws on ancient WebKit - jQuery.merge( nodes, elem.nodeType ? 
[ elem ] : elem ); - - // Convert non-html into a text node - } else if ( !rhtml.test( elem ) ) { - nodes.push( context.createTextNode( elem ) ); - - // Convert html into DOM nodes - } else { - tmp = tmp || fragment.appendChild( context.createElement( "div" ) ); - - // Deserialize a standard representation - tag = ( rtagName.exec( elem ) || [ "", "" ] )[ 1 ].toLowerCase(); - wrap = wrapMap[ tag ] || wrapMap._default; - tmp.innerHTML = wrap[ 1 ] + jQuery.htmlPrefilter( elem ) + wrap[ 2 ]; - - // Descend through wrappers to the right content - j = wrap[ 0 ]; - while ( j-- ) { - tmp = tmp.lastChild; - } - - // Support: Android <=4.0 only, PhantomJS 1 only - // push.apply(_, arraylike) throws on ancient WebKit - jQuery.merge( nodes, tmp.childNodes ); - - // Remember the top-level container - tmp = fragment.firstChild; - - // Ensure the created nodes are orphaned (#12392) - tmp.textContent = ""; - } - } - } - - // Remove wrapper from fragment - fragment.textContent = ""; - - i = 0; - while ( ( elem = nodes[ i++ ] ) ) { - - // Skip elements already in the context collection (trac-4087) - if ( selection && jQuery.inArray( elem, selection ) > -1 ) { - if ( ignored ) { - ignored.push( elem ); - } - continue; - } - - attached = isAttached( elem ); - - // Append to fragment - tmp = getAll( fragment.appendChild( elem ), "script" ); - - // Preserve script evaluation history - if ( attached ) { - setGlobalEval( tmp ); - } - - // Capture executables - if ( scripts ) { - j = 0; - while ( ( elem = tmp[ j++ ] ) ) { - if ( rscriptType.test( elem.type || "" ) ) { - scripts.push( elem ); - } - } - } - } - - return fragment; -} - - -var rtypenamespace = /^([^.]*)(?:\.(.+)|)/; - -function returnTrue() { - return true; -} - -function returnFalse() { - return false; -} - -// Support: IE <=9 - 11+ -// focus() and blur() are asynchronous, except when they are no-op. 
-// So expect focus to be synchronous when the element is already active, -// and blur to be synchronous when the element is not already active. -// (focus and blur are always synchronous in other supported browsers, -// this just defines when we can count on it). -function expectSync( elem, type ) { - return ( elem === safeActiveElement() ) === ( type === "focus" ); -} - -// Support: IE <=9 only -// Accessing document.activeElement can throw unexpectedly -// https://bugs.jquery.com/ticket/13393 -function safeActiveElement() { - try { - return document.activeElement; - } catch ( err ) { } -} - -function on( elem, types, selector, data, fn, one ) { - var origFn, type; - - // Types can be a map of types/handlers - if ( typeof types === "object" ) { - - // ( types-Object, selector, data ) - if ( typeof selector !== "string" ) { - - // ( types-Object, data ) - data = data || selector; - selector = undefined; - } - for ( type in types ) { - on( elem, type, selector, data, types[ type ], one ); - } - return elem; - } - - if ( data == null && fn == null ) { - - // ( types, fn ) - fn = selector; - data = selector = undefined; - } else if ( fn == null ) { - if ( typeof selector === "string" ) { - - // ( types, selector, fn ) - fn = data; - data = undefined; - } else { - - // ( types, data, fn ) - fn = data; - data = selector; - selector = undefined; - } - } - if ( fn === false ) { - fn = returnFalse; - } else if ( !fn ) { - return elem; - } - - if ( one === 1 ) { - origFn = fn; - fn = function( event ) { - - // Can use an empty set, since event contains the info - jQuery().off( event ); - return origFn.apply( this, arguments ); - }; - - // Use same guid so caller can remove using origFn - fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ ); - } - return elem.each( function() { - jQuery.event.add( this, types, fn, data, selector ); - } ); -} - -/* - * Helper functions for managing events -- not part of the public interface. 
- * Props to Dean Edwards' addEvent library for many of the ideas. - */ -jQuery.event = { - - global: {}, - - add: function( elem, types, handler, data, selector ) { - - var handleObjIn, eventHandle, tmp, - events, t, handleObj, - special, handlers, type, namespaces, origType, - elemData = dataPriv.get( elem ); - - // Only attach events to objects that accept data - if ( !acceptData( elem ) ) { - return; - } - - // Caller can pass in an object of custom data in lieu of the handler - if ( handler.handler ) { - handleObjIn = handler; - handler = handleObjIn.handler; - selector = handleObjIn.selector; - } - - // Ensure that invalid selectors throw exceptions at attach time - // Evaluate against documentElement in case elem is a non-element node (e.g., document) - if ( selector ) { - jQuery.find.matchesSelector( documentElement, selector ); - } - - // Make sure that the handler has a unique ID, used to find/remove it later - if ( !handler.guid ) { - handler.guid = jQuery.guid++; - } - - // Init the element's event structure and main handler, if this is the first - if ( !( events = elemData.events ) ) { - events = elemData.events = Object.create( null ); - } - if ( !( eventHandle = elemData.handle ) ) { - eventHandle = elemData.handle = function( e ) { - - // Discard the second event of a jQuery.event.trigger() and - // when an event is called after a page has unloaded - return typeof jQuery !== "undefined" && jQuery.event.triggered !== e.type ? - jQuery.event.dispatch.apply( elem, arguments ) : undefined; - }; - } - - // Handle multiple events separated by a space - types = ( types || "" ).match( rnothtmlwhite ) || [ "" ]; - t = types.length; - while ( t-- ) { - tmp = rtypenamespace.exec( types[ t ] ) || []; - type = origType = tmp[ 1 ]; - namespaces = ( tmp[ 2 ] || "" ).split( "." 
).sort(); - - // There *must* be a type, no attaching namespace-only handlers - if ( !type ) { - continue; - } - - // If event changes its type, use the special event handlers for the changed type - special = jQuery.event.special[ type ] || {}; - - // If selector defined, determine special event api type, otherwise given type - type = ( selector ? special.delegateType : special.bindType ) || type; - - // Update special based on newly reset type - special = jQuery.event.special[ type ] || {}; - - // handleObj is passed to all event handlers - handleObj = jQuery.extend( { - type: type, - origType: origType, - data: data, - handler: handler, - guid: handler.guid, - selector: selector, - needsContext: selector && jQuery.expr.match.needsContext.test( selector ), - namespace: namespaces.join( "." ) - }, handleObjIn ); - - // Init the event handler queue if we're the first - if ( !( handlers = events[ type ] ) ) { - handlers = events[ type ] = []; - handlers.delegateCount = 0; - - // Only use addEventListener if the special events handler returns false - if ( !special.setup || - special.setup.call( elem, data, namespaces, eventHandle ) === false ) { - - if ( elem.addEventListener ) { - elem.addEventListener( type, eventHandle ); - } - } - } - - if ( special.add ) { - special.add.call( elem, handleObj ); - - if ( !handleObj.handler.guid ) { - handleObj.handler.guid = handler.guid; - } - } - - // Add to the element's handler list, delegates in front - if ( selector ) { - handlers.splice( handlers.delegateCount++, 0, handleObj ); - } else { - handlers.push( handleObj ); - } - - // Keep track of which events have ever been used, for event optimization - jQuery.event.global[ type ] = true; - } - - }, - - // Detach an event or set of events from an element - remove: function( elem, types, handler, selector, mappedTypes ) { - - var j, origCount, tmp, - events, t, handleObj, - special, handlers, type, namespaces, origType, - elemData = dataPriv.hasData( elem ) && dataPriv.get( 
elem ); - - if ( !elemData || !( events = elemData.events ) ) { - return; - } - - // Once for each type.namespace in types; type may be omitted - types = ( types || "" ).match( rnothtmlwhite ) || [ "" ]; - t = types.length; - while ( t-- ) { - tmp = rtypenamespace.exec( types[ t ] ) || []; - type = origType = tmp[ 1 ]; - namespaces = ( tmp[ 2 ] || "" ).split( "." ).sort(); - - // Unbind all events (on this namespace, if provided) for the element - if ( !type ) { - for ( type in events ) { - jQuery.event.remove( elem, type + types[ t ], handler, selector, true ); - } - continue; - } - - special = jQuery.event.special[ type ] || {}; - type = ( selector ? special.delegateType : special.bindType ) || type; - handlers = events[ type ] || []; - tmp = tmp[ 2 ] && - new RegExp( "(^|\\.)" + namespaces.join( "\\.(?:.*\\.|)" ) + "(\\.|$)" ); - - // Remove matching events - origCount = j = handlers.length; - while ( j-- ) { - handleObj = handlers[ j ]; - - if ( ( mappedTypes || origType === handleObj.origType ) && - ( !handler || handler.guid === handleObj.guid ) && - ( !tmp || tmp.test( handleObj.namespace ) ) && - ( !selector || selector === handleObj.selector || - selector === "**" && handleObj.selector ) ) { - handlers.splice( j, 1 ); - - if ( handleObj.selector ) { - handlers.delegateCount--; - } - if ( special.remove ) { - special.remove.call( elem, handleObj ); - } - } - } - - // Remove generic event handler if we removed something and no more handlers exist - // (avoids potential for endless recursion during removal of special event handlers) - if ( origCount && !handlers.length ) { - if ( !special.teardown || - special.teardown.call( elem, namespaces, elemData.handle ) === false ) { - - jQuery.removeEvent( elem, type, elemData.handle ); - } - - delete events[ type ]; - } - } - - // Remove data and the expando if it's no longer used - if ( jQuery.isEmptyObject( events ) ) { - dataPriv.remove( elem, "handle events" ); - } - }, - - dispatch: function( nativeEvent ) { - - 
var i, j, ret, matched, handleObj, handlerQueue, - args = new Array( arguments.length ), - - // Make a writable jQuery.Event from the native event object - event = jQuery.event.fix( nativeEvent ), - - handlers = ( - dataPriv.get( this, "events" ) || Object.create( null ) - )[ event.type ] || [], - special = jQuery.event.special[ event.type ] || {}; - - // Use the fix-ed jQuery.Event rather than the (read-only) native event - args[ 0 ] = event; - - for ( i = 1; i < arguments.length; i++ ) { - args[ i ] = arguments[ i ]; - } - - event.delegateTarget = this; - - // Call the preDispatch hook for the mapped type, and let it bail if desired - if ( special.preDispatch && special.preDispatch.call( this, event ) === false ) { - return; - } - - // Determine handlers - handlerQueue = jQuery.event.handlers.call( this, event, handlers ); - - // Run delegates first; they may want to stop propagation beneath us - i = 0; - while ( ( matched = handlerQueue[ i++ ] ) && !event.isPropagationStopped() ) { - event.currentTarget = matched.elem; - - j = 0; - while ( ( handleObj = matched.handlers[ j++ ] ) && - !event.isImmediatePropagationStopped() ) { - - // If the event is namespaced, then each handler is only invoked if it is - // specially universal or its namespaces are a superset of the event's. 
- if ( !event.rnamespace || handleObj.namespace === false || - event.rnamespace.test( handleObj.namespace ) ) { - - event.handleObj = handleObj; - event.data = handleObj.data; - - ret = ( ( jQuery.event.special[ handleObj.origType ] || {} ).handle || - handleObj.handler ).apply( matched.elem, args ); - - if ( ret !== undefined ) { - if ( ( event.result = ret ) === false ) { - event.preventDefault(); - event.stopPropagation(); - } - } - } - } - } - - // Call the postDispatch hook for the mapped type - if ( special.postDispatch ) { - special.postDispatch.call( this, event ); - } - - return event.result; - }, - - handlers: function( event, handlers ) { - var i, handleObj, sel, matchedHandlers, matchedSelectors, - handlerQueue = [], - delegateCount = handlers.delegateCount, - cur = event.target; - - // Find delegate handlers - if ( delegateCount && - - // Support: IE <=9 - // Black-hole SVG instance trees (trac-13180) - cur.nodeType && - - // Support: Firefox <=42 - // Suppress spec-violating clicks indicating a non-primary pointer button (trac-3861) - // https://www.w3.org/TR/DOM-Level-3-Events/#event-type-click - // Support: IE 11 only - // ...but not arrow key "clicks" of radio inputs, which can have `button` -1 (gh-2343) - !( event.type === "click" && event.button >= 1 ) ) { - - for ( ; cur !== this; cur = cur.parentNode || this ) { - - // Don't check non-elements (#13208) - // Don't process clicks on disabled elements (#6911, #8165, #11382, #11764) - if ( cur.nodeType === 1 && !( event.type === "click" && cur.disabled === true ) ) { - matchedHandlers = []; - matchedSelectors = {}; - for ( i = 0; i < delegateCount; i++ ) { - handleObj = handlers[ i ]; - - // Don't conflict with Object.prototype properties (#13203) - sel = handleObj.selector + " "; - - if ( matchedSelectors[ sel ] === undefined ) { - matchedSelectors[ sel ] = handleObj.needsContext ? 
- jQuery( sel, this ).index( cur ) > -1 : - jQuery.find( sel, this, null, [ cur ] ).length; - } - if ( matchedSelectors[ sel ] ) { - matchedHandlers.push( handleObj ); - } - } - if ( matchedHandlers.length ) { - handlerQueue.push( { elem: cur, handlers: matchedHandlers } ); - } - } - } - } - - // Add the remaining (directly-bound) handlers - cur = this; - if ( delegateCount < handlers.length ) { - handlerQueue.push( { elem: cur, handlers: handlers.slice( delegateCount ) } ); - } - - return handlerQueue; - }, - - addProp: function( name, hook ) { - Object.defineProperty( jQuery.Event.prototype, name, { - enumerable: true, - configurable: true, - - get: isFunction( hook ) ? - function() { - if ( this.originalEvent ) { - return hook( this.originalEvent ); - } - } : - function() { - if ( this.originalEvent ) { - return this.originalEvent[ name ]; - } - }, - - set: function( value ) { - Object.defineProperty( this, name, { - enumerable: true, - configurable: true, - writable: true, - value: value - } ); - } - } ); - }, - - fix: function( originalEvent ) { - return originalEvent[ jQuery.expando ] ? - originalEvent : - new jQuery.Event( originalEvent ); - }, - - special: { - load: { - - // Prevent triggered image.load events from bubbling to window.load - noBubble: true - }, - click: { - - // Utilize native event to ensure correct state for checkable inputs - setup: function( data ) { - - // For mutual compressibility with _default, replace `this` access with a local var. - // `|| data` is dead code meant only to preserve the variable through minification. - var el = this || data; - - // Claim the first handler - if ( rcheckableType.test( el.type ) && - el.click && nodeName( el, "input" ) ) { - - // dataPriv.set( el, "click", ... 
) - leverageNative( el, "click", returnTrue ); - } - - // Return false to allow normal processing in the caller - return false; - }, - trigger: function( data ) { - - // For mutual compressibility with _default, replace `this` access with a local var. - // `|| data` is dead code meant only to preserve the variable through minification. - var el = this || data; - - // Force setup before triggering a click - if ( rcheckableType.test( el.type ) && - el.click && nodeName( el, "input" ) ) { - - leverageNative( el, "click" ); - } - - // Return non-false to allow normal event-path propagation - return true; - }, - - // For cross-browser consistency, suppress native .click() on links - // Also prevent it if we're currently inside a leveraged native-event stack - _default: function( event ) { - var target = event.target; - return rcheckableType.test( target.type ) && - target.click && nodeName( target, "input" ) && - dataPriv.get( target, "click" ) || - nodeName( target, "a" ); - } - }, - - beforeunload: { - postDispatch: function( event ) { - - // Support: Firefox 20+ - // Firefox doesn't alert if the returnValue field is not set. - if ( event.result !== undefined && event.originalEvent ) { - event.originalEvent.returnValue = event.result; - } - } - } - } -}; - -// Ensure the presence of an event listener that handles manually-triggered -// synthetic events by interrupting progress until reinvoked in response to -// *native* events that it fires directly, ensuring that state changes have -// already occurred before other listeners are invoked. 
-function leverageNative( el, type, expectSync ) { - - // Missing expectSync indicates a trigger call, which must force setup through jQuery.event.add - if ( !expectSync ) { - if ( dataPriv.get( el, type ) === undefined ) { - jQuery.event.add( el, type, returnTrue ); - } - return; - } - - // Register the controller as a special universal handler for all event namespaces - dataPriv.set( el, type, false ); - jQuery.event.add( el, type, { - namespace: false, - handler: function( event ) { - var notAsync, result, - saved = dataPriv.get( this, type ); - - if ( ( event.isTrigger & 1 ) && this[ type ] ) { - - // Interrupt processing of the outer synthetic .trigger()ed event - // Saved data should be false in such cases, but might be a leftover capture object - // from an async native handler (gh-4350) - if ( !saved.length ) { - - // Store arguments for use when handling the inner native event - // There will always be at least one argument (an event object), so this array - // will not be confused with a leftover capture object. - saved = slice.call( arguments ); - dataPriv.set( this, type, saved ); - - // Trigger the native event and capture its result - // Support: IE <=9 - 11+ - // focus() and blur() are asynchronous - notAsync = expectSync( this, type ); - this[ type ](); - result = dataPriv.get( this, type ); - if ( saved !== result || notAsync ) { - dataPriv.set( this, type, false ); - } else { - result = {}; - } - if ( saved !== result ) { - - // Cancel the outer synthetic event - event.stopImmediatePropagation(); - event.preventDefault(); - - // Support: Chrome 86+ - // In Chrome, if an element having a focusout handler is blurred by - // clicking outside of it, it invokes the handler synchronously. If - // that handler calls `.remove()` on the element, the data is cleared, - // leaving `result` undefined. We need to guard against this. 
- return result && result.value; - } - - // If this is an inner synthetic event for an event with a bubbling surrogate - // (focus or blur), assume that the surrogate already propagated from triggering the - // native event and prevent that from happening again here. - // This technically gets the ordering wrong w.r.t. to `.trigger()` (in which the - // bubbling surrogate propagates *after* the non-bubbling base), but that seems - // less bad than duplication. - } else if ( ( jQuery.event.special[ type ] || {} ).delegateType ) { - event.stopPropagation(); - } - - // If this is a native event triggered above, everything is now in order - // Fire an inner synthetic event with the original arguments - } else if ( saved.length ) { - - // ...and capture the result - dataPriv.set( this, type, { - value: jQuery.event.trigger( - - // Support: IE <=9 - 11+ - // Extend with the prototype to reset the above stopImmediatePropagation() - jQuery.extend( saved[ 0 ], jQuery.Event.prototype ), - saved.slice( 1 ), - this - ) - } ); - - // Abort handling of the native event - event.stopImmediatePropagation(); - } - } - } ); -} - -jQuery.removeEvent = function( elem, type, handle ) { - - // This "if" is needed for plain objects - if ( elem.removeEventListener ) { - elem.removeEventListener( type, handle ); - } -}; - -jQuery.Event = function( src, props ) { - - // Allow instantiation without the 'new' keyword - if ( !( this instanceof jQuery.Event ) ) { - return new jQuery.Event( src, props ); - } - - // Event object - if ( src && src.type ) { - this.originalEvent = src; - this.type = src.type; - - // Events bubbling up the document may have been marked as prevented - // by a handler lower down the tree; reflect the correct value. - this.isDefaultPrevented = src.defaultPrevented || - src.defaultPrevented === undefined && - - // Support: Android <=2.3 only - src.returnValue === false ? 
- returnTrue : - returnFalse; - - // Create target properties - // Support: Safari <=6 - 7 only - // Target should not be a text node (#504, #13143) - this.target = ( src.target && src.target.nodeType === 3 ) ? - src.target.parentNode : - src.target; - - this.currentTarget = src.currentTarget; - this.relatedTarget = src.relatedTarget; - - // Event type - } else { - this.type = src; - } - - // Put explicitly provided properties onto the event object - if ( props ) { - jQuery.extend( this, props ); - } - - // Create a timestamp if incoming event doesn't have one - this.timeStamp = src && src.timeStamp || Date.now(); - - // Mark it as fixed - this[ jQuery.expando ] = true; -}; - -// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding -// https://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html -jQuery.Event.prototype = { - constructor: jQuery.Event, - isDefaultPrevented: returnFalse, - isPropagationStopped: returnFalse, - isImmediatePropagationStopped: returnFalse, - isSimulated: false, - - preventDefault: function() { - var e = this.originalEvent; - - this.isDefaultPrevented = returnTrue; - - if ( e && !this.isSimulated ) { - e.preventDefault(); - } - }, - stopPropagation: function() { - var e = this.originalEvent; - - this.isPropagationStopped = returnTrue; - - if ( e && !this.isSimulated ) { - e.stopPropagation(); - } - }, - stopImmediatePropagation: function() { - var e = this.originalEvent; - - this.isImmediatePropagationStopped = returnTrue; - - if ( e && !this.isSimulated ) { - e.stopImmediatePropagation(); - } - - this.stopPropagation(); - } -}; - -// Includes all common event props including KeyEvent and MouseEvent specific props -jQuery.each( { - altKey: true, - bubbles: true, - cancelable: true, - changedTouches: true, - ctrlKey: true, - detail: true, - eventPhase: true, - metaKey: true, - pageX: true, - pageY: true, - shiftKey: true, - view: true, - "char": true, - code: true, - charCode: true, - 
key: true, - keyCode: true, - button: true, - buttons: true, - clientX: true, - clientY: true, - offsetX: true, - offsetY: true, - pointerId: true, - pointerType: true, - screenX: true, - screenY: true, - targetTouches: true, - toElement: true, - touches: true, - which: true -}, jQuery.event.addProp ); - -jQuery.each( { focus: "focusin", blur: "focusout" }, function( type, delegateType ) { - jQuery.event.special[ type ] = { - - // Utilize native event if possible so blur/focus sequence is correct - setup: function() { - - // Claim the first handler - // dataPriv.set( this, "focus", ... ) - // dataPriv.set( this, "blur", ... ) - leverageNative( this, type, expectSync ); - - // Return false to allow normal processing in the caller - return false; - }, - trigger: function() { - - // Force setup before trigger - leverageNative( this, type ); - - // Return non-false to allow normal event-path propagation - return true; - }, - - // Suppress native focus or blur as it's already being fired - // in leverageNative. - _default: function() { - return true; - }, - - delegateType: delegateType - }; -} ); - -// Create mouseenter/leave events using mouseover/out and event-time checks -// so that event delegation works in jQuery. -// Do the same for pointerenter/pointerleave and pointerover/pointerout -// -// Support: Safari 7 only -// Safari sends mouseenter too often; see: -// https://bugs.chromium.org/p/chromium/issues/detail?id=470258 -// for the description of the bug (it existed in older Chrome versions as well). -jQuery.each( { - mouseenter: "mouseover", - mouseleave: "mouseout", - pointerenter: "pointerover", - pointerleave: "pointerout" -}, function( orig, fix ) { - jQuery.event.special[ orig ] = { - delegateType: fix, - bindType: fix, - - handle: function( event ) { - var ret, - target = this, - related = event.relatedTarget, - handleObj = event.handleObj; - - // For mouseenter/leave call the handler if related is outside the target. 
- // NB: No relatedTarget if the mouse left/entered the browser window - if ( !related || ( related !== target && !jQuery.contains( target, related ) ) ) { - event.type = handleObj.origType; - ret = handleObj.handler.apply( this, arguments ); - event.type = fix; - } - return ret; - } - }; -} ); - -jQuery.fn.extend( { - - on: function( types, selector, data, fn ) { - return on( this, types, selector, data, fn ); - }, - one: function( types, selector, data, fn ) { - return on( this, types, selector, data, fn, 1 ); - }, - off: function( types, selector, fn ) { - var handleObj, type; - if ( types && types.preventDefault && types.handleObj ) { - - // ( event ) dispatched jQuery.Event - handleObj = types.handleObj; - jQuery( types.delegateTarget ).off( - handleObj.namespace ? - handleObj.origType + "." + handleObj.namespace : - handleObj.origType, - handleObj.selector, - handleObj.handler - ); - return this; - } - if ( typeof types === "object" ) { - - // ( types-object [, selector] ) - for ( type in types ) { - this.off( type, selector, types[ type ] ); - } - return this; - } - if ( selector === false || typeof selector === "function" ) { - - // ( types [, fn] ) - fn = selector; - selector = undefined; - } - if ( fn === false ) { - fn = returnFalse; - } - return this.each( function() { - jQuery.event.remove( this, types, fn, selector ); - } ); - } -} ); - - -var - - // Support: IE <=10 - 11, Edge 12 - 13 only - // In IE/Edge using regex groups here causes severe slowdowns. - // See https://connect.microsoft.com/IE/feedback/details/1736512/ - rnoInnerhtml = /\s*$/g; - -// Prefer a tbody over its parent table for containing new rows -function manipulationTarget( elem, content ) { - if ( nodeName( elem, "table" ) && - nodeName( content.nodeType !== 11 ? 
content : content.firstChild, "tr" ) ) { - - return jQuery( elem ).children( "tbody" )[ 0 ] || elem; - } - - return elem; -} - -// Replace/restore the type attribute of script elements for safe DOM manipulation -function disableScript( elem ) { - elem.type = ( elem.getAttribute( "type" ) !== null ) + "/" + elem.type; - return elem; -} -function restoreScript( elem ) { - if ( ( elem.type || "" ).slice( 0, 5 ) === "true/" ) { - elem.type = elem.type.slice( 5 ); - } else { - elem.removeAttribute( "type" ); - } - - return elem; -} - -function cloneCopyEvent( src, dest ) { - var i, l, type, pdataOld, udataOld, udataCur, events; - - if ( dest.nodeType !== 1 ) { - return; - } - - // 1. Copy private data: events, handlers, etc. - if ( dataPriv.hasData( src ) ) { - pdataOld = dataPriv.get( src ); - events = pdataOld.events; - - if ( events ) { - dataPriv.remove( dest, "handle events" ); - - for ( type in events ) { - for ( i = 0, l = events[ type ].length; i < l; i++ ) { - jQuery.event.add( dest, type, events[ type ][ i ] ); - } - } - } - } - - // 2. Copy user data - if ( dataUser.hasData( src ) ) { - udataOld = dataUser.access( src ); - udataCur = jQuery.extend( {}, udataOld ); - - dataUser.set( dest, udataCur ); - } -} - -// Fix IE bugs, see support tests -function fixInput( src, dest ) { - var nodeName = dest.nodeName.toLowerCase(); - - // Fails to persist the checked state of a cloned checkbox or radio button. 
- if ( nodeName === "input" && rcheckableType.test( src.type ) ) { - dest.checked = src.checked; - - // Fails to return the selected option to the default selected state when cloning options - } else if ( nodeName === "input" || nodeName === "textarea" ) { - dest.defaultValue = src.defaultValue; - } -} - -function domManip( collection, args, callback, ignored ) { - - // Flatten any nested arrays - args = flat( args ); - - var fragment, first, scripts, hasScripts, node, doc, - i = 0, - l = collection.length, - iNoClone = l - 1, - value = args[ 0 ], - valueIsFunction = isFunction( value ); - - // We can't cloneNode fragments that contain checked, in WebKit - if ( valueIsFunction || - ( l > 1 && typeof value === "string" && - !support.checkClone && rchecked.test( value ) ) ) { - return collection.each( function( index ) { - var self = collection.eq( index ); - if ( valueIsFunction ) { - args[ 0 ] = value.call( this, index, self.html() ); - } - domManip( self, args, callback, ignored ); - } ); - } - - if ( l ) { - fragment = buildFragment( args, collection[ 0 ].ownerDocument, false, collection, ignored ); - first = fragment.firstChild; - - if ( fragment.childNodes.length === 1 ) { - fragment = first; - } - - // Require either new content or an interest in ignored elements to invoke the callback - if ( first || ignored ) { - scripts = jQuery.map( getAll( fragment, "script" ), disableScript ); - hasScripts = scripts.length; - - // Use the original fragment for the last item - // instead of the first because it can end up - // being emptied incorrectly in certain situations (#8070). 
- for ( ; i < l; i++ ) { - node = fragment; - - if ( i !== iNoClone ) { - node = jQuery.clone( node, true, true ); - - // Keep references to cloned scripts for later restoration - if ( hasScripts ) { - - // Support: Android <=4.0 only, PhantomJS 1 only - // push.apply(_, arraylike) throws on ancient WebKit - jQuery.merge( scripts, getAll( node, "script" ) ); - } - } - - callback.call( collection[ i ], node, i ); - } - - if ( hasScripts ) { - doc = scripts[ scripts.length - 1 ].ownerDocument; - - // Reenable scripts - jQuery.map( scripts, restoreScript ); - - // Evaluate executable scripts on first document insertion - for ( i = 0; i < hasScripts; i++ ) { - node = scripts[ i ]; - if ( rscriptType.test( node.type || "" ) && - !dataPriv.access( node, "globalEval" ) && - jQuery.contains( doc, node ) ) { - - if ( node.src && ( node.type || "" ).toLowerCase() !== "module" ) { - - // Optional AJAX dependency, but won't run scripts if not present - if ( jQuery._evalUrl && !node.noModule ) { - jQuery._evalUrl( node.src, { - nonce: node.nonce || node.getAttribute( "nonce" ) - }, doc ); - } - } else { - DOMEval( node.textContent.replace( rcleanScript, "" ), node, doc ); - } - } - } - } - } - } - - return collection; -} - -function remove( elem, selector, keepData ) { - var node, - nodes = selector ? 
jQuery.filter( selector, elem ) : elem, - i = 0; - - for ( ; ( node = nodes[ i ] ) != null; i++ ) { - if ( !keepData && node.nodeType === 1 ) { - jQuery.cleanData( getAll( node ) ); - } - - if ( node.parentNode ) { - if ( keepData && isAttached( node ) ) { - setGlobalEval( getAll( node, "script" ) ); - } - node.parentNode.removeChild( node ); - } - } - - return elem; -} - -jQuery.extend( { - htmlPrefilter: function( html ) { - return html; - }, - - clone: function( elem, dataAndEvents, deepDataAndEvents ) { - var i, l, srcElements, destElements, - clone = elem.cloneNode( true ), - inPage = isAttached( elem ); - - // Fix IE cloning issues - if ( !support.noCloneChecked && ( elem.nodeType === 1 || elem.nodeType === 11 ) && - !jQuery.isXMLDoc( elem ) ) { - - // We eschew Sizzle here for performance reasons: https://jsperf.com/getall-vs-sizzle/2 - destElements = getAll( clone ); - srcElements = getAll( elem ); - - for ( i = 0, l = srcElements.length; i < l; i++ ) { - fixInput( srcElements[ i ], destElements[ i ] ); - } - } - - // Copy the events from the original to the clone - if ( dataAndEvents ) { - if ( deepDataAndEvents ) { - srcElements = srcElements || getAll( elem ); - destElements = destElements || getAll( clone ); - - for ( i = 0, l = srcElements.length; i < l; i++ ) { - cloneCopyEvent( srcElements[ i ], destElements[ i ] ); - } - } else { - cloneCopyEvent( elem, clone ); - } - } - - // Preserve script evaluation history - destElements = getAll( clone, "script" ); - if ( destElements.length > 0 ) { - setGlobalEval( destElements, !inPage && getAll( elem, "script" ) ); - } - - // Return the cloned set - return clone; - }, - - cleanData: function( elems ) { - var data, elem, type, - special = jQuery.event.special, - i = 0; - - for ( ; ( elem = elems[ i ] ) !== undefined; i++ ) { - if ( acceptData( elem ) ) { - if ( ( data = elem[ dataPriv.expando ] ) ) { - if ( data.events ) { - for ( type in data.events ) { - if ( special[ type ] ) { - jQuery.event.remove( 
elem, type ); - - // This is a shortcut to avoid jQuery.event.remove's overhead - } else { - jQuery.removeEvent( elem, type, data.handle ); - } - } - } - - // Support: Chrome <=35 - 45+ - // Assign undefined instead of using delete, see Data#remove - elem[ dataPriv.expando ] = undefined; - } - if ( elem[ dataUser.expando ] ) { - - // Support: Chrome <=35 - 45+ - // Assign undefined instead of using delete, see Data#remove - elem[ dataUser.expando ] = undefined; - } - } - } - } -} ); - -jQuery.fn.extend( { - detach: function( selector ) { - return remove( this, selector, true ); - }, - - remove: function( selector ) { - return remove( this, selector ); - }, - - text: function( value ) { - return access( this, function( value ) { - return value === undefined ? - jQuery.text( this ) : - this.empty().each( function() { - if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { - this.textContent = value; - } - } ); - }, null, value, arguments.length ); - }, - - append: function() { - return domManip( this, arguments, function( elem ) { - if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { - var target = manipulationTarget( this, elem ); - target.appendChild( elem ); - } - } ); - }, - - prepend: function() { - return domManip( this, arguments, function( elem ) { - if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { - var target = manipulationTarget( this, elem ); - target.insertBefore( elem, target.firstChild ); - } - } ); - }, - - before: function() { - return domManip( this, arguments, function( elem ) { - if ( this.parentNode ) { - this.parentNode.insertBefore( elem, this ); - } - } ); - }, - - after: function() { - return domManip( this, arguments, function( elem ) { - if ( this.parentNode ) { - this.parentNode.insertBefore( elem, this.nextSibling ); - } - } ); - }, - - empty: function() { - var elem, - i = 0; - - for ( ; ( elem = this[ i ] ) != null; i++ ) { - if ( elem.nodeType === 1 ) { - - // 
Prevent memory leaks - jQuery.cleanData( getAll( elem, false ) ); - - // Remove any remaining nodes - elem.textContent = ""; - } - } - - return this; - }, - - clone: function( dataAndEvents, deepDataAndEvents ) { - dataAndEvents = dataAndEvents == null ? false : dataAndEvents; - deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents; - - return this.map( function() { - return jQuery.clone( this, dataAndEvents, deepDataAndEvents ); - } ); - }, - - html: function( value ) { - return access( this, function( value ) { - var elem = this[ 0 ] || {}, - i = 0, - l = this.length; - - if ( value === undefined && elem.nodeType === 1 ) { - return elem.innerHTML; - } - - // See if we can take a shortcut and just use innerHTML - if ( typeof value === "string" && !rnoInnerhtml.test( value ) && - !wrapMap[ ( rtagName.exec( value ) || [ "", "" ] )[ 1 ].toLowerCase() ] ) { - - value = jQuery.htmlPrefilter( value ); - - try { - for ( ; i < l; i++ ) { - elem = this[ i ] || {}; - - // Remove element nodes and prevent memory leaks - if ( elem.nodeType === 1 ) { - jQuery.cleanData( getAll( elem, false ) ); - elem.innerHTML = value; - } - } - - elem = 0; - - // If using innerHTML throws an exception, use the fallback method - } catch ( e ) {} - } - - if ( elem ) { - this.empty().append( value ); - } - }, null, value, arguments.length ); - }, - - replaceWith: function() { - var ignored = []; - - // Make the changes, replacing each non-ignored context element with the new content - return domManip( this, arguments, function( elem ) { - var parent = this.parentNode; - - if ( jQuery.inArray( this, ignored ) < 0 ) { - jQuery.cleanData( getAll( this ) ); - if ( parent ) { - parent.replaceChild( elem, this ); - } - } - - // Force callback invocation - }, ignored ); - } -} ); - -jQuery.each( { - appendTo: "append", - prependTo: "prepend", - insertBefore: "before", - insertAfter: "after", - replaceAll: "replaceWith" -}, function( name, original ) { - jQuery.fn[ name ] = 
function( selector ) { - var elems, - ret = [], - insert = jQuery( selector ), - last = insert.length - 1, - i = 0; - - for ( ; i <= last; i++ ) { - elems = i === last ? this : this.clone( true ); - jQuery( insert[ i ] )[ original ]( elems ); - - // Support: Android <=4.0 only, PhantomJS 1 only - // .get() because push.apply(_, arraylike) throws on ancient WebKit - push.apply( ret, elems.get() ); - } - - return this.pushStack( ret ); - }; -} ); -var rnumnonpx = new RegExp( "^(" + pnum + ")(?!px)[a-z%]+$", "i" ); - -var getStyles = function( elem ) { - - // Support: IE <=11 only, Firefox <=30 (#15098, #14150) - // IE throws on elements created in popups - // FF meanwhile throws on frame elements through "defaultView.getComputedStyle" - var view = elem.ownerDocument.defaultView; - - if ( !view || !view.opener ) { - view = window; - } - - return view.getComputedStyle( elem ); - }; - -var swap = function( elem, options, callback ) { - var ret, name, - old = {}; - - // Remember the old values, and insert the new ones - for ( name in options ) { - old[ name ] = elem.style[ name ]; - elem.style[ name ] = options[ name ]; - } - - ret = callback.call( elem ); - - // Revert the old values - for ( name in options ) { - elem.style[ name ] = old[ name ]; - } - - return ret; -}; - - -var rboxStyle = new RegExp( cssExpand.join( "|" ), "i" ); - - - -( function() { - - // Executing both pixelPosition & boxSizingReliable tests require only one layout - // so they're executed at the same time to save the second computation. 
- function computeStyleTests() { - - // This is a singleton, we need to execute it only once - if ( !div ) { - return; - } - - container.style.cssText = "position:absolute;left:-11111px;width:60px;" + - "margin-top:1px;padding:0;border:0"; - div.style.cssText = - "position:relative;display:block;box-sizing:border-box;overflow:scroll;" + - "margin:auto;border:1px;padding:1px;" + - "width:60%;top:1%"; - documentElement.appendChild( container ).appendChild( div ); - - var divStyle = window.getComputedStyle( div ); - pixelPositionVal = divStyle.top !== "1%"; - - // Support: Android 4.0 - 4.3 only, Firefox <=3 - 44 - reliableMarginLeftVal = roundPixelMeasures( divStyle.marginLeft ) === 12; - - // Support: Android 4.0 - 4.3 only, Safari <=9.1 - 10.1, iOS <=7.0 - 9.3 - // Some styles come back with percentage values, even though they shouldn't - div.style.right = "60%"; - pixelBoxStylesVal = roundPixelMeasures( divStyle.right ) === 36; - - // Support: IE 9 - 11 only - // Detect misreporting of content dimensions for box-sizing:border-box elements - boxSizingReliableVal = roundPixelMeasures( divStyle.width ) === 36; - - // Support: IE 9 only - // Detect overflow:scroll screwiness (gh-3699) - // Support: Chrome <=64 - // Don't get tricked when zoom affects offsetWidth (gh-4029) - div.style.position = "absolute"; - scrollboxSizeVal = roundPixelMeasures( div.offsetWidth / 3 ) === 12; - - documentElement.removeChild( container ); - - // Nullify the div so it wouldn't be stored in the memory and - // it will also be a sign that checks already performed - div = null; - } - - function roundPixelMeasures( measure ) { - return Math.round( parseFloat( measure ) ); - } - - var pixelPositionVal, boxSizingReliableVal, scrollboxSizeVal, pixelBoxStylesVal, - reliableTrDimensionsVal, reliableMarginLeftVal, - container = document.createElement( "div" ), - div = document.createElement( "div" ); - - // Finish early in limited (non-browser) environments - if ( !div.style ) { - return; - } - - 
// Support: IE <=9 - 11 only - // Style of cloned element affects source element cloned (#8908) - div.style.backgroundClip = "content-box"; - div.cloneNode( true ).style.backgroundClip = ""; - support.clearCloneStyle = div.style.backgroundClip === "content-box"; - - jQuery.extend( support, { - boxSizingReliable: function() { - computeStyleTests(); - return boxSizingReliableVal; - }, - pixelBoxStyles: function() { - computeStyleTests(); - return pixelBoxStylesVal; - }, - pixelPosition: function() { - computeStyleTests(); - return pixelPositionVal; - }, - reliableMarginLeft: function() { - computeStyleTests(); - return reliableMarginLeftVal; - }, - scrollboxSize: function() { - computeStyleTests(); - return scrollboxSizeVal; - }, - - // Support: IE 9 - 11+, Edge 15 - 18+ - // IE/Edge misreport `getComputedStyle` of table rows with width/height - // set in CSS while `offset*` properties report correct values. - // Behavior in IE 9 is more subtle than in newer versions & it passes - // some versions of this test; make sure not to make it pass there! - // - // Support: Firefox 70+ - // Only Firefox includes border widths - // in computed dimensions. (gh-4529) - reliableTrDimensions: function() { - var table, tr, trChild, trStyle; - if ( reliableTrDimensionsVal == null ) { - table = document.createElement( "table" ); - tr = document.createElement( "tr" ); - trChild = document.createElement( "div" ); - - table.style.cssText = "position:absolute;left:-11111px;border-collapse:separate"; - tr.style.cssText = "border:1px solid"; - - // Support: Chrome 86+ - // Height set through cssText does not get applied. - // Computed height then comes back as 0. - tr.style.height = "1px"; - trChild.style.height = "9px"; - - // Support: Android 8 Chrome 86+ - // In our bodyBackground.html iframe, - // display for all div elements is set to "inline", - // which causes a problem only in Android 8 Chrome 86. - // Ensuring the div is display: block - // gets around this issue. 
- trChild.style.display = "block"; - - documentElement - .appendChild( table ) - .appendChild( tr ) - .appendChild( trChild ); - - trStyle = window.getComputedStyle( tr ); - reliableTrDimensionsVal = ( parseInt( trStyle.height, 10 ) + - parseInt( trStyle.borderTopWidth, 10 ) + - parseInt( trStyle.borderBottomWidth, 10 ) ) === tr.offsetHeight; - - documentElement.removeChild( table ); - } - return reliableTrDimensionsVal; - } - } ); -} )(); - - -function curCSS( elem, name, computed ) { - var width, minWidth, maxWidth, ret, - - // Support: Firefox 51+ - // Retrieving style before computed somehow - // fixes an issue with getting wrong values - // on detached elements - style = elem.style; - - computed = computed || getStyles( elem ); - - // getPropertyValue is needed for: - // .css('filter') (IE 9 only, #12537) - // .css('--customProperty) (#3144) - if ( computed ) { - ret = computed.getPropertyValue( name ) || computed[ name ]; - - if ( ret === "" && !isAttached( elem ) ) { - ret = jQuery.style( elem, name ); - } - - // A tribute to the "awesome hack by Dean Edwards" - // Android Browser returns percentage for some values, - // but width seems to be reliably pixels. - // This is against the CSSOM draft spec: - // https://drafts.csswg.org/cssom/#resolved-values - if ( !support.pixelBoxStyles() && rnumnonpx.test( ret ) && rboxStyle.test( name ) ) { - - // Remember the original values - width = style.width; - minWidth = style.minWidth; - maxWidth = style.maxWidth; - - // Put in the new values to get a computed value out - style.minWidth = style.maxWidth = style.width = ret; - ret = computed.width; - - // Revert the changed values - style.width = width; - style.minWidth = minWidth; - style.maxWidth = maxWidth; - } - } - - return ret !== undefined ? - - // Support: IE <=9 - 11 only - // IE returns zIndex value as an integer. 
- ret + "" : - ret; -} - - -function addGetHookIf( conditionFn, hookFn ) { - - // Define the hook, we'll check on the first run if it's really needed. - return { - get: function() { - if ( conditionFn() ) { - - // Hook not needed (or it's not possible to use it due - // to missing dependency), remove it. - delete this.get; - return; - } - - // Hook needed; redefine it so that the support test is not executed again. - return ( this.get = hookFn ).apply( this, arguments ); - } - }; -} - - -var cssPrefixes = [ "Webkit", "Moz", "ms" ], - emptyStyle = document.createElement( "div" ).style, - vendorProps = {}; - -// Return a vendor-prefixed property or undefined -function vendorPropName( name ) { - - // Check for vendor prefixed names - var capName = name[ 0 ].toUpperCase() + name.slice( 1 ), - i = cssPrefixes.length; - - while ( i-- ) { - name = cssPrefixes[ i ] + capName; - if ( name in emptyStyle ) { - return name; - } - } -} - -// Return a potentially-mapped jQuery.cssProps or vendor prefixed property -function finalPropName( name ) { - var final = jQuery.cssProps[ name ] || vendorProps[ name ]; - - if ( final ) { - return final; - } - if ( name in emptyStyle ) { - return name; - } - return vendorProps[ name ] = vendorPropName( name ) || name; -} - - -var - - // Swappable if display is none or starts with table - // except "table", "table-cell", or "table-caption" - // See here for display values: https://developer.mozilla.org/en-US/docs/CSS/display - rdisplayswap = /^(none|table(?!-c[ea]).+)/, - rcustomProp = /^--/, - cssShow = { position: "absolute", visibility: "hidden", display: "block" }, - cssNormalTransform = { - letterSpacing: "0", - fontWeight: "400" - }; - -function setPositiveNumber( _elem, value, subtract ) { - - // Any relative (+/-) values have already been - // normalized at this point - var matches = rcssNum.exec( value ); - return matches ? 
- - // Guard against undefined "subtract", e.g., when used as in cssHooks - Math.max( 0, matches[ 2 ] - ( subtract || 0 ) ) + ( matches[ 3 ] || "px" ) : - value; -} - -function boxModelAdjustment( elem, dimension, box, isBorderBox, styles, computedVal ) { - var i = dimension === "width" ? 1 : 0, - extra = 0, - delta = 0; - - // Adjustment may not be necessary - if ( box === ( isBorderBox ? "border" : "content" ) ) { - return 0; - } - - for ( ; i < 4; i += 2 ) { - - // Both box models exclude margin - if ( box === "margin" ) { - delta += jQuery.css( elem, box + cssExpand[ i ], true, styles ); - } - - // If we get here with a content-box, we're seeking "padding" or "border" or "margin" - if ( !isBorderBox ) { - - // Add padding - delta += jQuery.css( elem, "padding" + cssExpand[ i ], true, styles ); - - // For "border" or "margin", add border - if ( box !== "padding" ) { - delta += jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); - - // But still keep track of it otherwise - } else { - extra += jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); - } - - // If we get here with a border-box (content + padding + border), we're seeking "content" or - // "padding" or "margin" - } else { - - // For "content", subtract padding - if ( box === "content" ) { - delta -= jQuery.css( elem, "padding" + cssExpand[ i ], true, styles ); - } - - // For "content" or "padding", subtract border - if ( box !== "margin" ) { - delta -= jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); - } - } - } - - // Account for positive content-box scroll gutter when requested by providing computedVal - if ( !isBorderBox && computedVal >= 0 ) { - - // offsetWidth/offsetHeight is a rounded sum of content, padding, scroll gutter, and border - // Assuming integer scroll gutter, subtract the rest and round down - delta += Math.max( 0, Math.ceil( - elem[ "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ) ] - - computedVal - - delta - - 
extra - - 0.5 - - // If offsetWidth/offsetHeight is unknown, then we can't determine content-box scroll gutter - // Use an explicit zero to avoid NaN (gh-3964) - ) ) || 0; - } - - return delta; -} - -function getWidthOrHeight( elem, dimension, extra ) { - - // Start with computed style - var styles = getStyles( elem ), - - // To avoid forcing a reflow, only fetch boxSizing if we need it (gh-4322). - // Fake content-box until we know it's needed to know the true value. - boxSizingNeeded = !support.boxSizingReliable() || extra, - isBorderBox = boxSizingNeeded && - jQuery.css( elem, "boxSizing", false, styles ) === "border-box", - valueIsBorderBox = isBorderBox, - - val = curCSS( elem, dimension, styles ), - offsetProp = "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ); - - // Support: Firefox <=54 - // Return a confounding non-pixel value or feign ignorance, as appropriate. - if ( rnumnonpx.test( val ) ) { - if ( !extra ) { - return val; - } - val = "auto"; - } - - - // Support: IE 9 - 11 only - // Use offsetWidth/offsetHeight for when box sizing is unreliable. - // In those cases, the computed value can be trusted to be border-box. - if ( ( !support.boxSizingReliable() && isBorderBox || - - // Support: IE 10 - 11+, Edge 15 - 18+ - // IE/Edge misreport `getComputedStyle` of table rows with width/height - // set in CSS while `offset*` properties report correct values. - // Interestingly, in some cases IE 9 doesn't suffer from this issue. 
- !support.reliableTrDimensions() && nodeName( elem, "tr" ) || - - // Fall back to offsetWidth/offsetHeight when value is "auto" - // This happens for inline elements with no explicit setting (gh-3571) - val === "auto" || - - // Support: Android <=4.1 - 4.3 only - // Also use offsetWidth/offsetHeight for misreported inline dimensions (gh-3602) - !parseFloat( val ) && jQuery.css( elem, "display", false, styles ) === "inline" ) && - - // Make sure the element is visible & connected - elem.getClientRects().length ) { - - isBorderBox = jQuery.css( elem, "boxSizing", false, styles ) === "border-box"; - - // Where available, offsetWidth/offsetHeight approximate border box dimensions. - // Where not available (e.g., SVG), assume unreliable box-sizing and interpret the - // retrieved value as a content box dimension. - valueIsBorderBox = offsetProp in elem; - if ( valueIsBorderBox ) { - val = elem[ offsetProp ]; - } - } - - // Normalize "" and auto - val = parseFloat( val ) || 0; - - // Adjust for the element's box model - return ( val + - boxModelAdjustment( - elem, - dimension, - extra || ( isBorderBox ? "border" : "content" ), - valueIsBorderBox, - styles, - - // Provide the current computed size to request scroll gutter calculation (gh-3589) - val - ) - ) + "px"; -} - -jQuery.extend( { - - // Add in style property hooks for overriding the default - // behavior of getting and setting a style property - cssHooks: { - opacity: { - get: function( elem, computed ) { - if ( computed ) { - - // We should always get a number back from opacity - var ret = curCSS( elem, "opacity" ); - return ret === "" ? 
"1" : ret; - } - } - } - }, - - // Don't automatically add "px" to these possibly-unitless properties - cssNumber: { - "animationIterationCount": true, - "columnCount": true, - "fillOpacity": true, - "flexGrow": true, - "flexShrink": true, - "fontWeight": true, - "gridArea": true, - "gridColumn": true, - "gridColumnEnd": true, - "gridColumnStart": true, - "gridRow": true, - "gridRowEnd": true, - "gridRowStart": true, - "lineHeight": true, - "opacity": true, - "order": true, - "orphans": true, - "widows": true, - "zIndex": true, - "zoom": true - }, - - // Add in properties whose names you wish to fix before - // setting or getting the value - cssProps: {}, - - // Get and set the style property on a DOM Node - style: function( elem, name, value, extra ) { - - // Don't set styles on text and comment nodes - if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) { - return; - } - - // Make sure that we're working with the right name - var ret, type, hooks, - origName = camelCase( name ), - isCustomProp = rcustomProp.test( name ), - style = elem.style; - - // Make sure that we're working with the right name. We don't - // want to query the value if it is a CSS custom property - // since they are user-defined. 
- if ( !isCustomProp ) { - name = finalPropName( origName ); - } - - // Gets hook for the prefixed version, then unprefixed version - hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; - - // Check if we're setting a value - if ( value !== undefined ) { - type = typeof value; - - // Convert "+=" or "-=" to relative numbers (#7345) - if ( type === "string" && ( ret = rcssNum.exec( value ) ) && ret[ 1 ] ) { - value = adjustCSS( elem, name, ret ); - - // Fixes bug #9237 - type = "number"; - } - - // Make sure that null and NaN values aren't set (#7116) - if ( value == null || value !== value ) { - return; - } - - // If a number was passed in, add the unit (except for certain CSS properties) - // The isCustomProp check can be removed in jQuery 4.0 when we only auto-append - // "px" to a few hardcoded values. - if ( type === "number" && !isCustomProp ) { - value += ret && ret[ 3 ] || ( jQuery.cssNumber[ origName ] ? "" : "px" ); - } - - // background-* props affect original clone's values - if ( !support.clearCloneStyle && value === "" && name.indexOf( "background" ) === 0 ) { - style[ name ] = "inherit"; - } - - // If a hook was provided, use that value, otherwise just set the specified value - if ( !hooks || !( "set" in hooks ) || - ( value = hooks.set( elem, value, extra ) ) !== undefined ) { - - if ( isCustomProp ) { - style.setProperty( name, value ); - } else { - style[ name ] = value; - } - } - - } else { - - // If a hook was provided get the non-computed value from there - if ( hooks && "get" in hooks && - ( ret = hooks.get( elem, false, extra ) ) !== undefined ) { - - return ret; - } - - // Otherwise just get the value from the style object - return style[ name ]; - } - }, - - css: function( elem, name, extra, styles ) { - var val, num, hooks, - origName = camelCase( name ), - isCustomProp = rcustomProp.test( name ); - - // Make sure that we're working with the right name. 
We don't - // want to modify the value if it is a CSS custom property - // since they are user-defined. - if ( !isCustomProp ) { - name = finalPropName( origName ); - } - - // Try prefixed name followed by the unprefixed name - hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; - - // If a hook was provided get the computed value from there - if ( hooks && "get" in hooks ) { - val = hooks.get( elem, true, extra ); - } - - // Otherwise, if a way to get the computed value exists, use that - if ( val === undefined ) { - val = curCSS( elem, name, styles ); - } - - // Convert "normal" to computed value - if ( val === "normal" && name in cssNormalTransform ) { - val = cssNormalTransform[ name ]; - } - - // Make numeric if forced or a qualifier was provided and val looks numeric - if ( extra === "" || extra ) { - num = parseFloat( val ); - return extra === true || isFinite( num ) ? num || 0 : val; - } - - return val; - } -} ); - -jQuery.each( [ "height", "width" ], function( _i, dimension ) { - jQuery.cssHooks[ dimension ] = { - get: function( elem, computed, extra ) { - if ( computed ) { - - // Certain elements can have dimension info if we invisibly show them - // but it must have a current display style that would benefit - return rdisplayswap.test( jQuery.css( elem, "display" ) ) && - - // Support: Safari 8+ - // Table columns in Safari have non-zero offsetWidth & zero - // getBoundingClientRect().width unless display is changed. - // Support: IE <=11 only - // Running getBoundingClientRect on a disconnected node - // in IE throws an error. - ( !elem.getClientRects().length || !elem.getBoundingClientRect().width ) ? - swap( elem, cssShow, function() { - return getWidthOrHeight( elem, dimension, extra ); - } ) : - getWidthOrHeight( elem, dimension, extra ); - } - }, - - set: function( elem, value, extra ) { - var matches, - styles = getStyles( elem ), - - // Only read styles.position if the test has a chance to fail - // to avoid forcing a reflow. 
- scrollboxSizeBuggy = !support.scrollboxSize() && - styles.position === "absolute", - - // To avoid forcing a reflow, only fetch boxSizing if we need it (gh-3991) - boxSizingNeeded = scrollboxSizeBuggy || extra, - isBorderBox = boxSizingNeeded && - jQuery.css( elem, "boxSizing", false, styles ) === "border-box", - subtract = extra ? - boxModelAdjustment( - elem, - dimension, - extra, - isBorderBox, - styles - ) : - 0; - - // Account for unreliable border-box dimensions by comparing offset* to computed and - // faking a content-box to get border and padding (gh-3699) - if ( isBorderBox && scrollboxSizeBuggy ) { - subtract -= Math.ceil( - elem[ "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ) ] - - parseFloat( styles[ dimension ] ) - - boxModelAdjustment( elem, dimension, "border", false, styles ) - - 0.5 - ); - } - - // Convert to pixels if value adjustment is needed - if ( subtract && ( matches = rcssNum.exec( value ) ) && - ( matches[ 3 ] || "px" ) !== "px" ) { - - elem.style[ dimension ] = value; - value = jQuery.css( elem, dimension ); - } - - return setPositiveNumber( elem, value, subtract ); - } - }; -} ); - -jQuery.cssHooks.marginLeft = addGetHookIf( support.reliableMarginLeft, - function( elem, computed ) { - if ( computed ) { - return ( parseFloat( curCSS( elem, "marginLeft" ) ) || - elem.getBoundingClientRect().left - - swap( elem, { marginLeft: 0 }, function() { - return elem.getBoundingClientRect().left; - } ) - ) + "px"; - } - } -); - -// These hooks are used by animate to expand properties -jQuery.each( { - margin: "", - padding: "", - border: "Width" -}, function( prefix, suffix ) { - jQuery.cssHooks[ prefix + suffix ] = { - expand: function( value ) { - var i = 0, - expanded = {}, - - // Assumes a single number if not a string - parts = typeof value === "string" ? 
value.split( " " ) : [ value ]; - - for ( ; i < 4; i++ ) { - expanded[ prefix + cssExpand[ i ] + suffix ] = - parts[ i ] || parts[ i - 2 ] || parts[ 0 ]; - } - - return expanded; - } - }; - - if ( prefix !== "margin" ) { - jQuery.cssHooks[ prefix + suffix ].set = setPositiveNumber; - } -} ); - -jQuery.fn.extend( { - css: function( name, value ) { - return access( this, function( elem, name, value ) { - var styles, len, - map = {}, - i = 0; - - if ( Array.isArray( name ) ) { - styles = getStyles( elem ); - len = name.length; - - for ( ; i < len; i++ ) { - map[ name[ i ] ] = jQuery.css( elem, name[ i ], false, styles ); - } - - return map; - } - - return value !== undefined ? - jQuery.style( elem, name, value ) : - jQuery.css( elem, name ); - }, name, value, arguments.length > 1 ); - } -} ); - - -function Tween( elem, options, prop, end, easing ) { - return new Tween.prototype.init( elem, options, prop, end, easing ); -} -jQuery.Tween = Tween; - -Tween.prototype = { - constructor: Tween, - init: function( elem, options, prop, end, easing, unit ) { - this.elem = elem; - this.prop = prop; - this.easing = easing || jQuery.easing._default; - this.options = options; - this.start = this.now = this.cur(); - this.end = end; - this.unit = unit || ( jQuery.cssNumber[ prop ] ? "" : "px" ); - }, - cur: function() { - var hooks = Tween.propHooks[ this.prop ]; - - return hooks && hooks.get ? 
- hooks.get( this ) : - Tween.propHooks._default.get( this ); - }, - run: function( percent ) { - var eased, - hooks = Tween.propHooks[ this.prop ]; - - if ( this.options.duration ) { - this.pos = eased = jQuery.easing[ this.easing ]( - percent, this.options.duration * percent, 0, 1, this.options.duration - ); - } else { - this.pos = eased = percent; - } - this.now = ( this.end - this.start ) * eased + this.start; - - if ( this.options.step ) { - this.options.step.call( this.elem, this.now, this ); - } - - if ( hooks && hooks.set ) { - hooks.set( this ); - } else { - Tween.propHooks._default.set( this ); - } - return this; - } -}; - -Tween.prototype.init.prototype = Tween.prototype; - -Tween.propHooks = { - _default: { - get: function( tween ) { - var result; - - // Use a property on the element directly when it is not a DOM element, - // or when there is no matching style property that exists. - if ( tween.elem.nodeType !== 1 || - tween.elem[ tween.prop ] != null && tween.elem.style[ tween.prop ] == null ) { - return tween.elem[ tween.prop ]; - } - - // Passing an empty string as a 3rd parameter to .css will automatically - // attempt a parseFloat and fallback to a string if the parse fails. - // Simple values such as "10px" are parsed to Float; - // complex values such as "rotate(1rad)" are returned as-is. - result = jQuery.css( tween.elem, tween.prop, "" ); - - // Empty strings, null, undefined and "auto" are converted to 0. - return !result || result === "auto" ? 0 : result; - }, - set: function( tween ) { - - // Use step hook for back compat. - // Use cssHook if its there. - // Use .style if available and use plain properties where available. 
- if ( jQuery.fx.step[ tween.prop ] ) { - jQuery.fx.step[ tween.prop ]( tween ); - } else if ( tween.elem.nodeType === 1 && ( - jQuery.cssHooks[ tween.prop ] || - tween.elem.style[ finalPropName( tween.prop ) ] != null ) ) { - jQuery.style( tween.elem, tween.prop, tween.now + tween.unit ); - } else { - tween.elem[ tween.prop ] = tween.now; - } - } - } -}; - -// Support: IE <=9 only -// Panic based approach to setting things on disconnected nodes -Tween.propHooks.scrollTop = Tween.propHooks.scrollLeft = { - set: function( tween ) { - if ( tween.elem.nodeType && tween.elem.parentNode ) { - tween.elem[ tween.prop ] = tween.now; - } - } -}; - -jQuery.easing = { - linear: function( p ) { - return p; - }, - swing: function( p ) { - return 0.5 - Math.cos( p * Math.PI ) / 2; - }, - _default: "swing" -}; - -jQuery.fx = Tween.prototype.init; - -// Back compat <1.8 extension point -jQuery.fx.step = {}; - - - - -var - fxNow, inProgress, - rfxtypes = /^(?:toggle|show|hide)$/, - rrun = /queueHooks$/; - -function schedule() { - if ( inProgress ) { - if ( document.hidden === false && window.requestAnimationFrame ) { - window.requestAnimationFrame( schedule ); - } else { - window.setTimeout( schedule, jQuery.fx.interval ); - } - - jQuery.fx.tick(); - } -} - -// Animations created synchronously will run synchronously -function createFxNow() { - window.setTimeout( function() { - fxNow = undefined; - } ); - return ( fxNow = Date.now() ); -} - -// Generate parameters to create a standard animation -function genFx( type, includeWidth ) { - var which, - i = 0, - attrs = { height: type }; - - // If we include width, step value is 1 to do all cssExpand values, - // otherwise step value is 2 to skip over Left and Right - includeWidth = includeWidth ? 
1 : 0; - for ( ; i < 4; i += 2 - includeWidth ) { - which = cssExpand[ i ]; - attrs[ "margin" + which ] = attrs[ "padding" + which ] = type; - } - - if ( includeWidth ) { - attrs.opacity = attrs.width = type; - } - - return attrs; -} - -function createTween( value, prop, animation ) { - var tween, - collection = ( Animation.tweeners[ prop ] || [] ).concat( Animation.tweeners[ "*" ] ), - index = 0, - length = collection.length; - for ( ; index < length; index++ ) { - if ( ( tween = collection[ index ].call( animation, prop, value ) ) ) { - - // We're done with this property - return tween; - } - } -} - -function defaultPrefilter( elem, props, opts ) { - var prop, value, toggle, hooks, oldfire, propTween, restoreDisplay, display, - isBox = "width" in props || "height" in props, - anim = this, - orig = {}, - style = elem.style, - hidden = elem.nodeType && isHiddenWithinTree( elem ), - dataShow = dataPriv.get( elem, "fxshow" ); - - // Queue-skipping animations hijack the fx hooks - if ( !opts.queue ) { - hooks = jQuery._queueHooks( elem, "fx" ); - if ( hooks.unqueued == null ) { - hooks.unqueued = 0; - oldfire = hooks.empty.fire; - hooks.empty.fire = function() { - if ( !hooks.unqueued ) { - oldfire(); - } - }; - } - hooks.unqueued++; - - anim.always( function() { - - // Ensure the complete handler is called before this completes - anim.always( function() { - hooks.unqueued--; - if ( !jQuery.queue( elem, "fx" ).length ) { - hooks.empty.fire(); - } - } ); - } ); - } - - // Detect show/hide animations - for ( prop in props ) { - value = props[ prop ]; - if ( rfxtypes.test( value ) ) { - delete props[ prop ]; - toggle = toggle || value === "toggle"; - if ( value === ( hidden ? 
"hide" : "show" ) ) { - - // Pretend to be hidden if this is a "show" and - // there is still data from a stopped show/hide - if ( value === "show" && dataShow && dataShow[ prop ] !== undefined ) { - hidden = true; - - // Ignore all other no-op show/hide data - } else { - continue; - } - } - orig[ prop ] = dataShow && dataShow[ prop ] || jQuery.style( elem, prop ); - } - } - - // Bail out if this is a no-op like .hide().hide() - propTween = !jQuery.isEmptyObject( props ); - if ( !propTween && jQuery.isEmptyObject( orig ) ) { - return; - } - - // Restrict "overflow" and "display" styles during box animations - if ( isBox && elem.nodeType === 1 ) { - - // Support: IE <=9 - 11, Edge 12 - 15 - // Record all 3 overflow attributes because IE does not infer the shorthand - // from identically-valued overflowX and overflowY and Edge just mirrors - // the overflowX value there. - opts.overflow = [ style.overflow, style.overflowX, style.overflowY ]; - - // Identify a display type, preferring old show/hide data over the CSS cascade - restoreDisplay = dataShow && dataShow.display; - if ( restoreDisplay == null ) { - restoreDisplay = dataPriv.get( elem, "display" ); - } - display = jQuery.css( elem, "display" ); - if ( display === "none" ) { - if ( restoreDisplay ) { - display = restoreDisplay; - } else { - - // Get nonempty value(s) by temporarily forcing visibility - showHide( [ elem ], true ); - restoreDisplay = elem.style.display || restoreDisplay; - display = jQuery.css( elem, "display" ); - showHide( [ elem ] ); - } - } - - // Animate inline elements as inline-block - if ( display === "inline" || display === "inline-block" && restoreDisplay != null ) { - if ( jQuery.css( elem, "float" ) === "none" ) { - - // Restore the original display value at the end of pure show/hide animations - if ( !propTween ) { - anim.done( function() { - style.display = restoreDisplay; - } ); - if ( restoreDisplay == null ) { - display = style.display; - restoreDisplay = display === "none" ? 
"" : display; - } - } - style.display = "inline-block"; - } - } - } - - if ( opts.overflow ) { - style.overflow = "hidden"; - anim.always( function() { - style.overflow = opts.overflow[ 0 ]; - style.overflowX = opts.overflow[ 1 ]; - style.overflowY = opts.overflow[ 2 ]; - } ); - } - - // Implement show/hide animations - propTween = false; - for ( prop in orig ) { - - // General show/hide setup for this element animation - if ( !propTween ) { - if ( dataShow ) { - if ( "hidden" in dataShow ) { - hidden = dataShow.hidden; - } - } else { - dataShow = dataPriv.access( elem, "fxshow", { display: restoreDisplay } ); - } - - // Store hidden/visible for toggle so `.stop().toggle()` "reverses" - if ( toggle ) { - dataShow.hidden = !hidden; - } - - // Show elements before animating them - if ( hidden ) { - showHide( [ elem ], true ); - } - - /* eslint-disable no-loop-func */ - - anim.done( function() { - - /* eslint-enable no-loop-func */ - - // The final step of a "hide" animation is actually hiding the element - if ( !hidden ) { - showHide( [ elem ] ); - } - dataPriv.remove( elem, "fxshow" ); - for ( prop in orig ) { - jQuery.style( elem, prop, orig[ prop ] ); - } - } ); - } - - // Per-property setup - propTween = createTween( hidden ? 
dataShow[ prop ] : 0, prop, anim ); - if ( !( prop in dataShow ) ) { - dataShow[ prop ] = propTween.start; - if ( hidden ) { - propTween.end = propTween.start; - propTween.start = 0; - } - } - } -} - -function propFilter( props, specialEasing ) { - var index, name, easing, value, hooks; - - // camelCase, specialEasing and expand cssHook pass - for ( index in props ) { - name = camelCase( index ); - easing = specialEasing[ name ]; - value = props[ index ]; - if ( Array.isArray( value ) ) { - easing = value[ 1 ]; - value = props[ index ] = value[ 0 ]; - } - - if ( index !== name ) { - props[ name ] = value; - delete props[ index ]; - } - - hooks = jQuery.cssHooks[ name ]; - if ( hooks && "expand" in hooks ) { - value = hooks.expand( value ); - delete props[ name ]; - - // Not quite $.extend, this won't overwrite existing keys. - // Reusing 'index' because we have the correct "name" - for ( index in value ) { - if ( !( index in props ) ) { - props[ index ] = value[ index ]; - specialEasing[ index ] = easing; - } - } - } else { - specialEasing[ name ] = easing; - } - } -} - -function Animation( elem, properties, options ) { - var result, - stopped, - index = 0, - length = Animation.prefilters.length, - deferred = jQuery.Deferred().always( function() { - - // Don't match elem in the :animated selector - delete tick.elem; - } ), - tick = function() { - if ( stopped ) { - return false; - } - var currentTime = fxNow || createFxNow(), - remaining = Math.max( 0, animation.startTime + animation.duration - currentTime ), - - // Support: Android 2.3 only - // Archaic crash bug won't allow us to use `1 - ( 0.5 || 0 )` (#12497) - temp = remaining / animation.duration || 0, - percent = 1 - temp, - index = 0, - length = animation.tweens.length; - - for ( ; index < length; index++ ) { - animation.tweens[ index ].run( percent ); - } - - deferred.notifyWith( elem, [ animation, percent, remaining ] ); - - // If there's more to do, yield - if ( percent < 1 && length ) { - return 
remaining; - } - - // If this was an empty animation, synthesize a final progress notification - if ( !length ) { - deferred.notifyWith( elem, [ animation, 1, 0 ] ); - } - - // Resolve the animation and report its conclusion - deferred.resolveWith( elem, [ animation ] ); - return false; - }, - animation = deferred.promise( { - elem: elem, - props: jQuery.extend( {}, properties ), - opts: jQuery.extend( true, { - specialEasing: {}, - easing: jQuery.easing._default - }, options ), - originalProperties: properties, - originalOptions: options, - startTime: fxNow || createFxNow(), - duration: options.duration, - tweens: [], - createTween: function( prop, end ) { - var tween = jQuery.Tween( elem, animation.opts, prop, end, - animation.opts.specialEasing[ prop ] || animation.opts.easing ); - animation.tweens.push( tween ); - return tween; - }, - stop: function( gotoEnd ) { - var index = 0, - - // If we are going to the end, we want to run all the tweens - // otherwise we skip this part - length = gotoEnd ? 
animation.tweens.length : 0; - if ( stopped ) { - return this; - } - stopped = true; - for ( ; index < length; index++ ) { - animation.tweens[ index ].run( 1 ); - } - - // Resolve when we played the last frame; otherwise, reject - if ( gotoEnd ) { - deferred.notifyWith( elem, [ animation, 1, 0 ] ); - deferred.resolveWith( elem, [ animation, gotoEnd ] ); - } else { - deferred.rejectWith( elem, [ animation, gotoEnd ] ); - } - return this; - } - } ), - props = animation.props; - - propFilter( props, animation.opts.specialEasing ); - - for ( ; index < length; index++ ) { - result = Animation.prefilters[ index ].call( animation, elem, props, animation.opts ); - if ( result ) { - if ( isFunction( result.stop ) ) { - jQuery._queueHooks( animation.elem, animation.opts.queue ).stop = - result.stop.bind( result ); - } - return result; - } - } - - jQuery.map( props, createTween, animation ); - - if ( isFunction( animation.opts.start ) ) { - animation.opts.start.call( elem, animation ); - } - - // Attach callbacks from options - animation - .progress( animation.opts.progress ) - .done( animation.opts.done, animation.opts.complete ) - .fail( animation.opts.fail ) - .always( animation.opts.always ); - - jQuery.fx.timer( - jQuery.extend( tick, { - elem: elem, - anim: animation, - queue: animation.opts.queue - } ) - ); - - return animation; -} - -jQuery.Animation = jQuery.extend( Animation, { - - tweeners: { - "*": [ function( prop, value ) { - var tween = this.createTween( prop, value ); - adjustCSS( tween.elem, prop, rcssNum.exec( value ), tween ); - return tween; - } ] - }, - - tweener: function( props, callback ) { - if ( isFunction( props ) ) { - callback = props; - props = [ "*" ]; - } else { - props = props.match( rnothtmlwhite ); - } - - var prop, - index = 0, - length = props.length; - - for ( ; index < length; index++ ) { - prop = props[ index ]; - Animation.tweeners[ prop ] = Animation.tweeners[ prop ] || []; - Animation.tweeners[ prop ].unshift( callback ); - } - }, - 
- prefilters: [ defaultPrefilter ], - - prefilter: function( callback, prepend ) { - if ( prepend ) { - Animation.prefilters.unshift( callback ); - } else { - Animation.prefilters.push( callback ); - } - } -} ); - -jQuery.speed = function( speed, easing, fn ) { - var opt = speed && typeof speed === "object" ? jQuery.extend( {}, speed ) : { - complete: fn || !fn && easing || - isFunction( speed ) && speed, - duration: speed, - easing: fn && easing || easing && !isFunction( easing ) && easing - }; - - // Go to the end state if fx are off - if ( jQuery.fx.off ) { - opt.duration = 0; - - } else { - if ( typeof opt.duration !== "number" ) { - if ( opt.duration in jQuery.fx.speeds ) { - opt.duration = jQuery.fx.speeds[ opt.duration ]; - - } else { - opt.duration = jQuery.fx.speeds._default; - } - } - } - - // Normalize opt.queue - true/undefined/null -> "fx" - if ( opt.queue == null || opt.queue === true ) { - opt.queue = "fx"; - } - - // Queueing - opt.old = opt.complete; - - opt.complete = function() { - if ( isFunction( opt.old ) ) { - opt.old.call( this ); - } - - if ( opt.queue ) { - jQuery.dequeue( this, opt.queue ); - } - }; - - return opt; -}; - -jQuery.fn.extend( { - fadeTo: function( speed, to, easing, callback ) { - - // Show any hidden elements after setting opacity to 0 - return this.filter( isHiddenWithinTree ).css( "opacity", 0 ).show() - - // Animate to the value specified - .end().animate( { opacity: to }, speed, easing, callback ); - }, - animate: function( prop, speed, easing, callback ) { - var empty = jQuery.isEmptyObject( prop ), - optall = jQuery.speed( speed, easing, callback ), - doAnimation = function() { - - // Operate on a copy of prop so per-property easing won't be lost - var anim = Animation( this, jQuery.extend( {}, prop ), optall ); - - // Empty animations, or finishing resolves immediately - if ( empty || dataPriv.get( this, "finish" ) ) { - anim.stop( true ); - } - }; - - doAnimation.finish = doAnimation; - - return empty || 
optall.queue === false ? - this.each( doAnimation ) : - this.queue( optall.queue, doAnimation ); - }, - stop: function( type, clearQueue, gotoEnd ) { - var stopQueue = function( hooks ) { - var stop = hooks.stop; - delete hooks.stop; - stop( gotoEnd ); - }; - - if ( typeof type !== "string" ) { - gotoEnd = clearQueue; - clearQueue = type; - type = undefined; - } - if ( clearQueue ) { - this.queue( type || "fx", [] ); - } - - return this.each( function() { - var dequeue = true, - index = type != null && type + "queueHooks", - timers = jQuery.timers, - data = dataPriv.get( this ); - - if ( index ) { - if ( data[ index ] && data[ index ].stop ) { - stopQueue( data[ index ] ); - } - } else { - for ( index in data ) { - if ( data[ index ] && data[ index ].stop && rrun.test( index ) ) { - stopQueue( data[ index ] ); - } - } - } - - for ( index = timers.length; index--; ) { - if ( timers[ index ].elem === this && - ( type == null || timers[ index ].queue === type ) ) { - - timers[ index ].anim.stop( gotoEnd ); - dequeue = false; - timers.splice( index, 1 ); - } - } - - // Start the next in the queue if the last step wasn't forced. - // Timers currently will call their complete callbacks, which - // will dequeue but only if they were gotoEnd. - if ( dequeue || !gotoEnd ) { - jQuery.dequeue( this, type ); - } - } ); - }, - finish: function( type ) { - if ( type !== false ) { - type = type || "fx"; - } - return this.each( function() { - var index, - data = dataPriv.get( this ), - queue = data[ type + "queue" ], - hooks = data[ type + "queueHooks" ], - timers = jQuery.timers, - length = queue ? 
queue.length : 0; - - // Enable finishing flag on private data - data.finish = true; - - // Empty the queue first - jQuery.queue( this, type, [] ); - - if ( hooks && hooks.stop ) { - hooks.stop.call( this, true ); - } - - // Look for any active animations, and finish them - for ( index = timers.length; index--; ) { - if ( timers[ index ].elem === this && timers[ index ].queue === type ) { - timers[ index ].anim.stop( true ); - timers.splice( index, 1 ); - } - } - - // Look for any animations in the old queue and finish them - for ( index = 0; index < length; index++ ) { - if ( queue[ index ] && queue[ index ].finish ) { - queue[ index ].finish.call( this ); - } - } - - // Turn off finishing flag - delete data.finish; - } ); - } -} ); - -jQuery.each( [ "toggle", "show", "hide" ], function( _i, name ) { - var cssFn = jQuery.fn[ name ]; - jQuery.fn[ name ] = function( speed, easing, callback ) { - return speed == null || typeof speed === "boolean" ? - cssFn.apply( this, arguments ) : - this.animate( genFx( name, true ), speed, easing, callback ); - }; -} ); - -// Generate shortcuts for custom animations -jQuery.each( { - slideDown: genFx( "show" ), - slideUp: genFx( "hide" ), - slideToggle: genFx( "toggle" ), - fadeIn: { opacity: "show" }, - fadeOut: { opacity: "hide" }, - fadeToggle: { opacity: "toggle" } -}, function( name, props ) { - jQuery.fn[ name ] = function( speed, easing, callback ) { - return this.animate( props, speed, easing, callback ); - }; -} ); - -jQuery.timers = []; -jQuery.fx.tick = function() { - var timer, - i = 0, - timers = jQuery.timers; - - fxNow = Date.now(); - - for ( ; i < timers.length; i++ ) { - timer = timers[ i ]; - - // Run the timer and safely remove it when done (allowing for external removal) - if ( !timer() && timers[ i ] === timer ) { - timers.splice( i--, 1 ); - } - } - - if ( !timers.length ) { - jQuery.fx.stop(); - } - fxNow = undefined; -}; - -jQuery.fx.timer = function( timer ) { - jQuery.timers.push( timer ); - 
jQuery.fx.start(); -}; - -jQuery.fx.interval = 13; -jQuery.fx.start = function() { - if ( inProgress ) { - return; - } - - inProgress = true; - schedule(); -}; - -jQuery.fx.stop = function() { - inProgress = null; -}; - -jQuery.fx.speeds = { - slow: 600, - fast: 200, - - // Default speed - _default: 400 -}; - - -// Based off of the plugin by Clint Helfers, with permission. -// https://web.archive.org/web/20100324014747/http://blindsignals.com/index.php/2009/07/jquery-delay/ -jQuery.fn.delay = function( time, type ) { - time = jQuery.fx ? jQuery.fx.speeds[ time ] || time : time; - type = type || "fx"; - - return this.queue( type, function( next, hooks ) { - var timeout = window.setTimeout( next, time ); - hooks.stop = function() { - window.clearTimeout( timeout ); - }; - } ); -}; - - -( function() { - var input = document.createElement( "input" ), - select = document.createElement( "select" ), - opt = select.appendChild( document.createElement( "option" ) ); - - input.type = "checkbox"; - - // Support: Android <=4.3 only - // Default value for a checkbox should be "on" - support.checkOn = input.value !== ""; - - // Support: IE <=11 only - // Must access selectedIndex to make default options select - support.optSelected = opt.selected; - - // Support: IE <=11 only - // An input loses its value after becoming a radio - input = document.createElement( "input" ); - input.value = "t"; - input.type = "radio"; - support.radioValue = input.value === "t"; -} )(); - - -var boolHook, - attrHandle = jQuery.expr.attrHandle; - -jQuery.fn.extend( { - attr: function( name, value ) { - return access( this, jQuery.attr, name, value, arguments.length > 1 ); - }, - - removeAttr: function( name ) { - return this.each( function() { - jQuery.removeAttr( this, name ); - } ); - } -} ); - -jQuery.extend( { - attr: function( elem, name, value ) { - var ret, hooks, - nType = elem.nodeType; - - // Don't get/set attributes on text, comment and attribute nodes - if ( nType === 3 || nType === 8 || 
nType === 2 ) { - return; - } - - // Fallback to prop when attributes are not supported - if ( typeof elem.getAttribute === "undefined" ) { - return jQuery.prop( elem, name, value ); - } - - // Attribute hooks are determined by the lowercase version - // Grab necessary hook if one is defined - if ( nType !== 1 || !jQuery.isXMLDoc( elem ) ) { - hooks = jQuery.attrHooks[ name.toLowerCase() ] || - ( jQuery.expr.match.bool.test( name ) ? boolHook : undefined ); - } - - if ( value !== undefined ) { - if ( value === null ) { - jQuery.removeAttr( elem, name ); - return; - } - - if ( hooks && "set" in hooks && - ( ret = hooks.set( elem, value, name ) ) !== undefined ) { - return ret; - } - - elem.setAttribute( name, value + "" ); - return value; - } - - if ( hooks && "get" in hooks && ( ret = hooks.get( elem, name ) ) !== null ) { - return ret; - } - - ret = jQuery.find.attr( elem, name ); - - // Non-existent attributes return null, we normalize to undefined - return ret == null ? undefined : ret; - }, - - attrHooks: { - type: { - set: function( elem, value ) { - if ( !support.radioValue && value === "radio" && - nodeName( elem, "input" ) ) { - var val = elem.value; - elem.setAttribute( "type", value ); - if ( val ) { - elem.value = val; - } - return value; - } - } - } - }, - - removeAttr: function( elem, value ) { - var name, - i = 0, - - // Attribute names can contain non-HTML whitespace characters - // https://html.spec.whatwg.org/multipage/syntax.html#attributes-2 - attrNames = value && value.match( rnothtmlwhite ); - - if ( attrNames && elem.nodeType === 1 ) { - while ( ( name = attrNames[ i++ ] ) ) { - elem.removeAttribute( name ); - } - } - } -} ); - -// Hooks for boolean attributes -boolHook = { - set: function( elem, value, name ) { - if ( value === false ) { - - // Remove boolean attributes when set to false - jQuery.removeAttr( elem, name ); - } else { - elem.setAttribute( name, name ); - } - return name; - } -}; - -jQuery.each( 
jQuery.expr.match.bool.source.match( /\w+/g ), function( _i, name ) { - var getter = attrHandle[ name ] || jQuery.find.attr; - - attrHandle[ name ] = function( elem, name, isXML ) { - var ret, handle, - lowercaseName = name.toLowerCase(); - - if ( !isXML ) { - - // Avoid an infinite loop by temporarily removing this function from the getter - handle = attrHandle[ lowercaseName ]; - attrHandle[ lowercaseName ] = ret; - ret = getter( elem, name, isXML ) != null ? - lowercaseName : - null; - attrHandle[ lowercaseName ] = handle; - } - return ret; - }; -} ); - - - - -var rfocusable = /^(?:input|select|textarea|button)$/i, - rclickable = /^(?:a|area)$/i; - -jQuery.fn.extend( { - prop: function( name, value ) { - return access( this, jQuery.prop, name, value, arguments.length > 1 ); - }, - - removeProp: function( name ) { - return this.each( function() { - delete this[ jQuery.propFix[ name ] || name ]; - } ); - } -} ); - -jQuery.extend( { - prop: function( elem, name, value ) { - var ret, hooks, - nType = elem.nodeType; - - // Don't get/set properties on text, comment and attribute nodes - if ( nType === 3 || nType === 8 || nType === 2 ) { - return; - } - - if ( nType !== 1 || !jQuery.isXMLDoc( elem ) ) { - - // Fix name and attach hooks - name = jQuery.propFix[ name ] || name; - hooks = jQuery.propHooks[ name ]; - } - - if ( value !== undefined ) { - if ( hooks && "set" in hooks && - ( ret = hooks.set( elem, value, name ) ) !== undefined ) { - return ret; - } - - return ( elem[ name ] = value ); - } - - if ( hooks && "get" in hooks && ( ret = hooks.get( elem, name ) ) !== null ) { - return ret; - } - - return elem[ name ]; - }, - - propHooks: { - tabIndex: { - get: function( elem ) { - - // Support: IE <=9 - 11 only - // elem.tabIndex doesn't always return the - // correct value when it hasn't been explicitly set - // https://web.archive.org/web/20141116233347/http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/ - // Use 
proper attribute retrieval(#12072) - var tabindex = jQuery.find.attr( elem, "tabindex" ); - - if ( tabindex ) { - return parseInt( tabindex, 10 ); - } - - if ( - rfocusable.test( elem.nodeName ) || - rclickable.test( elem.nodeName ) && - elem.href - ) { - return 0; - } - - return -1; - } - } - }, - - propFix: { - "for": "htmlFor", - "class": "className" - } -} ); - -// Support: IE <=11 only -// Accessing the selectedIndex property -// forces the browser to respect setting selected -// on the option -// The getter ensures a default option is selected -// when in an optgroup -// eslint rule "no-unused-expressions" is disabled for this code -// since it considers such accessions noop -if ( !support.optSelected ) { - jQuery.propHooks.selected = { - get: function( elem ) { - - /* eslint no-unused-expressions: "off" */ - - var parent = elem.parentNode; - if ( parent && parent.parentNode ) { - parent.parentNode.selectedIndex; - } - return null; - }, - set: function( elem ) { - - /* eslint no-unused-expressions: "off" */ - - var parent = elem.parentNode; - if ( parent ) { - parent.selectedIndex; - - if ( parent.parentNode ) { - parent.parentNode.selectedIndex; - } - } - } - }; -} - -jQuery.each( [ - "tabIndex", - "readOnly", - "maxLength", - "cellSpacing", - "cellPadding", - "rowSpan", - "colSpan", - "useMap", - "frameBorder", - "contentEditable" -], function() { - jQuery.propFix[ this.toLowerCase() ] = this; -} ); - - - - - // Strip and collapse whitespace according to HTML spec - // https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace - function stripAndCollapse( value ) { - var tokens = value.match( rnothtmlwhite ) || []; - return tokens.join( " " ); - } - - -function getClass( elem ) { - return elem.getAttribute && elem.getAttribute( "class" ) || ""; -} - -function classesToArray( value ) { - if ( Array.isArray( value ) ) { - return value; - } - if ( typeof value === "string" ) { - return value.match( rnothtmlwhite ) || []; - } - return []; -} - 
-jQuery.fn.extend( { - addClass: function( value ) { - var classes, elem, cur, curValue, clazz, j, finalValue, - i = 0; - - if ( isFunction( value ) ) { - return this.each( function( j ) { - jQuery( this ).addClass( value.call( this, j, getClass( this ) ) ); - } ); - } - - classes = classesToArray( value ); - - if ( classes.length ) { - while ( ( elem = this[ i++ ] ) ) { - curValue = getClass( elem ); - cur = elem.nodeType === 1 && ( " " + stripAndCollapse( curValue ) + " " ); - - if ( cur ) { - j = 0; - while ( ( clazz = classes[ j++ ] ) ) { - if ( cur.indexOf( " " + clazz + " " ) < 0 ) { - cur += clazz + " "; - } - } - - // Only assign if different to avoid unneeded rendering. - finalValue = stripAndCollapse( cur ); - if ( curValue !== finalValue ) { - elem.setAttribute( "class", finalValue ); - } - } - } - } - - return this; - }, - - removeClass: function( value ) { - var classes, elem, cur, curValue, clazz, j, finalValue, - i = 0; - - if ( isFunction( value ) ) { - return this.each( function( j ) { - jQuery( this ).removeClass( value.call( this, j, getClass( this ) ) ); - } ); - } - - if ( !arguments.length ) { - return this.attr( "class", "" ); - } - - classes = classesToArray( value ); - - if ( classes.length ) { - while ( ( elem = this[ i++ ] ) ) { - curValue = getClass( elem ); - - // This expression is here for better compressibility (see addClass) - cur = elem.nodeType === 1 && ( " " + stripAndCollapse( curValue ) + " " ); - - if ( cur ) { - j = 0; - while ( ( clazz = classes[ j++ ] ) ) { - - // Remove *all* instances - while ( cur.indexOf( " " + clazz + " " ) > -1 ) { - cur = cur.replace( " " + clazz + " ", " " ); - } - } - - // Only assign if different to avoid unneeded rendering. 
- finalValue = stripAndCollapse( cur ); - if ( curValue !== finalValue ) { - elem.setAttribute( "class", finalValue ); - } - } - } - } - - return this; - }, - - toggleClass: function( value, stateVal ) { - var type = typeof value, - isValidValue = type === "string" || Array.isArray( value ); - - if ( typeof stateVal === "boolean" && isValidValue ) { - return stateVal ? this.addClass( value ) : this.removeClass( value ); - } - - if ( isFunction( value ) ) { - return this.each( function( i ) { - jQuery( this ).toggleClass( - value.call( this, i, getClass( this ), stateVal ), - stateVal - ); - } ); - } - - return this.each( function() { - var className, i, self, classNames; - - if ( isValidValue ) { - - // Toggle individual class names - i = 0; - self = jQuery( this ); - classNames = classesToArray( value ); - - while ( ( className = classNames[ i++ ] ) ) { - - // Check each className given, space separated list - if ( self.hasClass( className ) ) { - self.removeClass( className ); - } else { - self.addClass( className ); - } - } - - // Toggle whole class name - } else if ( value === undefined || type === "boolean" ) { - className = getClass( this ); - if ( className ) { - - // Store className if set - dataPriv.set( this, "__className__", className ); - } - - // If the element has a class name or if we're passed `false`, - // then remove the whole classname (if there was one, the above saved it). - // Otherwise bring back whatever was previously saved (if anything), - // falling back to the empty string if nothing was stored. - if ( this.setAttribute ) { - this.setAttribute( "class", - className || value === false ? 
- "" : - dataPriv.get( this, "__className__" ) || "" - ); - } - } - } ); - }, - - hasClass: function( selector ) { - var className, elem, - i = 0; - - className = " " + selector + " "; - while ( ( elem = this[ i++ ] ) ) { - if ( elem.nodeType === 1 && - ( " " + stripAndCollapse( getClass( elem ) ) + " " ).indexOf( className ) > -1 ) { - return true; - } - } - - return false; - } -} ); - - - - -var rreturn = /\r/g; - -jQuery.fn.extend( { - val: function( value ) { - var hooks, ret, valueIsFunction, - elem = this[ 0 ]; - - if ( !arguments.length ) { - if ( elem ) { - hooks = jQuery.valHooks[ elem.type ] || - jQuery.valHooks[ elem.nodeName.toLowerCase() ]; - - if ( hooks && - "get" in hooks && - ( ret = hooks.get( elem, "value" ) ) !== undefined - ) { - return ret; - } - - ret = elem.value; - - // Handle most common string cases - if ( typeof ret === "string" ) { - return ret.replace( rreturn, "" ); - } - - // Handle cases where value is null/undef or number - return ret == null ? "" : ret; - } - - return; - } - - valueIsFunction = isFunction( value ); - - return this.each( function( i ) { - var val; - - if ( this.nodeType !== 1 ) { - return; - } - - if ( valueIsFunction ) { - val = value.call( this, i, jQuery( this ).val() ); - } else { - val = value; - } - - // Treat null/undefined as ""; convert numbers to string - if ( val == null ) { - val = ""; - - } else if ( typeof val === "number" ) { - val += ""; - - } else if ( Array.isArray( val ) ) { - val = jQuery.map( val, function( value ) { - return value == null ? "" : value + ""; - } ); - } - - hooks = jQuery.valHooks[ this.type ] || jQuery.valHooks[ this.nodeName.toLowerCase() ]; - - // If set returns undefined, fall back to normal setting - if ( !hooks || !( "set" in hooks ) || hooks.set( this, val, "value" ) === undefined ) { - this.value = val; - } - } ); - } -} ); - -jQuery.extend( { - valHooks: { - option: { - get: function( elem ) { - - var val = jQuery.find.attr( elem, "value" ); - return val != null ? 
- val : - - // Support: IE <=10 - 11 only - // option.text throws exceptions (#14686, #14858) - // Strip and collapse whitespace - // https://html.spec.whatwg.org/#strip-and-collapse-whitespace - stripAndCollapse( jQuery.text( elem ) ); - } - }, - select: { - get: function( elem ) { - var value, option, i, - options = elem.options, - index = elem.selectedIndex, - one = elem.type === "select-one", - values = one ? null : [], - max = one ? index + 1 : options.length; - - if ( index < 0 ) { - i = max; - - } else { - i = one ? index : 0; - } - - // Loop through all the selected options - for ( ; i < max; i++ ) { - option = options[ i ]; - - // Support: IE <=9 only - // IE8-9 doesn't update selected after form reset (#2551) - if ( ( option.selected || i === index ) && - - // Don't return options that are disabled or in a disabled optgroup - !option.disabled && - ( !option.parentNode.disabled || - !nodeName( option.parentNode, "optgroup" ) ) ) { - - // Get the specific value for the option - value = jQuery( option ).val(); - - // We don't need an array for one selects - if ( one ) { - return value; - } - - // Multi-Selects return an array - values.push( value ); - } - } - - return values; - }, - - set: function( elem, value ) { - var optionSet, option, - options = elem.options, - values = jQuery.makeArray( value ), - i = options.length; - - while ( i-- ) { - option = options[ i ]; - - /* eslint-disable no-cond-assign */ - - if ( option.selected = - jQuery.inArray( jQuery.valHooks.option.get( option ), values ) > -1 - ) { - optionSet = true; - } - - /* eslint-enable no-cond-assign */ - } - - // Force browsers to behave consistently when non-matching value is set - if ( !optionSet ) { - elem.selectedIndex = -1; - } - return values; - } - } - } -} ); - -// Radios and checkboxes getter/setter -jQuery.each( [ "radio", "checkbox" ], function() { - jQuery.valHooks[ this ] = { - set: function( elem, value ) { - if ( Array.isArray( value ) ) { - return ( elem.checked = 
jQuery.inArray( jQuery( elem ).val(), value ) > -1 ); - } - } - }; - if ( !support.checkOn ) { - jQuery.valHooks[ this ].get = function( elem ) { - return elem.getAttribute( "value" ) === null ? "on" : elem.value; - }; - } -} ); - - - - -// Return jQuery for attributes-only inclusion - - -support.focusin = "onfocusin" in window; - - -var rfocusMorph = /^(?:focusinfocus|focusoutblur)$/, - stopPropagationCallback = function( e ) { - e.stopPropagation(); - }; - -jQuery.extend( jQuery.event, { - - trigger: function( event, data, elem, onlyHandlers ) { - - var i, cur, tmp, bubbleType, ontype, handle, special, lastElement, - eventPath = [ elem || document ], - type = hasOwn.call( event, "type" ) ? event.type : event, - namespaces = hasOwn.call( event, "namespace" ) ? event.namespace.split( "." ) : []; - - cur = lastElement = tmp = elem = elem || document; - - // Don't do events on text and comment nodes - if ( elem.nodeType === 3 || elem.nodeType === 8 ) { - return; - } - - // focus/blur morphs to focusin/out; ensure we're not firing them right now - if ( rfocusMorph.test( type + jQuery.event.triggered ) ) { - return; - } - - if ( type.indexOf( "." ) > -1 ) { - - // Namespaced trigger; create a regexp to match event type in handle() - namespaces = type.split( "." ); - type = namespaces.shift(); - namespaces.sort(); - } - ontype = type.indexOf( ":" ) < 0 && "on" + type; - - // Caller can pass in a jQuery.Event object, Object, or just an event type string - event = event[ jQuery.expando ] ? - event : - new jQuery.Event( type, typeof event === "object" && event ); - - // Trigger bitmask: & 1 for native handlers; & 2 for jQuery (always true) - event.isTrigger = onlyHandlers ? 2 : 3; - event.namespace = namespaces.join( "." ); - event.rnamespace = event.namespace ? 
- new RegExp( "(^|\\.)" + namespaces.join( "\\.(?:.*\\.|)" ) + "(\\.|$)" ) : - null; - - // Clean up the event in case it is being reused - event.result = undefined; - if ( !event.target ) { - event.target = elem; - } - - // Clone any incoming data and prepend the event, creating the handler arg list - data = data == null ? - [ event ] : - jQuery.makeArray( data, [ event ] ); - - // Allow special events to draw outside the lines - special = jQuery.event.special[ type ] || {}; - if ( !onlyHandlers && special.trigger && special.trigger.apply( elem, data ) === false ) { - return; - } - - // Determine event propagation path in advance, per W3C events spec (#9951) - // Bubble up to document, then to window; watch for a global ownerDocument var (#9724) - if ( !onlyHandlers && !special.noBubble && !isWindow( elem ) ) { - - bubbleType = special.delegateType || type; - if ( !rfocusMorph.test( bubbleType + type ) ) { - cur = cur.parentNode; - } - for ( ; cur; cur = cur.parentNode ) { - eventPath.push( cur ); - tmp = cur; - } - - // Only add window if we got to document (e.g., not plain obj or detached DOM) - if ( tmp === ( elem.ownerDocument || document ) ) { - eventPath.push( tmp.defaultView || tmp.parentWindow || window ); - } - } - - // Fire handlers on the event path - i = 0; - while ( ( cur = eventPath[ i++ ] ) && !event.isPropagationStopped() ) { - lastElement = cur; - event.type = i > 1 ? 
- bubbleType : - special.bindType || type; - - // jQuery handler - handle = ( dataPriv.get( cur, "events" ) || Object.create( null ) )[ event.type ] && - dataPriv.get( cur, "handle" ); - if ( handle ) { - handle.apply( cur, data ); - } - - // Native handler - handle = ontype && cur[ ontype ]; - if ( handle && handle.apply && acceptData( cur ) ) { - event.result = handle.apply( cur, data ); - if ( event.result === false ) { - event.preventDefault(); - } - } - } - event.type = type; - - // If nobody prevented the default action, do it now - if ( !onlyHandlers && !event.isDefaultPrevented() ) { - - if ( ( !special._default || - special._default.apply( eventPath.pop(), data ) === false ) && - acceptData( elem ) ) { - - // Call a native DOM method on the target with the same name as the event. - // Don't do default actions on window, that's where global variables be (#6170) - if ( ontype && isFunction( elem[ type ] ) && !isWindow( elem ) ) { - - // Don't re-trigger an onFOO event when we call its FOO() method - tmp = elem[ ontype ]; - - if ( tmp ) { - elem[ ontype ] = null; - } - - // Prevent re-triggering of the same event, since we already bubbled it above - jQuery.event.triggered = type; - - if ( event.isPropagationStopped() ) { - lastElement.addEventListener( type, stopPropagationCallback ); - } - - elem[ type ](); - - if ( event.isPropagationStopped() ) { - lastElement.removeEventListener( type, stopPropagationCallback ); - } - - jQuery.event.triggered = undefined; - - if ( tmp ) { - elem[ ontype ] = tmp; - } - } - } - } - - return event.result; - }, - - // Piggyback on a donor event to simulate a different one - // Used only for `focus(in | out)` events - simulate: function( type, elem, event ) { - var e = jQuery.extend( - new jQuery.Event(), - event, - { - type: type, - isSimulated: true - } - ); - - jQuery.event.trigger( e, null, elem ); - } - -} ); - -jQuery.fn.extend( { - - trigger: function( type, data ) { - return this.each( function() { - 
jQuery.event.trigger( type, data, this ); - } ); - }, - triggerHandler: function( type, data ) { - var elem = this[ 0 ]; - if ( elem ) { - return jQuery.event.trigger( type, data, elem, true ); - } - } -} ); - - -// Support: Firefox <=44 -// Firefox doesn't have focus(in | out) events -// Related ticket - https://bugzilla.mozilla.org/show_bug.cgi?id=687787 -// -// Support: Chrome <=48 - 49, Safari <=9.0 - 9.1 -// focus(in | out) events fire after focus & blur events, -// which is spec violation - http://www.w3.org/TR/DOM-Level-3-Events/#events-focusevent-event-order -// Related ticket - https://bugs.chromium.org/p/chromium/issues/detail?id=449857 -if ( !support.focusin ) { - jQuery.each( { focus: "focusin", blur: "focusout" }, function( orig, fix ) { - - // Attach a single capturing handler on the document while someone wants focusin/focusout - var handler = function( event ) { - jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ) ); - }; - - jQuery.event.special[ fix ] = { - setup: function() { - - // Handle: regular nodes (via `this.ownerDocument`), window - // (via `this.document`) & document (via `this`). 
- var doc = this.ownerDocument || this.document || this, - attaches = dataPriv.access( doc, fix ); - - if ( !attaches ) { - doc.addEventListener( orig, handler, true ); - } - dataPriv.access( doc, fix, ( attaches || 0 ) + 1 ); - }, - teardown: function() { - var doc = this.ownerDocument || this.document || this, - attaches = dataPriv.access( doc, fix ) - 1; - - if ( !attaches ) { - doc.removeEventListener( orig, handler, true ); - dataPriv.remove( doc, fix ); - - } else { - dataPriv.access( doc, fix, attaches ); - } - } - }; - } ); -} -var location = window.location; - -var nonce = { guid: Date.now() }; - -var rquery = ( /\?/ ); - - - -// Cross-browser xml parsing -jQuery.parseXML = function( data ) { - var xml, parserErrorElem; - if ( !data || typeof data !== "string" ) { - return null; - } - - // Support: IE 9 - 11 only - // IE throws on parseFromString with invalid input. - try { - xml = ( new window.DOMParser() ).parseFromString( data, "text/xml" ); - } catch ( e ) {} - - parserErrorElem = xml && xml.getElementsByTagName( "parsererror" )[ 0 ]; - if ( !xml || parserErrorElem ) { - jQuery.error( "Invalid XML: " + ( - parserErrorElem ? - jQuery.map( parserErrorElem.childNodes, function( el ) { - return el.textContent; - } ).join( "\n" ) : - data - ) ); - } - return xml; -}; - - -var - rbracket = /\[\]$/, - rCRLF = /\r?\n/g, - rsubmitterTypes = /^(?:submit|button|image|reset|file)$/i, - rsubmittable = /^(?:input|select|textarea|keygen)/i; - -function buildParams( prefix, obj, traditional, add ) { - var name; - - if ( Array.isArray( obj ) ) { - - // Serialize array item. - jQuery.each( obj, function( i, v ) { - if ( traditional || rbracket.test( prefix ) ) { - - // Treat each array item as a scalar. - add( prefix, v ); - - } else { - - // Item is non-scalar (array or object), encode its numeric index. - buildParams( - prefix + "[" + ( typeof v === "object" && v != null ? 
i : "" ) + "]", - v, - traditional, - add - ); - } - } ); - - } else if ( !traditional && toType( obj ) === "object" ) { - - // Serialize object item. - for ( name in obj ) { - buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add ); - } - - } else { - - // Serialize scalar item. - add( prefix, obj ); - } -} - -// Serialize an array of form elements or a set of -// key/values into a query string -jQuery.param = function( a, traditional ) { - var prefix, - s = [], - add = function( key, valueOrFunction ) { - - // If value is a function, invoke it and use its return value - var value = isFunction( valueOrFunction ) ? - valueOrFunction() : - valueOrFunction; - - s[ s.length ] = encodeURIComponent( key ) + "=" + - encodeURIComponent( value == null ? "" : value ); - }; - - if ( a == null ) { - return ""; - } - - // If an array was passed in, assume that it is an array of form elements. - if ( Array.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { - - // Serialize the form elements - jQuery.each( a, function() { - add( this.name, this.value ); - } ); - - } else { - - // If traditional, encode the "old" way (the way 1.3.2 or older - // did it), otherwise encode params recursively. - for ( prefix in a ) { - buildParams( prefix, a[ prefix ], traditional, add ); - } - } - - // Return the resulting serialization - return s.join( "&" ); -}; - -jQuery.fn.extend( { - serialize: function() { - return jQuery.param( this.serializeArray() ); - }, - serializeArray: function() { - return this.map( function() { - - // Can add propHook for "elements" to filter or add form elements - var elements = jQuery.prop( this, "elements" ); - return elements ? 
jQuery.makeArray( elements ) : this; - } ).filter( function() { - var type = this.type; - - // Use .is( ":disabled" ) so that fieldset[disabled] works - return this.name && !jQuery( this ).is( ":disabled" ) && - rsubmittable.test( this.nodeName ) && !rsubmitterTypes.test( type ) && - ( this.checked || !rcheckableType.test( type ) ); - } ).map( function( _i, elem ) { - var val = jQuery( this ).val(); - - if ( val == null ) { - return null; - } - - if ( Array.isArray( val ) ) { - return jQuery.map( val, function( val ) { - return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; - } ); - } - - return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; - } ).get(); - } -} ); - - -var - r20 = /%20/g, - rhash = /#.*$/, - rantiCache = /([?&])_=[^&]*/, - rheaders = /^(.*?):[ \t]*([^\r\n]*)$/mg, - - // #7653, #8125, #8152: local protocol detection - rlocalProtocol = /^(?:about|app|app-storage|.+-extension|file|res|widget):$/, - rnoContent = /^(?:GET|HEAD)$/, - rprotocol = /^\/\//, - - /* Prefilters - * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example) - * 2) These are called: - * - BEFORE asking for a transport - * - AFTER param serialization (s.data is a string if s.processData is true) - * 3) key is the dataType - * 4) the catchall symbol "*" can be used - * 5) execution will start with transport dataType and THEN continue down to "*" if needed - */ - prefilters = {}, - - /* Transports bindings - * 1) key is the dataType - * 2) the catchall symbol "*" can be used - * 3) selection will start with transport dataType and THEN go to "*" if needed - */ - transports = {}, - - // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression - allTypes = "*/".concat( "*" ), - - // Anchor tag for parsing the document origin - originAnchor = document.createElement( "a" ); - -originAnchor.href = location.href; - -// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport -function 
addToPrefiltersOrTransports( structure ) { - - // dataTypeExpression is optional and defaults to "*" - return function( dataTypeExpression, func ) { - - if ( typeof dataTypeExpression !== "string" ) { - func = dataTypeExpression; - dataTypeExpression = "*"; - } - - var dataType, - i = 0, - dataTypes = dataTypeExpression.toLowerCase().match( rnothtmlwhite ) || []; - - if ( isFunction( func ) ) { - - // For each dataType in the dataTypeExpression - while ( ( dataType = dataTypes[ i++ ] ) ) { - - // Prepend if requested - if ( dataType[ 0 ] === "+" ) { - dataType = dataType.slice( 1 ) || "*"; - ( structure[ dataType ] = structure[ dataType ] || [] ).unshift( func ); - - // Otherwise append - } else { - ( structure[ dataType ] = structure[ dataType ] || [] ).push( func ); - } - } - } - }; -} - -// Base inspection function for prefilters and transports -function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR ) { - - var inspected = {}, - seekingTransport = ( structure === transports ); - - function inspect( dataType ) { - var selected; - inspected[ dataType ] = true; - jQuery.each( structure[ dataType ] || [], function( _, prefilterOrFactory ) { - var dataTypeOrTransport = prefilterOrFactory( options, originalOptions, jqXHR ); - if ( typeof dataTypeOrTransport === "string" && - !seekingTransport && !inspected[ dataTypeOrTransport ] ) { - - options.dataTypes.unshift( dataTypeOrTransport ); - inspect( dataTypeOrTransport ); - return false; - } else if ( seekingTransport ) { - return !( selected = dataTypeOrTransport ); - } - } ); - return selected; - } - - return inspect( options.dataTypes[ 0 ] ) || !inspected[ "*" ] && inspect( "*" ); -} - -// A special extend for ajax options -// that takes "flat" options (not to be deep extended) -// Fixes #9887 -function ajaxExtend( target, src ) { - var key, deep, - flatOptions = jQuery.ajaxSettings.flatOptions || {}; - - for ( key in src ) { - if ( src[ key ] !== undefined ) { - ( flatOptions[ key ] ? 
target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ]; - } - } - if ( deep ) { - jQuery.extend( true, target, deep ); - } - - return target; -} - -/* Handles responses to an ajax request: - * - finds the right dataType (mediates between content-type and expected dataType) - * - returns the corresponding response - */ -function ajaxHandleResponses( s, jqXHR, responses ) { - - var ct, type, finalDataType, firstDataType, - contents = s.contents, - dataTypes = s.dataTypes; - - // Remove auto dataType and get content-type in the process - while ( dataTypes[ 0 ] === "*" ) { - dataTypes.shift(); - if ( ct === undefined ) { - ct = s.mimeType || jqXHR.getResponseHeader( "Content-Type" ); - } - } - - // Check if we're dealing with a known content-type - if ( ct ) { - for ( type in contents ) { - if ( contents[ type ] && contents[ type ].test( ct ) ) { - dataTypes.unshift( type ); - break; - } - } - } - - // Check to see if we have a response for the expected dataType - if ( dataTypes[ 0 ] in responses ) { - finalDataType = dataTypes[ 0 ]; - } else { - - // Try convertible dataTypes - for ( type in responses ) { - if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[ 0 ] ] ) { - finalDataType = type; - break; - } - if ( !firstDataType ) { - firstDataType = type; - } - } - - // Or just use first one - finalDataType = finalDataType || firstDataType; - } - - // If we found a dataType - // We add the dataType to the list if needed - // and return the corresponding response - if ( finalDataType ) { - if ( finalDataType !== dataTypes[ 0 ] ) { - dataTypes.unshift( finalDataType ); - } - return responses[ finalDataType ]; - } -} - -/* Chain conversions given the request and the original response - * Also sets the responseXXX fields on the jqXHR instance - */ -function ajaxConvert( s, response, jqXHR, isSuccess ) { - var conv2, current, conv, tmp, prev, - converters = {}, - - // Work with a copy of dataTypes in case we need to modify it for conversion - dataTypes = 
s.dataTypes.slice(); - - // Create converters map with lowercased keys - if ( dataTypes[ 1 ] ) { - for ( conv in s.converters ) { - converters[ conv.toLowerCase() ] = s.converters[ conv ]; - } - } - - current = dataTypes.shift(); - - // Convert to each sequential dataType - while ( current ) { - - if ( s.responseFields[ current ] ) { - jqXHR[ s.responseFields[ current ] ] = response; - } - - // Apply the dataFilter if provided - if ( !prev && isSuccess && s.dataFilter ) { - response = s.dataFilter( response, s.dataType ); - } - - prev = current; - current = dataTypes.shift(); - - if ( current ) { - - // There's only work to do if current dataType is non-auto - if ( current === "*" ) { - - current = prev; - - // Convert response if prev dataType is non-auto and differs from current - } else if ( prev !== "*" && prev !== current ) { - - // Seek a direct converter - conv = converters[ prev + " " + current ] || converters[ "* " + current ]; - - // If none found, seek a pair - if ( !conv ) { - for ( conv2 in converters ) { - - // If conv2 outputs current - tmp = conv2.split( " " ); - if ( tmp[ 1 ] === current ) { - - // If prev can be converted to accepted input - conv = converters[ prev + " " + tmp[ 0 ] ] || - converters[ "* " + tmp[ 0 ] ]; - if ( conv ) { - - // Condense equivalence converters - if ( conv === true ) { - conv = converters[ conv2 ]; - - // Otherwise, insert the intermediate dataType - } else if ( converters[ conv2 ] !== true ) { - current = tmp[ 0 ]; - dataTypes.unshift( tmp[ 1 ] ); - } - break; - } - } - } - } - - // Apply converter (if not an equivalence) - if ( conv !== true ) { - - // Unless errors are allowed to bubble, catch and return them - if ( conv && s.throws ) { - response = conv( response ); - } else { - try { - response = conv( response ); - } catch ( e ) { - return { - state: "parsererror", - error: conv ? 
e : "No conversion from " + prev + " to " + current - }; - } - } - } - } - } - } - - return { state: "success", data: response }; -} - -jQuery.extend( { - - // Counter for holding the number of active queries - active: 0, - - // Last-Modified header cache for next request - lastModified: {}, - etag: {}, - - ajaxSettings: { - url: location.href, - type: "GET", - isLocal: rlocalProtocol.test( location.protocol ), - global: true, - processData: true, - async: true, - contentType: "application/x-www-form-urlencoded; charset=UTF-8", - - /* - timeout: 0, - data: null, - dataType: null, - username: null, - password: null, - cache: null, - throws: false, - traditional: false, - headers: {}, - */ - - accepts: { - "*": allTypes, - text: "text/plain", - html: "text/html", - xml: "application/xml, text/xml", - json: "application/json, text/javascript" - }, - - contents: { - xml: /\bxml\b/, - html: /\bhtml/, - json: /\bjson\b/ - }, - - responseFields: { - xml: "responseXML", - text: "responseText", - json: "responseJSON" - }, - - // Data converters - // Keys separate source (or catchall "*") and destination types with a single space - converters: { - - // Convert anything to text - "* text": String, - - // Text to html (true = no transformation) - "text html": true, - - // Evaluate text as a json expression - "text json": JSON.parse, - - // Parse text as xml - "text xml": jQuery.parseXML - }, - - // For options that shouldn't be deep extended: - // you can add your own custom options here if - // and when you create one that shouldn't be - // deep extended (see ajaxExtend) - flatOptions: { - url: true, - context: true - } - }, - - // Creates a full fledged settings object into target - // with both ajaxSettings and settings fields. - // If target is omitted, writes into ajaxSettings. - ajaxSetup: function( target, settings ) { - return settings ? 
- - // Building a settings object - ajaxExtend( ajaxExtend( target, jQuery.ajaxSettings ), settings ) : - - // Extending ajaxSettings - ajaxExtend( jQuery.ajaxSettings, target ); - }, - - ajaxPrefilter: addToPrefiltersOrTransports( prefilters ), - ajaxTransport: addToPrefiltersOrTransports( transports ), - - // Main method - ajax: function( url, options ) { - - // If url is an object, simulate pre-1.5 signature - if ( typeof url === "object" ) { - options = url; - url = undefined; - } - - // Force options to be an object - options = options || {}; - - var transport, - - // URL without anti-cache param - cacheURL, - - // Response headers - responseHeadersString, - responseHeaders, - - // timeout handle - timeoutTimer, - - // Url cleanup var - urlAnchor, - - // Request state (becomes false upon send and true upon completion) - completed, - - // To know if global events are to be dispatched - fireGlobals, - - // Loop variable - i, - - // uncached part of the url - uncached, - - // Create the final options object - s = jQuery.ajaxSetup( {}, options ), - - // Callbacks context - callbackContext = s.context || s, - - // Context for global events is callbackContext if it is a DOM node or jQuery collection - globalEventContext = s.context && - ( callbackContext.nodeType || callbackContext.jquery ) ? 
- jQuery( callbackContext ) : - jQuery.event, - - // Deferreds - deferred = jQuery.Deferred(), - completeDeferred = jQuery.Callbacks( "once memory" ), - - // Status-dependent callbacks - statusCode = s.statusCode || {}, - - // Headers (they are sent all at once) - requestHeaders = {}, - requestHeadersNames = {}, - - // Default abort message - strAbort = "canceled", - - // Fake xhr - jqXHR = { - readyState: 0, - - // Builds headers hashtable if needed - getResponseHeader: function( key ) { - var match; - if ( completed ) { - if ( !responseHeaders ) { - responseHeaders = {}; - while ( ( match = rheaders.exec( responseHeadersString ) ) ) { - responseHeaders[ match[ 1 ].toLowerCase() + " " ] = - ( responseHeaders[ match[ 1 ].toLowerCase() + " " ] || [] ) - .concat( match[ 2 ] ); - } - } - match = responseHeaders[ key.toLowerCase() + " " ]; - } - return match == null ? null : match.join( ", " ); - }, - - // Raw string - getAllResponseHeaders: function() { - return completed ? responseHeadersString : null; - }, - - // Caches the header - setRequestHeader: function( name, value ) { - if ( completed == null ) { - name = requestHeadersNames[ name.toLowerCase() ] = - requestHeadersNames[ name.toLowerCase() ] || name; - requestHeaders[ name ] = value; - } - return this; - }, - - // Overrides response content-type header - overrideMimeType: function( type ) { - if ( completed == null ) { - s.mimeType = type; - } - return this; - }, - - // Status-dependent callbacks - statusCode: function( map ) { - var code; - if ( map ) { - if ( completed ) { - - // Execute the appropriate callbacks - jqXHR.always( map[ jqXHR.status ] ); - } else { - - // Lazy-add the new callbacks in a way that preserves old ones - for ( code in map ) { - statusCode[ code ] = [ statusCode[ code ], map[ code ] ]; - } - } - } - return this; - }, - - // Cancel the request - abort: function( statusText ) { - var finalText = statusText || strAbort; - if ( transport ) { - transport.abort( finalText ); - } - done( 
0, finalText ); - return this; - } - }; - - // Attach deferreds - deferred.promise( jqXHR ); - - // Add protocol if not provided (prefilters might expect it) - // Handle falsy url in the settings object (#10093: consistency with old signature) - // We also use the url parameter if available - s.url = ( ( url || s.url || location.href ) + "" ) - .replace( rprotocol, location.protocol + "//" ); - - // Alias method option to type as per ticket #12004 - s.type = options.method || options.type || s.method || s.type; - - // Extract dataTypes list - s.dataTypes = ( s.dataType || "*" ).toLowerCase().match( rnothtmlwhite ) || [ "" ]; - - // A cross-domain request is in order when the origin doesn't match the current origin. - if ( s.crossDomain == null ) { - urlAnchor = document.createElement( "a" ); - - // Support: IE <=8 - 11, Edge 12 - 15 - // IE throws exception on accessing the href property if url is malformed, - // e.g. http://example.com:80x/ - try { - urlAnchor.href = s.url; - - // Support: IE <=8 - 11 only - // Anchor's host property isn't correctly set when s.url is relative - urlAnchor.href = urlAnchor.href; - s.crossDomain = originAnchor.protocol + "//" + originAnchor.host !== - urlAnchor.protocol + "//" + urlAnchor.host; - } catch ( e ) { - - // If there is an error parsing the URL, assume it is crossDomain, - // it can be rejected by the transport if it is invalid - s.crossDomain = true; - } - } - - // Convert data if not already a string - if ( s.data && s.processData && typeof s.data !== "string" ) { - s.data = jQuery.param( s.data, s.traditional ); - } - - // Apply prefilters - inspectPrefiltersOrTransports( prefilters, s, options, jqXHR ); - - // If request was aborted inside a prefilter, stop there - if ( completed ) { - return jqXHR; - } - - // We can fire global events as of now if asked to - // Don't fire events if jQuery.event is undefined in an AMD-usage scenario (#15118) - fireGlobals = jQuery.event && s.global; - - // Watch for a new set of 
requests - if ( fireGlobals && jQuery.active++ === 0 ) { - jQuery.event.trigger( "ajaxStart" ); - } - - // Uppercase the type - s.type = s.type.toUpperCase(); - - // Determine if request has content - s.hasContent = !rnoContent.test( s.type ); - - // Save the URL in case we're toying with the If-Modified-Since - // and/or If-None-Match header later on - // Remove hash to simplify url manipulation - cacheURL = s.url.replace( rhash, "" ); - - // More options handling for requests with no content - if ( !s.hasContent ) { - - // Remember the hash so we can put it back - uncached = s.url.slice( cacheURL.length ); - - // If data is available and should be processed, append data to url - if ( s.data && ( s.processData || typeof s.data === "string" ) ) { - cacheURL += ( rquery.test( cacheURL ) ? "&" : "?" ) + s.data; - - // #9682: remove data so that it's not used in an eventual retry - delete s.data; - } - - // Add or update anti-cache param if needed - if ( s.cache === false ) { - cacheURL = cacheURL.replace( rantiCache, "$1" ); - uncached = ( rquery.test( cacheURL ) ? "&" : "?" ) + "_=" + ( nonce.guid++ ) + - uncached; - } - - // Put hash and anti-cache on the URL that will be requested (gh-1732) - s.url = cacheURL + uncached; - - // Change '%20' to '+' if this is encoded form body content (gh-2658) - } else if ( s.data && s.processData && - ( s.contentType || "" ).indexOf( "application/x-www-form-urlencoded" ) === 0 ) { - s.data = s.data.replace( r20, "+" ); - } - - // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
- if ( s.ifModified ) { - if ( jQuery.lastModified[ cacheURL ] ) { - jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ cacheURL ] ); - } - if ( jQuery.etag[ cacheURL ] ) { - jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ cacheURL ] ); - } - } - - // Set the correct header, if data is being sent - if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) { - jqXHR.setRequestHeader( "Content-Type", s.contentType ); - } - - // Set the Accepts header for the server, depending on the dataType - jqXHR.setRequestHeader( - "Accept", - s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[ 0 ] ] ? - s.accepts[ s.dataTypes[ 0 ] ] + - ( s.dataTypes[ 0 ] !== "*" ? ", " + allTypes + "; q=0.01" : "" ) : - s.accepts[ "*" ] - ); - - // Check for headers option - for ( i in s.headers ) { - jqXHR.setRequestHeader( i, s.headers[ i ] ); - } - - // Allow custom headers/mimetypes and early abort - if ( s.beforeSend && - ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || completed ) ) { - - // Abort if not done already and return - return jqXHR.abort(); - } - - // Aborting is no longer a cancellation - strAbort = "abort"; - - // Install callbacks on deferreds - completeDeferred.add( s.complete ); - jqXHR.done( s.success ); - jqXHR.fail( s.error ); - - // Get transport - transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR ); - - // If no transport, we auto-abort - if ( !transport ) { - done( -1, "No Transport" ); - } else { - jqXHR.readyState = 1; - - // Send global event - if ( fireGlobals ) { - globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] ); - } - - // If request was aborted inside ajaxSend, stop there - if ( completed ) { - return jqXHR; - } - - // Timeout - if ( s.async && s.timeout > 0 ) { - timeoutTimer = window.setTimeout( function() { - jqXHR.abort( "timeout" ); - }, s.timeout ); - } - - try { - completed = false; - transport.send( requestHeaders, done ); - } catch ( e ) { - - // Rethrow post-completion 
exceptions - if ( completed ) { - throw e; - } - - // Propagate others as results - done( -1, e ); - } - } - - // Callback for when everything is done - function done( status, nativeStatusText, responses, headers ) { - var isSuccess, success, error, response, modified, - statusText = nativeStatusText; - - // Ignore repeat invocations - if ( completed ) { - return; - } - - completed = true; - - // Clear timeout if it exists - if ( timeoutTimer ) { - window.clearTimeout( timeoutTimer ); - } - - // Dereference transport for early garbage collection - // (no matter how long the jqXHR object will be used) - transport = undefined; - - // Cache response headers - responseHeadersString = headers || ""; - - // Set readyState - jqXHR.readyState = status > 0 ? 4 : 0; - - // Determine if successful - isSuccess = status >= 200 && status < 300 || status === 304; - - // Get response data - if ( responses ) { - response = ajaxHandleResponses( s, jqXHR, responses ); - } - - // Use a noop converter for missing script but not if jsonp - if ( !isSuccess && - jQuery.inArray( "script", s.dataTypes ) > -1 && - jQuery.inArray( "json", s.dataTypes ) < 0 ) { - s.converters[ "text script" ] = function() {}; - } - - // Convert no matter what (that way responseXXX fields are always set) - response = ajaxConvert( s, response, jqXHR, isSuccess ); - - // If successful, handle type chaining - if ( isSuccess ) { - - // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
- if ( s.ifModified ) { - modified = jqXHR.getResponseHeader( "Last-Modified" ); - if ( modified ) { - jQuery.lastModified[ cacheURL ] = modified; - } - modified = jqXHR.getResponseHeader( "etag" ); - if ( modified ) { - jQuery.etag[ cacheURL ] = modified; - } - } - - // if no content - if ( status === 204 || s.type === "HEAD" ) { - statusText = "nocontent"; - - // if not modified - } else if ( status === 304 ) { - statusText = "notmodified"; - - // If we have data, let's convert it - } else { - statusText = response.state; - success = response.data; - error = response.error; - isSuccess = !error; - } - } else { - - // Extract error from statusText and normalize for non-aborts - error = statusText; - if ( status || !statusText ) { - statusText = "error"; - if ( status < 0 ) { - status = 0; - } - } - } - - // Set data for the fake xhr object - jqXHR.status = status; - jqXHR.statusText = ( nativeStatusText || statusText ) + ""; - - // Success/Error - if ( isSuccess ) { - deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] ); - } else { - deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] ); - } - - // Status-dependent callbacks - jqXHR.statusCode( statusCode ); - statusCode = undefined; - - if ( fireGlobals ) { - globalEventContext.trigger( isSuccess ? "ajaxSuccess" : "ajaxError", - [ jqXHR, s, isSuccess ? 
success : error ] ); - } - - // Complete - completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] ); - - if ( fireGlobals ) { - globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] ); - - // Handle the global AJAX counter - if ( !( --jQuery.active ) ) { - jQuery.event.trigger( "ajaxStop" ); - } - } - } - - return jqXHR; - }, - - getJSON: function( url, data, callback ) { - return jQuery.get( url, data, callback, "json" ); - }, - - getScript: function( url, callback ) { - return jQuery.get( url, undefined, callback, "script" ); - } -} ); - -jQuery.each( [ "get", "post" ], function( _i, method ) { - jQuery[ method ] = function( url, data, callback, type ) { - - // Shift arguments if data argument was omitted - if ( isFunction( data ) ) { - type = type || callback; - callback = data; - data = undefined; - } - - // The url can be an options object (which then must have .url) - return jQuery.ajax( jQuery.extend( { - url: url, - type: method, - dataType: type, - data: data, - success: callback - }, jQuery.isPlainObject( url ) && url ) ); - }; -} ); - -jQuery.ajaxPrefilter( function( s ) { - var i; - for ( i in s.headers ) { - if ( i.toLowerCase() === "content-type" ) { - s.contentType = s.headers[ i ] || ""; - } - } -} ); - - -jQuery._evalUrl = function( url, options, doc ) { - return jQuery.ajax( { - url: url, - - // Make this explicit, since user can override this through ajaxSetup (#11264) - type: "GET", - dataType: "script", - cache: true, - async: false, - global: false, - - // Only evaluate the response if it is successful (gh-4126) - // dataFilter is not invoked for failure responses, so using it instead - // of the default converter is kludgy but it works. 
- converters: { - "text script": function() {} - }, - dataFilter: function( response ) { - jQuery.globalEval( response, options, doc ); - } - } ); -}; - - -jQuery.fn.extend( { - wrapAll: function( html ) { - var wrap; - - if ( this[ 0 ] ) { - if ( isFunction( html ) ) { - html = html.call( this[ 0 ] ); - } - - // The elements to wrap the target around - wrap = jQuery( html, this[ 0 ].ownerDocument ).eq( 0 ).clone( true ); - - if ( this[ 0 ].parentNode ) { - wrap.insertBefore( this[ 0 ] ); - } - - wrap.map( function() { - var elem = this; - - while ( elem.firstElementChild ) { - elem = elem.firstElementChild; - } - - return elem; - } ).append( this ); - } - - return this; - }, - - wrapInner: function( html ) { - if ( isFunction( html ) ) { - return this.each( function( i ) { - jQuery( this ).wrapInner( html.call( this, i ) ); - } ); - } - - return this.each( function() { - var self = jQuery( this ), - contents = self.contents(); - - if ( contents.length ) { - contents.wrapAll( html ); - - } else { - self.append( html ); - } - } ); - }, - - wrap: function( html ) { - var htmlIsFunction = isFunction( html ); - - return this.each( function( i ) { - jQuery( this ).wrapAll( htmlIsFunction ? 
html.call( this, i ) : html ); - } ); - }, - - unwrap: function( selector ) { - this.parent( selector ).not( "body" ).each( function() { - jQuery( this ).replaceWith( this.childNodes ); - } ); - return this; - } -} ); - - -jQuery.expr.pseudos.hidden = function( elem ) { - return !jQuery.expr.pseudos.visible( elem ); -}; -jQuery.expr.pseudos.visible = function( elem ) { - return !!( elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length ); -}; - - - - -jQuery.ajaxSettings.xhr = function() { - try { - return new window.XMLHttpRequest(); - } catch ( e ) {} -}; - -var xhrSuccessStatus = { - - // File protocol always yields status code 0, assume 200 - 0: 200, - - // Support: IE <=9 only - // #1450: sometimes IE returns 1223 when it should be 204 - 1223: 204 - }, - xhrSupported = jQuery.ajaxSettings.xhr(); - -support.cors = !!xhrSupported && ( "withCredentials" in xhrSupported ); -support.ajax = xhrSupported = !!xhrSupported; - -jQuery.ajaxTransport( function( options ) { - var callback, errorCallback; - - // Cross domain only allowed if supported through XMLHttpRequest - if ( support.cors || xhrSupported && !options.crossDomain ) { - return { - send: function( headers, complete ) { - var i, - xhr = options.xhr(); - - xhr.open( - options.type, - options.url, - options.async, - options.username, - options.password - ); - - // Apply custom fields if provided - if ( options.xhrFields ) { - for ( i in options.xhrFields ) { - xhr[ i ] = options.xhrFields[ i ]; - } - } - - // Override mime type if needed - if ( options.mimeType && xhr.overrideMimeType ) { - xhr.overrideMimeType( options.mimeType ); - } - - // X-Requested-With header - // For cross-domain requests, seeing as conditions for a preflight are - // akin to a jigsaw puzzle, we simply never set it to be sure. - // (it can always be set on a per-request basis or even using ajaxSetup) - // For same-domain requests, won't change header if already provided. 
- if ( !options.crossDomain && !headers[ "X-Requested-With" ] ) { - headers[ "X-Requested-With" ] = "XMLHttpRequest"; - } - - // Set headers - for ( i in headers ) { - xhr.setRequestHeader( i, headers[ i ] ); - } - - // Callback - callback = function( type ) { - return function() { - if ( callback ) { - callback = errorCallback = xhr.onload = - xhr.onerror = xhr.onabort = xhr.ontimeout = - xhr.onreadystatechange = null; - - if ( type === "abort" ) { - xhr.abort(); - } else if ( type === "error" ) { - - // Support: IE <=9 only - // On a manual native abort, IE9 throws - // errors on any property access that is not readyState - if ( typeof xhr.status !== "number" ) { - complete( 0, "error" ); - } else { - complete( - - // File: protocol always yields status 0; see #8605, #14207 - xhr.status, - xhr.statusText - ); - } - } else { - complete( - xhrSuccessStatus[ xhr.status ] || xhr.status, - xhr.statusText, - - // Support: IE <=9 only - // IE9 has no XHR2 but throws on binary (trac-11426) - // For XHR2 non-text, let the caller handle it (gh-2498) - ( xhr.responseType || "text" ) !== "text" || - typeof xhr.responseText !== "string" ? 
- { binary: xhr.response } : - { text: xhr.responseText }, - xhr.getAllResponseHeaders() - ); - } - } - }; - }; - - // Listen to events - xhr.onload = callback(); - errorCallback = xhr.onerror = xhr.ontimeout = callback( "error" ); - - // Support: IE 9 only - // Use onreadystatechange to replace onabort - // to handle uncaught aborts - if ( xhr.onabort !== undefined ) { - xhr.onabort = errorCallback; - } else { - xhr.onreadystatechange = function() { - - // Check readyState before timeout as it changes - if ( xhr.readyState === 4 ) { - - // Allow onerror to be called first, - // but that will not handle a native abort - // Also, save errorCallback to a variable - // as xhr.onerror cannot be accessed - window.setTimeout( function() { - if ( callback ) { - errorCallback(); - } - } ); - } - }; - } - - // Create the abort callback - callback = callback( "abort" ); - - try { - - // Do send the request (this may raise an exception) - xhr.send( options.hasContent && options.data || null ); - } catch ( e ) { - - // #14683: Only rethrow if this hasn't been notified as an error yet - if ( callback ) { - throw e; - } - } - }, - - abort: function() { - if ( callback ) { - callback(); - } - } - }; - } -} ); - - - - -// Prevent auto-execution of scripts when no explicit dataType was provided (See gh-2432) -jQuery.ajaxPrefilter( function( s ) { - if ( s.crossDomain ) { - s.contents.script = false; - } -} ); - -// Install script dataType -jQuery.ajaxSetup( { - accepts: { - script: "text/javascript, application/javascript, " + - "application/ecmascript, application/x-ecmascript" - }, - contents: { - script: /\b(?:java|ecma)script\b/ - }, - converters: { - "text script": function( text ) { - jQuery.globalEval( text ); - return text; - } - } -} ); - -// Handle cache's special case and crossDomain -jQuery.ajaxPrefilter( "script", function( s ) { - if ( s.cache === undefined ) { - s.cache = false; - } - if ( s.crossDomain ) { - s.type = "GET"; - } -} ); - -// Bind script tag hack 
transport -jQuery.ajaxTransport( "script", function( s ) { - - // This transport only deals with cross domain or forced-by-attrs requests - if ( s.crossDomain || s.scriptAttrs ) { - var script, callback; - return { - send: function( _, complete ) { - script = jQuery( " - - - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: generics, tidymodels, tidyverse, and usethis. - -The broom package provides tools to summarize key information about models in tidy `tibble()`s. The package provides three verbs, or "tidiers," to help make model objects easier to work with: - -* `tidy()` summarizes information about model components -* `glance()` reports information about the entire model -* `augment()` adds information about observations to a dataset - -Each of the three verbs above are _generic_, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant _method_ implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. the package that supplies the model object) rather than to broom. New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package. - -We'd like to make implementing external tidier methods as painless as possible. The general process for doing so is: - -* re-export the tidier generics -* implement tidying methods -* document the new methods - -In this article, we'll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible. 
- -## Re-export the tidier generics - -The first step is to re-export the generic functions for `tidy()`, `glance()`, and/or `augment()`. You could do so from `broom` itself, but we've provided an alternative, much lighter dependency called `generics`. - -First you'll need to add the [generics](https://github.com/r-lib/generics) package to `Imports`. We recommend using the [usethis](https://github.com/r-lib/usethis) package for this: - - -```r -usethis::use_package("generics", "Imports") -``` - -Next, you'll need to re-export the appropriate tidying methods. If you plan to implement a `glance()` method, for example, you can re-export the `glance()` generic by adding the following somewhere inside the `/R` folder of your package: - - -```r -#' @importFrom generics glance -#' @export -generics::glance -``` - -Oftentimes it doesn't make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense. - -{{% warning %}} Please do not define `tidy()`, `glance()`, or `augment()` generics in your package. This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods. {{%/ warning %}} - -## Implement tidying methods - -You'll now need to implement specific tidying methods for each of the generics you've re-exported in the above step. For each of `tidy()`, `glance()`, and `augment()`, we'll walk through the big picture, an example, and helpful resources. - -In this article, we'll use the base R dataset `trees`, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R `lm()` function. - - -```r -# load in the trees dataset -data(trees) - -# take a look! -str(trees) -#> 'data.frame': 31 obs. of 3 variables: -#> $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ... -#> $ Height: num 70 65 63 72 81 83 66 75 80 75 ... 
-#> $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ... - -# fit the timber volume as a function of girth and height -trees_model <- lm(Volume ~ Girth + Height, data = trees) -``` - -Let's take a look at the `summary()` of our `trees_model` fit. - - -```r -summary(trees_model) -#> -#> Call: -#> lm(formula = Volume ~ Girth + Height, data = trees) -#> -#> Residuals: -#> Min 1Q Median 3Q Max -#> -6.406 -2.649 -0.288 2.200 8.485 -#> -#> Coefficients: -#> Estimate Std. Error t value Pr(>|t|) -#> (Intercept) -57.988 8.638 -6.71 2.7e-07 *** -#> Girth 4.708 0.264 17.82 < 2e-16 *** -#> Height 0.339 0.130 2.61 0.014 * -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -#> -#> Residual standard error: 3.88 on 28 degrees of freedom -#> Multiple R-squared: 0.948, Adjusted R-squared: 0.944 -#> F-statistic: 255 on 2 and 28 DF, p-value: <2e-16 -``` - -This output gives some summary statistics on the residuals (which would be described more fully in an `augment()` output), model coefficients (which, in this case, make up the `tidy()` output), and some model-level summarizations such as RSE, `\(R^2\)`, etc. (which make up the `glance()` output.) - -### Implementing the `tidy()` method - -The `tidy(x, ...)` method will return a tibble where each row contains information about a component of the model. The `x` input is a model object, and the dots (`...`) are an optional argument to supply additional information to any calls inside your method. New `tidy()` methods can take additional arguments, but _must_ include the `x` and `...` arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These `tidy()` methods are useful for inspecting model details and creating custom model visualizations. 
- -Returning to the example of our linear model on timber volume, we'd like to extract information on the model components. In this example, the components are the regression coefficients. After taking a look at the model object and its `summary()`, you might notice that you can extract the regression coefficients as follows: - - -```r -summary(trees_model)$coefficients -#> Estimate Std. Error t value Pr(>|t|) -#> (Intercept) -57.988 8.638 -6.71 2.75e-07 -#> Girth 4.708 0.264 17.82 8.22e-17 -#> Height 0.339 0.130 2.61 1.45e-02 -``` - -This object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. Converting to a tibble where the row names are contained in a column, you might write: - - -```r -trees_model_tidy <- summary(trees_model)$coefficients %>% - as_tibble(rownames = "term") - -trees_model_tidy -#> # A tibble: 3 × 5 -#> term Estimate `Std. Error` `t value` `Pr(>|t|)` -#> -#> 1 (Intercept) -58.0 8.64 -6.71 2.75e- 7 -#> 2 Girth 4.71 0.264 17.8 8.22e-17 -#> 3 Height 0.339 0.130 2.61 1.45e- 2 -``` - -The broom package standardizes common column names used to describe coefficients. In this case, the column names are: - - -```r -colnames(trees_model_tidy) <- c("term", "estimate", "std.error", "statistic", "p.value") -``` - -A glossary giving the currently acceptable column names outputted by `tidy()` methods can be found [at the end of this article](#glossary). As a rule of thumb, column names resulting from `tidy()` methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions). - -Finally, it is common for `tidy()` methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. 
In this example, the `confint()` function can be used to calculate confidence intervals from a model object resulting from `lm()`: - - -```r -confint(trees_model) -#> 2.5 % 97.5 % -#> (Intercept) -75.6823 -40.293 -#> Girth 4.1668 5.249 -#> Height 0.0726 0.606 -``` - -With these considerations in mind, a reasonable `tidy()` method for `lm()` might look something like: - - -```r -tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) { - - result <- summary(x)$coefficients %>% - tibble::as_tibble(rownames = "term") %>% - dplyr::rename(estimate = Estimate, - std.error = `Std. Error`, - statistic = `t value`, - p.value = `Pr(>|t|)`) - - if (conf.int) { - ci <- confint(x, level = conf.level) - result <- dplyr::left_join(result, ci, by = "term") - } - - result -} -``` - -{{% note %}} If you're interested, the actual `tidy.lm()` source can be found [here](https://github.com/tidymodels/broom/blob/master/R/stats-lm-tidiers.R)! It's not too different from the version above except for some argument checking and additional columns. {{%/ note %}} - -With this method exported, then, if a user calls `tidy(fit)`, where `fit` is an output from `lm()`, the `tidy()` generic would "redirect" the call to the `tidy.lm()` function above. - -Some things to keep in mind while writing your `tidy()` method: - -* Sometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn't have the same interpretation, it doesn't make sense to summarize the fixed and random effects in the same table. In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of: - - -```r -model <- mixed_model(...) 
-tidy(model, effects = "fixed") -tidy(model, effects = "random") -``` - -* How are missing values encoded in the model object and its `summary()`? Ensure that rows are included even when the associated model component is missing or rank deficient. -* Are there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to `tidy()` methods include: - - `conf.int`: A logical indicating whether or not to calculate confidence/credible intervals. This should default to `FALSE`. - - `conf.level`: The confidence level to use for the interval when `conf.int = TRUE`. Typically defaults to `.95`. - - `exponentiate`: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression). - -### Implementing the `glance()` method - -`glance()` returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the `x` input is a model object, and the `...` is an optional argument to supply additional information to any calls inside your method. New `glance()` methods can also take additional arguments and _must_ include the `x` and `...` arguments. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) - -Returning to the `trees_model` example, we could pull out the `\(R^2\)` value with the following code: - - -```r -summary(trees_model)$r.squared -#> [1] 0.948 -``` - -Similarly, for the adjusted `\(R^2\)`: - - -```r -summary(trees_model)$adj.r.squared -#> [1] 0.944 -``` - -Unfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a `tibble()` element-by-element by subsetting the `summary()` object repeatedly. 
The `with()` function, however, can help make this process a bit less tedious by evaluating expressions inside of the `summary(trees_model)` environment. To grab those same two model elements from above using `with()`:
- -### Implementing the `augment()` method - -`augment()` methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a `.` prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in [the glossary](#glossary).) The `x` and `...` arguments share their meaning with the two functions described above. `augment` methods also optionally accept a `data` argument that is a `data.frame` (or `tibble`) to add observation-level information to, returning a `tibble` object with the same number of rows as `data`. Many `augment()` methods also accept a `newdata` argument, following the same conventions as the `data` argument, except with the underlying assumption that the model has not "seen" the data yet. As a result, `newdata` arguments need not contain the response columns in `data`. Only one of `data` or `newdata` should be supplied. A full glossary of acceptable arguments to `augment()` methods can be found at [the end of this article](#glossary). - -If a `data` argument is not specified, `augment()` should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model. - -With this is mind, we can look back to our `trees_model` example. 
For one, the `model` element inside of the `trees_model` object will allow us to recover the original data: - - -```r -trees_model$model -#> Volume Girth Height -#> 1 10.3 8.3 70 -#> 2 10.3 8.6 65 -#> 3 10.2 8.8 63 -#> 4 16.4 10.5 72 -#> 5 18.8 10.7 81 -#> 6 19.7 10.8 83 -#> 7 15.6 11.0 66 -#> 8 18.2 11.0 75 -#> 9 22.6 11.1 80 -#> 10 19.9 11.2 75 -#> 11 24.2 11.3 79 -#> 12 21.0 11.4 76 -#> 13 21.4 11.4 76 -#> 14 21.3 11.7 69 -#> 15 19.1 12.0 75 -#> 16 22.2 12.9 74 -#> 17 33.8 12.9 85 -#> 18 27.4 13.3 86 -#> 19 25.7 13.7 71 -#> 20 24.9 13.8 64 -#> 21 34.5 14.0 78 -#> 22 31.7 14.2 80 -#> 23 36.3 14.5 74 -#> 24 38.3 16.0 72 -#> 25 42.6 16.3 77 -#> 26 55.4 17.3 81 -#> 27 55.7 17.5 82 -#> 28 58.3 17.9 80 -#> 29 51.5 18.0 80 -#> 30 51.0 18.0 80 -#> 31 77.0 20.6 87 -``` - -Similarly, the fitted values and residuals can be accessed with the following code: - - -```r -head(trees_model$fitted.values) -#> 1 2 3 4 5 6 -#> 4.84 4.55 4.82 15.87 19.87 21.02 -head(trees_model$residuals) -#> 1 2 3 4 5 6 -#> 5.462 5.746 5.383 0.526 -1.069 -1.318 -``` - -As with `glance()` methods, it's fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. A common metric associated with linear models, for example, is the standard error of fitted values: - - -```r -se.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>% - unname() - -head(se.fit) -#> [1] 1.321 1.489 1.633 0.944 1.348 1.532 -``` - -Thus, a reasonable `augment()` method for `lm` might look something like this: - - -```r -augment.lm <- function(x, data = x$model, newdata = NULL, ...) 
{ - if (is.null(newdata)) { - dplyr::bind_cols(tibble::as_tibble(data), - tibble::tibble(.fitted = x$fitted.values, - .se.fit = predict(x, - newdata = data, - se.fit = TRUE)$se.fit, - .resid = x$residuals)) - } else { - predictions <- predict(x, newdata = newdata, se.fit = TRUE) - dplyr::bind_cols(tibble::as_tibble(newdata), - tibble::tibble(.fitted = predictions$fit, - .se.fit = predictions$se.fit)) - } -} -``` - -Some other things to keep in mind while writing `augment()` methods: -* The `newdata` argument should default to `NULL`. Users should only ever specify one of `data` or `newdata`. Providing both `data` and `newdata` should result in an error. The `newdata` argument should accept both `data.frame`s and `tibble`s. -* Data given to the `data` argument must have both the original predictors and the original response. Data given to the `newdata` argument only needs to have the original predictors. This is important because there may be important information associated with training data that is not associated with test data. This means that the `original_data` object in `augment(model, data = original_data)` should provide `.fitted` and `.resid` columns (in most cases), whereas `test_data` in `augment(model, data = test_data)` only needs a `.fitted` column, even if the response is present in `test_data`. -* If the `data` or `newdata` is specified as a `data.frame` with rownames, `augment` should return them in a column called `.rownames`. -* For observations where no fitted values or summaries are available (where there's missing data, for example), return `NA`. -* *The `augment()` method should always return as many rows as were in `data` or `newdata`*, depending on which is supplied - -{{% note %}} The recommended interface and functionality for `augment()` methods may change soon. {{%/ note %}} - -## Document the new methods - -The only remaining step is to integrate the new methods into the parent package! 
To do so, just drop the methods into a `.R` file inside of the `/R` folder and document them using roxygen2. If you're unfamiliar with the process of documenting objects, you can read more about it [here](http://r-pkgs.had.co.nz/man.html). Here's an example of how our `tidy.lm()` method might be documented: - - -```r -#' Tidy a(n) lm object -#' -#' @param x A `lm` object. -#' @param conf.int Logical indicating whether or not to include -#' a confidence interval in the tidied output. Defaults to FALSE. -#' @param conf.level The confidence level to use for the confidence -#' interval if conf.int = TRUE. Must be strictly greater than 0 -#' and less than 1. Defaults to 0.95, which corresponds to a -#' 95 percent confidence interval. -#' @param ... Unused, included for generic consistency only. -#' @return A tidy [tibble::tibble()] summarizing component-level -#' information about the model -#' -#' @examples -#' # load the trees dataset -#' data(trees) -#' -#' # fit a linear model on timber volume -#' trees_model <- lm(Volume ~ Girth + Height, data = trees) -#' -#' # summarize model coefficients in a tidy tibble! -#' tidy(trees_model) -#' -#' @export -tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) { - - # ... the rest of the function definition goes here! -``` - -Once you've documented each of your new methods and executed `devtools::document()`, you're done! Congrats on implementing your own broom tidier methods for a new model object! - -## Glossaries: argument and column names {#glossary} - - - -Tidier methods have a standardized set of acceptable argument and output column names. 
The currently acceptable argument names by tidier method are: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Method Argument
    tidy alpha
    tidy boot_se
    tidy by_class
    tidy col.names
    tidy component
    tidy conf.int
    tidy conf.level
    tidy conf.method
    tidy conf.type
    tidy diagonal
    tidy droppars
    tidy effects
    tidy ess
    tidy estimate.method
    tidy exponentiate
    tidy fe
    tidy include_studies
    tidy instruments
    tidy intervals
    tidy matrix
    tidy measure
    tidy na.rm
    tidy object
    tidy p.values
    tidy par_type
    tidy parameters
    tidy parametric
    tidy pars
    tidy prob
    tidy region
    tidy return_zeros
    tidy rhat
    tidy robust
    tidy scales
    tidy se.type
    tidy strata
    tidy test
    tidy trim
    tidy upper
    glance deviance
    glance diagnostics
    glance looic
    glance mcmc
    glance test
    glance x
    augment data
    augment interval
    augment newdata
    augment se_fit
    augment type.predict
    augment type.residuals
    augment weights
    - -The currently acceptable column names by tidier method are: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Method Column
    tidy acf
    tidy adj.p.value
    tidy alternative
    tidy at.value
    tidy at.variable
    tidy atmean
    tidy autocorrelation
    tidy bias
    tidy ci.width
    tidy class
    tidy cluster
    tidy coef.type
    tidy column1
    tidy column2
    tidy comp
    tidy comparison
    tidy component
    tidy conf.high
    tidy conf.low
    tidy contrast
    tidy cumulative
    tidy cutoff
    tidy delta
    tidy den.df
    tidy denominator
    tidy dev.ratio
    tidy df
    tidy distance
    tidy estimate
    tidy estimate1
    tidy estimate2
    tidy event
    tidy exp
    tidy expected
    tidy fpr
    tidy freq
    tidy GCV
    tidy group
    tidy group1
    tidy group2
    tidy index
    tidy item1
    tidy item2
    tidy kendall_score
    tidy lag
    tidy lambda
    tidy letters
    tidy lhs
    tidy logLik
    tidy mcmc.error
    tidy mean
    tidy meansq
    tidy method
    tidy n
    tidy N
    tidy n.censor
    tidy n.event
    tidy n.risk
    tidy null.value
    tidy num.df
    tidy nzero
    tidy obs
    tidy op
    tidy outcome
    tidy p
    tidy p.value
    tidy p.value.Sargan
    tidy p.value.weakinst
    tidy p.value.Wu.Hausman
    tidy parameter
    tidy PC
    tidy percent
    tidy power
    tidy proportion
    tidy pyears
    tidy response
    tidy rhs
    tidy robust.se
    tidy row
    tidy scale
    tidy sd
    tidy series
    tidy sig.level
    tidy size
    tidy spec
    tidy state
    tidy statistic
    tidy statistic.Sargan
    tidy statistic.weakinst
    tidy statistic.Wu.Hausman
    tidy std_estimate
    tidy std.all
    tidy std.dev
    tidy std.error
    tidy std.lv
    tidy std.nox
    tidy step
    tidy strata
    tidy stratum
    tidy study
    tidy sumsq
    tidy tau
    tidy term
    tidy time
    tidy tpr
    tidy type
    tidy uniqueness
    tidy value
    tidy var_kendall_score
    tidy variable
    tidy variance
    tidy withinss
    tidy y.level
    tidy y.value
    tidy z
    glance adj.r.squared
    glance agfi
    glance AIC
    glance AICc
    glance alpha
    glance alternative
    glance autocorrelation
    glance avg.silhouette.width
    glance betweenss
    glance BIC
    glance cfi
    glance chi.squared
    glance chisq
    glance cochran.qe
    glance cochran.qm
    glance conf.high
    glance conf.low
    glance converged
    glance convergence
    glance crit
    glance cv.crit
    glance den.df
    glance deviance
    glance df
    glance df.null
    glance df.residual
    glance dw.original
    glance dw.transformed
    glance edf
    glance estimator
    glance events
    glance finTol
    glance function.count
    glance G
    glance g.squared
    glance gamma
    glance gradient.count
    glance H
    glance h.squared
    glance hypvol
    glance i.squared
    glance independence
    glance isConv
    glance iter
    glance iterations
    glance kHKB
    glance kLW
    glance lag.order
    glance lambda
    glance lambda.1se
    glance lambda.min
    glance lambdaGCV
    glance logLik
    glance max.cluster.size
    glance max.hazard
    glance max.time
    glance maxit
    glance MCMC.burnin
    glance MCMC.interval
    glance MCMC.samplesize
    glance measure
    glance median
    glance method
    glance min.hazard
    glance min.time
    glance missing_method
    glance model
    glance n
    glance n.clusters
    glance n.factors
    glance n.max
    glance n.start
    glance nevent
    glance nexcluded
    glance ngroups
    glance nobs
    glance norig
    glance npar
    glance npasses
    glance null.deviance
    glance nulldev
    glance num.df
    glance number.interaction
    glance offtable
    glance p.value
    glance p.value.cochran.qe
    glance p.value.cochran.qm
    glance p.value.original
    glance p.value.Sargan
    glance p.value.transformed
    glance p.value.weak.instr
    glance p.value.Wu.Hausman
    glance parameter
    glance pen.crit
    glance power
    glance power.reached
    glance pseudo.r.squared
    glance r.squared
    glance records
    glance residual.deviance
    glance rho
    glance rho2
    glance rho20
    glance rmean
    glance rmean.std.error
    glance rmsea
    glance rmsea.conf.high
    glance rscore
    glance score
    glance sigma
    glance sigma2_j
    glance spar
    glance srmr
    glance statistic
    glance statistic.Sargan
    glance statistic.weak.instr
    glance statistic.Wu.Hausman
    glance tau
    glance tau.squared
    glance tau.squared.se
    glance theta
    glance timepoints
    glance tli
    glance tot.withinss
    glance total
    glance total.variance
    glance totss
    glance value
    glance within.r.squared
    augment .class
    augment .cluster
    augment .cochran.qe.loo
    augment .col.prop
    augment .conf.high
    augment .conf.low
    augment .cooksd
    augment .cov.ratio
    augment .cred.high
    augment .cred.low
    augment .dffits
    augment .expected
    augment .fitted
    augment .fitted_j_0
    augment .fitted_j_1
    augment .hat
    augment .lower
    augment .moderator
    augment .moderator.level
    augment .observed
    augment .probability
    augment .prop
    augment .remainder
    augment .resid
    augment .resid_j_0
    augment .resid_j_1
    augment .row.prop
    augment .rownames
    augment .se.fit
    augment .seasadj
    augment .seasonal
    augment .sigma
    augment .std.resid
    augment .tau
    augment .tau.squared.loo
    augment .trend
    augment .uncertainty
    augment .upper
    augment .weight
    - -The [alexpghayes/modeltests](https://github.com/alexpghayes/modeltests) package provides unit testing infrastructure to check your new tidier methods. Please file an issue there to request new arguments/columns to be added to the glossaries! - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> generics * 0.1.3 2022-07-05 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/develop/metrics/index.Rmarkdown b/content/learn/develop/metrics/index.Rmarkdown deleted file mode 100644 index d9cf01fd..00000000 --- 
a/content/learn/develop/metrics/index.Rmarkdown +++ /dev/null @@ -1,483 +0,0 @@ ---- -title: "Custom performance metrics" -tags: [yardstick] -categories: [] -type: learn-subsection -weight: 3 -description: | - Create a new performance metric and integrate it with yardstick functions. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(tidymodels) -library(rlang) - -pkgs <- c("tidymodels", "rlang") -``` - - -## Introduction - -`r req_pkgs(pkgs)` - -The [yardstick](https://yardstick.tidymodels.org/) package already includes a large number of metrics, but there's obviously a chance that you might have a custom metric that hasn't been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics. - -Why create custom metrics? With the infrastructure yardstick provides, you get: - -- Standardization between your metric and other preexisting metrics -- Automatic error handling for types and lengths -- Automatic selection of binary / multiclass metric implementations -- Support for `NA` handling -- Support for grouped data frames -- Support for use alongside other metrics in `metric_set()` - -The implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of `roc_auc()` after reading this vignette if you want to work on a class probability metric. - -## Numeric example: MSE - -Mean squared error (sometimes MSE or from here on, `mse()`) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. 
The formula for `mse()` is: - -$$ MSE = \frac{1}{N} \sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) $$ - -All metrics should have a data frame version, and a vector version. The data frame version here will be named `mse()`, and the vector version will be `mse_vec()`. - -### Vector implementation - -To start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as `beta` in `f_meas()`). To create the vector function, you need to do two things: - -1) Use `check_numeric_metric()` to validate the input types. -2) Use `yardstick_remove_missing()` and `yardstick_any_missing()` to setup handling for missing values. -3) Create an internal implementation function, `mse_impl()`, and use. - -Below, `mse_impl()` contains the actual implementation of the metric, and takes `truth` and `estimate` as arguments along with any metric specific arguments. Optionally `case_weights` if the calculations supports it. - -The yardstick function `check_numeric_metric()` takes `truth`, `estimate` and `case_weights`, and validates that they are the right type, and are the same length. - -The `yardstick_remove_missing()` and `yardstick_any_missing()` yardstick functions are used to handle missing values in a consistent way, similarly to how the other metrics handle them. The code below is typically copy pasted from function to function, but certain types of metrics might want to deviate from this pattern. - -You are required to supply a `case_weights` argument to `mse_vec()` for the functions to work with yardstick. If your metric in question doesn't support case weights, you can error if they are passed, or simply ignore it. - -```{r} -library(tidymodels) - -mse_impl <- function(truth, estimate, case_weights = NULL) { - mean((truth - estimate) ^ 2) -} - -mse_vec <- function(truth, estimate, na_rm = TRUE, case_weights = NULL, ...) 
{ - check_numeric_metric(truth, estimate, case_weights) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - mse_impl(truth, estimate, case_weights = case_weights) -} -``` - -At this point, you've created the vector version of the mean squared error metric. - -```{r} -data("solubility_test") - -mse_vec( - truth = solubility_test$solubility, - estimate = solubility_test$prediction -) -``` - -Intelligent error handling is immediately available. - -```{r, error = TRUE} -mse_vec(truth = "apple", estimate = 1) - -mse_vec(truth = 1, estimate = factor("xyz")) -``` - -`NA` values are removed if `na_rm = TRUE` (the default). If `na_rm = FALSE` and any `NA` values are detected, then the metric automatically returns `NA`. - -```{r} -# NA values removed -mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5)) - -# NA returned -mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE) -``` - -### Data frame implementation - -The data frame version of the metric should be fairly simple. It is a generic function with a `data.frame` method that calls the yardstick helper, `numeric_metric_summarizer()`, and passes along the `mse_vec()` function to it along with versions of `truth` and `estimate` that have been wrapped in `rlang::enquo()` and then unquoted with `!!` so that non-standard evaluation can be supported. - -```{r} -library(rlang) - -mse <- function(data, ...) { - UseMethod("mse") -} - -mse <- new_numeric_metric(mse, direction = "minimize") - -mse.data.frame <- function(data, truth, estimate, na_rm = TRUE, case_weights = NULL, ...) 
{ - - numeric_metric_summarizer( - name = "mse", - fn = mse_vec, - data = data, - truth = !!enquo(truth), - estimate = !!enquo(estimate), - na_rm = na_rm, - case_weights = !!enquo(case_weights) - ) -} -``` - -And that's it. The yardstick package handles the rest. - -```{r, error = TRUE} -mse(solubility_test, truth = solubility, estimate = prediction) - -# Error handling -mse(solubility_test, truth = solubility, estimate = factor("xyz")) -``` - -Let's test it out on a grouped data frame. - -```{r, message=FALSE} -library(dplyr) - -set.seed(1234) -size <- 100 -times <- 10 - -# create 10 resamples -solubility_resampled <- bind_rows( - replicate( - n = times, - expr = sample_n(solubility_test, size, replace = TRUE), - simplify = FALSE - ), - .id = "resample" -) - -solubility_resampled %>% - group_by(resample) %>% - mse(solubility, prediction) -``` - -## Class example: miss rate - -Miss rate is another name for the false negative rate, and is a classification metric in the same family as `sens()` and `spec()`. It follows the formula: - -$$ miss\_rate = \frac{FN}{FN + TP} $$ - -This metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. As you will see in the example, you can achieve this with a call to `base::table(estimate, truth)` which correctly puts the "correct" result in the columns of the confusion matrix. - -Classification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let's start with the binary case. - -### Vector implementation - -The vector implementation for classification metrics initially has a very similar setup as the numeric metrics. It used `check_class_metric()` instead of `check_numeric_metric()`. It has an additional argument, `estimator` that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to `NULL`. 
Additionally, pass it along to `check_class_metric()` so that it can check the provided `estimator` against the classes of `truth` and `estimate` to see if they are allowed. - -```{r} -# Logic for `event_level` -event_col <- function(xtab, event_level) { - if (identical(event_level, "first")) { - colnames(xtab)[[1]] - } else { - colnames(xtab)[[2]] - } -} - -miss_rate_impl <- function(truth, estimate, event_level) { - # Create - xtab <- table(estimate, truth) - col <- event_col(xtab, event_level) - col2 <- setdiff(colnames(xtab), col) - - tp <- xtab[col, col] - fn <- xtab[col2, col] - - fn / (fn + tp) -} - -miss_rate_vec <- function(truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) { - estimator <- finalize_estimator(truth, estimator) - - check_class_metric(truth, estimate, case_weights, estimator) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - miss_rate_impl(truth, estimate, event_level) -} -``` - -Another change from the numeric metric is that a call to `finalize_estimator()` is made. This is the infrastructure that auto-selects the type of estimator to use. - -```{r} -data("two_class_example") -miss_rate_vec(two_class_example$truth, two_class_example$predicted) -``` - -What happens if you try and pass in a multiclass result? - -```{r} -data("hpc_cv") -fold1 <- filter(hpc_cv, Resample == "Fold01") -miss_rate_vec(fold1$obs, fold1$pred) -``` - -This isn't great, as currently multiclass `miss_rate()` isn't supported and it would have been better to throw an error if the `estimator` was not `"binary"`. Currently, `finalize_estimator()` uses its default implementation which selected `"macro"` as the `estimator` since `truth` was a factor with more than 2 classes. 
When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this. - -To fix this, a generic counterpart to `finalize_estimator()`, called `finalize_estimator_internal()`, exists that helps you restrict the input types. If you provide a method to `finalize_estimator_internal()` where the method name is the same as your metric name, and then set the `metric_class` argument in `finalize_estimator()` to be the same thing, you can control how the auto-selection of the `estimator` is handled. - -Don't worry about the `metric_dispatcher` argument. This is handled for you and just exists as a dummy argument to dispatch off of. - -It is also good practice to call `validate_estimator()` which handles the case where a user passed in the estimator themselves. This validates that the supplied `estimator` is one of the allowed types and error otherwise. - -```{r, error = TRUE} -finalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator, call) { - - validate_estimator(estimator, estimator_override = "binary") - if (!is.null(estimator)) { - return(estimator) - } - - lvls <- levels(x) - if (length(lvls) > 2) { - stop("A multiclass `truth` input was provided, but only `binary` is supported.") - } - "binary" -} - -miss_rate_vec <- function(truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) 
{ - # calls finalize_estimator_internal() internally - estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate") - - check_class_metric(truth, estimate, case_weights, estimator) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - miss_rate_impl(truth, estimate, event_level) -} - -# Error thrown by our custom handler -miss_rate_vec(fold1$obs, fold1$pred) - -# Error thrown by validate_estimator() -miss_rate_vec(fold1$obs, fold1$pred, estimator = "macro") -``` - -### Supporting multiclass miss rate - -Like many other classification metrics such as `precision()` or `recall()`, miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read `vignette("multiclass", "yardstick")` for more information about how these methods work. - -Generally, they require more effort to get right than the binary case, especially if you want to have a performant version. Luckily, a somewhat standard template is used in yardstick and can be used here as well. - -Let's first remove the "binary" restriction we created earlier. - -```{r} -rm(finalize_estimator_internal.miss_rate) -``` - -The main changes below are: - -- The binary implementation is moved to `miss_rate_binary()`. - -- `miss_rate_estimator_impl()` is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from `miss_rate_impl()` and also accepts the `estimator` argument using R's function scoping rules. - -- `miss_rate_multiclass()` provides the implementation for the multiclass case. 
It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in `miss_rate_estimator_impl()` depending on whether or not it was macro or weighted macro. - -```{r} -miss_rate_vec <- function(truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) { - # calls finalize_estimator_internal() internally - estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate") - - check_class_metric(truth, estimate, case_weights, estimator) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - miss_rate_impl(truth, estimate, estimator, event_level) -} - -miss_rate_impl <- function(truth, estimate, estimator, event_level) { - xtab <- table(estimate, truth) - # Rather than implement the actual method here, we rely on - # an *_estimator_impl() function that can handle binary - # and multiclass cases - miss_rate_estimator_impl(xtab, estimator, event_level) -} - -# This function switches between binary and multiclass implementations -miss_rate_estimator_impl <- function(data, estimator, event_level) { - if(estimator == "binary") { - miss_rate_binary(data, event_level) - } else { - # Encapsulates the macro, macro weighted, and micro cases - wt <- get_weights(data, estimator) - res <- miss_rate_multiclass(data, estimator) - weighted.mean(res, wt) - } -} - -miss_rate_binary <- function(data, event_level) { - col <- event_col(data, event_level) - col2 <- setdiff(colnames(data), col) - - tp <- data[col, col] - fn <- data[col2, col] - - fn / (fn + tp) -} 
- -miss_rate_multiclass <- function(data, estimator) { - - # We need tp and fn for all classes individually - # we can get this by taking advantage of the fact - # that tp + fn = colSums(data) - tp <- diag(data) - tpfn <- colSums(data) - fn <- tpfn - tp - - # If using a micro estimator, we sum the individual - # pieces before performing the miss rate calculation - if (estimator == "micro") { - tp <- sum(tp) - fn <- sum(fn) - } - - # return the vector - tp / (tp + fn) -} -``` - -For the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. For example, `f_meas()` is a combination of `recall()` and `precision()`. When calculating a macro averaged `f_meas()`, the weighting must be applied 1 time, at the very end of the calculation. `recall_multiclass()` and `precision_multiclass()` are defined similarly to how `miss_rate_multiclass()` is defined and returns the unweighted vector of calculations. This means we can directly use this in `f_meas()`, and then weight everything once at the end of that calculation. - -Let's try it out now: - -```{r} -# two class -miss_rate_vec(two_class_example$truth, two_class_example$predicted) - -# multiclass -miss_rate_vec(fold1$obs, fold1$pred) -``` - -#### Data frame implementation - -Luckily, the data frame implementation is as simple as the numeric case, we just need to add an extra `estimator` argument and pass that through. - -```{r} -miss_rate <- function(data, ...) { - UseMethod("miss_rate") -} - -miss_rate <- new_class_metric(miss_rate, direction = "minimize") - -miss_rate.data.frame <- function(data, - truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) 
{ - class_metric_summarizer( - name = "miss_rate", - fn = miss_rate_vec, - data = data, - truth = !!enquo(truth), - estimate = !!enquo(estimate), - estimator = estimator, - na_rm = na_rm, - case_weights = !!enquo(case_weights), - event_level = event_level - ) -} -``` - -```{r, error = TRUE} -# Macro weighted automatically selected -fold1 %>% - miss_rate(obs, pred) - -# Switch to micro -fold1 %>% - miss_rate(obs, pred, estimator = "micro") - -# Macro weighted by resample -hpc_cv %>% - group_by(Resample) %>% - miss_rate(obs, pred, estimator = "macro_weighted") - -# Error handling -miss_rate(hpc_cv, obs, VF) -``` - -## Using custom metrics - -The `metric_set()` function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, `metric_set()` fails. By using `new_numeric_metric()` and `new_class_metric()` in the above custom metrics, they work out of the box without any additional adjustments. - -```{r} -numeric_mets <- metric_set(mse, rmse) - -numeric_mets(solubility_test, solubility, prediction) -``` - - -## Session information - -```{r si, echo = FALSE} -small_session("yardstick") -``` diff --git a/content/learn/develop/metrics/index.markdown b/content/learn/develop/metrics/index.markdown deleted file mode 100644 index 6efac9d6..00000000 --- a/content/learn/develop/metrics/index.markdown +++ /dev/null @@ -1,591 +0,0 @@ ---- -title: "Custom performance metrics" -tags: [yardstick] -categories: [] -type: learn-subsection -weight: 3 -description: | - Create a new performance metric and integrate it with yardstick functions. ---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: rlang and tidymodels. - -The [yardstick](https://yardstick.tidymodels.org/) package already includes a large number of metrics, but there's obviously a chance that you might have a custom metric that hasn't been implemented yet. 
In that case, you can use a few of the tools yardstick exposes to create custom metrics. - -Why create custom metrics? With the infrastructure yardstick provides, you get: - -- Standardization between your metric and other preexisting metrics -- Automatic error handling for types and lengths -- Automatic selection of binary / multiclass metric implementations -- Support for `NA` handling -- Support for grouped data frames -- Support for use alongside other metrics in `metric_set()` - -The implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of `roc_auc()` after reading this vignette if you want to work on a class probability metric. - -## Numeric example: MSE - -Mean squared error (sometimes MSE or from here on, `mse()`) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for `mse()` is: - -$$ MSE = \frac{1}{N} \sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) $$ - -All metrics should have a data frame version, and a vector version. The data frame version here will be named `mse()`, and the vector version will be `mse_vec()`. - -### Vector implementation - -To start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as `beta` in `f_meas()`). To create the vector function, you need to do two things: - -1) Use `check_numeric_metric()` to validate the input types. -2) Use `yardstick_remove_missing()` and `yardstick_any_missing()` to setup handling for missing values. -3) Create an internal implementation function, `mse_impl()`, and use. 
- -Below, `mse_impl()` contains the actual implementation of the metric, and takes `truth` and `estimate` as arguments along with any metric specific arguments. Optionally `case_weights` if the calculations supports it. - -The yardstick function `check_numeric_metric()` takes `truth`, `estimate` and `case_weights`, and validates that they are the right type, and are the same length. - -The `yardstick_remove_missing()` and `yardstick_any_missing()` yardstick functions are used to handle missing values in a consistent way, similarly to how the other metrics handle them. The code below is typically copy pasted from function to function, but certain types of metrics might want to deviate from this pattern. - -You are required to supply a `case_weights` argument to `mse_vec()` for the functions to work with yardstick. If your metric in question doesn't support case weights, you can error if they are passed, or simply ignore it. - - -```r -library(tidymodels) - -mse_impl <- function(truth, estimate, case_weights = NULL) { - mean((truth - estimate) ^ 2) -} - -mse_vec <- function(truth, estimate, na_rm = TRUE, case_weights = NULL, ...) { - check_numeric_metric(truth, estimate, case_weights) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - mse_impl(truth, estimate, case_weights = case_weights) -} -``` - -At this point, you've created the vector version of the mean squared error metric. - - -```r -data("solubility_test") - -mse_vec( - truth = solubility_test$solubility, - estimate = solubility_test$prediction -) -#> [1] 0.521 -``` - -Intelligent error handling is immediately available. - - -```r -mse_vec(truth = "apple", estimate = 1) -#> Error in `mse_vec()`: -#> ! `truth` should be a numeric, not a `character`. 
- -mse_vec(truth = 1, estimate = factor("xyz")) -#> Error in `mse_vec()`: -#> ! `estimate` should be a numeric, not a `factor`. -``` - -`NA` values are removed if `na_rm = TRUE` (the default). If `na_rm = FALSE` and any `NA` values are detected, then the metric automatically returns `NA`. - - -```r -# NA values removed -mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5)) -#> [1] 0.01 - -# NA returned -mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE) -#> [1] NA -``` - -### Data frame implementation - -The data frame version of the metric should be fairly simple. It is a generic function with a `data.frame` method that calls the yardstick helper, `numeric_metric_summarizer()`, and passes along the `mse_vec()` function to it along with versions of `truth` and `estimate` that have been wrapped in `rlang::enquo()` and then unquoted with `!!` so that non-standard evaluation can be supported. - - -```r -library(rlang) - -mse <- function(data, ...) { - UseMethod("mse") -} - -mse <- new_numeric_metric(mse, direction = "minimize") - -mse.data.frame <- function(data, truth, estimate, na_rm = TRUE, case_weights = NULL, ...) { - - numeric_metric_summarizer( - name = "mse", - fn = mse_vec, - data = data, - truth = !!enquo(truth), - estimate = !!enquo(estimate), - na_rm = na_rm, - case_weights = !!enquo(case_weights) - ) -} -``` - -And that's it. The yardstick package handles the rest. - - -```r -mse(solubility_test, truth = solubility, estimate = prediction) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 mse standard 0.521 - -# Error handling -mse(solubility_test, truth = solubility, estimate = factor("xyz")) -#> Error in `mse()`: -#> ! Can't subset columns that don't exist. -#> ✖ Column `xyz` doesn't exist. -``` - -Let's test it out on a grouped data frame. 
- - -```r -library(dplyr) - -set.seed(1234) -size <- 100 -times <- 10 - -# create 10 resamples -solubility_resampled <- bind_rows( - replicate( - n = times, - expr = sample_n(solubility_test, size, replace = TRUE), - simplify = FALSE - ), - .id = "resample" -) - -solubility_resampled %>% - group_by(resample) %>% - mse(solubility, prediction) -#> # A tibble: 10 × 4 -#> resample .metric .estimator .estimate -#> -#> 1 1 mse standard 0.512 -#> 2 10 mse standard 0.454 -#> 3 2 mse standard 0.513 -#> 4 3 mse standard 0.414 -#> 5 4 mse standard 0.543 -#> 6 5 mse standard 0.456 -#> 7 6 mse standard 0.652 -#> 8 7 mse standard 0.642 -#> 9 8 mse standard 0.404 -#> 10 9 mse standard 0.479 -``` - -## Class example: miss rate - -Miss rate is another name for the false negative rate, and is a classification metric in the same family as `sens()` and `spec()`. It follows the formula: - -$$ miss\_rate = \frac{FN}{FN + TP} $$ - -This metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. As you will see in the example, you can achieve this with a call to `base::table(estimate, truth)` which correctly puts the "correct" result in the columns of the confusion matrix. - -Classification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let's start with the binary case. - -### Vector implementation - -The vector implementation for classification metrics initially has a very similar setup as the numeric metrics. It used `check_class_metric()` instead of `check_numeric_metric()`. It has an additional argument, `estimator` that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to `NULL`. Additionally, pass it along to `check_class_metric()` so that it can check the provided `estimator` against the classes of `truth` and `estimate` to see if they are allowed. 
- - -```r -# Logic for `event_level` -event_col <- function(xtab, event_level) { - if (identical(event_level, "first")) { - colnames(xtab)[[1]] - } else { - colnames(xtab)[[2]] - } -} - -miss_rate_impl <- function(truth, estimate, event_level) { - # Create - xtab <- table(estimate, truth) - col <- event_col(xtab, event_level) - col2 <- setdiff(colnames(xtab), col) - - tp <- xtab[col, col] - fn <- xtab[col2, col] - - fn / (fn + tp) -} - -miss_rate_vec <- function(truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) { - estimator <- finalize_estimator(truth, estimator) - - check_class_metric(truth, estimate, case_weights, estimator) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - miss_rate_impl(truth, estimate, event_level) -} -``` - -Another change from the numeric metric is that a call to `finalize_estimator()` is made. This is the infrastructure that auto-selects the type of estimator to use. - - -```r -data("two_class_example") -miss_rate_vec(two_class_example$truth, two_class_example$predicted) -#> [1] 0.12 -``` - -What happens if you try and pass in a multiclass result? - - -```r -data("hpc_cv") -fold1 <- filter(hpc_cv, Resample == "Fold01") -miss_rate_vec(fold1$obs, fold1$pred) -#> F M L -#> 0.0621 0.0000 0.0000 -``` - -This isn't great, as currently multiclass `miss_rate()` isn't supported and it would have been better to throw an error if the `estimator` was not `"binary"`. Currently, `finalize_estimator()` uses its default implementation which selected `"macro"` as the `estimator` since `truth` was a factor with more than 2 classes. 
When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this. - -To fix this, a generic counterpart to `finalize_estimator()`, called `finalize_estimator_internal()`, exists that helps you restrict the input types. If you provide a method to `finalize_estimator_internal()` where the method name is the same as your metric name, and then set the `metric_class` argument in `finalize_estimator()` to be the same thing, you can control how the auto-selection of the `estimator` is handled. - -Don't worry about the `metric_dispatcher` argument. This is handled for you and just exists as a dummy argument to dispatch off of. - -It is also good practice to call `validate_estimator()` which handles the case where a user passed in the estimator themselves. This validates that the supplied `estimator` is one of the allowed types and error otherwise. - - -```r -finalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator, call) { - - validate_estimator(estimator, estimator_override = "binary") - if (!is.null(estimator)) { - return(estimator) - } - - lvls <- levels(x) - if (length(lvls) > 2) { - stop("A multiclass `truth` input was provided, but only `binary` is supported.") - } - "binary" -} - -miss_rate_vec <- function(truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) 
{ - # calls finalize_estimator_internal() internally - estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate") - - check_class_metric(truth, estimate, case_weights, estimator) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - miss_rate_impl(truth, estimate, event_level) -} - -# Error thrown by our custom handler -miss_rate_vec(fold1$obs, fold1$pred) -#> Error in finalize_estimator_internal.miss_rate(metric_dispatcher, x, estimator, : A multiclass `truth` input was provided, but only `binary` is supported. - -# Error thrown by validate_estimator() -miss_rate_vec(fold1$obs, fold1$pred, estimator = "macro") -#> Error in `finalize_estimator_internal()`: -#> ! `estimator` must be one of: "binary". Not "macro". -``` - -### Supporting multiclass miss rate - -Like many other classification metrics such as `precision()` or `recall()`, miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read `vignette("multiclass", "yardstick")` for more information about how these methods work. - -Generally, they require more effort to get right than the binary case, especially if you want to have a performant version. Luckily, a somewhat standard template is used in yardstick and can be used here as well. - -Let's first remove the "binary" restriction we created earlier. - - -```r -rm(finalize_estimator_internal.miss_rate) -``` - -The main changes below are: - -- The binary implementation is moved to `miss_rate_binary()`. - -- `miss_rate_estimator_impl()` is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. 
It is called from `miss_rate_impl()` and also accepts the `estimator` argument using R's function scoping rules. - -- `miss_rate_multiclass()` provides the implementation for the multiclass case. It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in `miss_rate_estimator_impl()` depending on whether or not it was macro or weighted macro. - - -```r -miss_rate_vec <- function(truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) { - # calls finalize_estimator_internal() internally - estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate") - - check_class_metric(truth, estimate, case_weights, estimator) - - if (na_rm) { - result <- yardstick_remove_missing(truth, estimate, case_weights) - - truth <- result$truth - estimate <- result$estimate - case_weights <- result$case_weights - } else if (yardstick_any_missing(truth, estimate, case_weights)) { - return(NA_real_) - } - - miss_rate_impl(truth, estimate, estimator, event_level) -} - -miss_rate_impl <- function(truth, estimate, estimator, event_level) { - xtab <- table(estimate, truth) - # Rather than implement the actual method here, we rely on - # an *_estimator_impl() function that can handle binary - # and multiclass cases - miss_rate_estimator_impl(xtab, estimator, event_level) -} - -# This function switches between binary and multiclass implementations -miss_rate_estimator_impl <- function(data, estimator, event_level) { - if(estimator == "binary") { - miss_rate_binary(data, event_level) - } else { - # Encapsulates the macro, macro weighted, and micro cases - wt <- get_weights(data, estimator) - res <- miss_rate_multiclass(data, estimator) - weighted.mean(res, wt) - } -} - 
-miss_rate_binary <- function(data, event_level) { - col <- event_col(data, event_level) - col2 <- setdiff(colnames(data), col) - - tp <- data[col, col] - fn <- data[col2, col] - - fn / (fn + tp) -} - -miss_rate_multiclass <- function(data, estimator) { - - # We need tp and fn for all classes individually - # we can get this by taking advantage of the fact - # that tp + fn = colSums(data) - tp <- diag(data) - tpfn <- colSums(data) - fn <- tpfn - tp - - # If using a micro estimator, we sum the individual - # pieces before performing the miss rate calculation - if (estimator == "micro") { - tp <- sum(tp) - fn <- sum(fn) - } - - # return the vector - tp / (tp + fn) -} -``` - -For the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. For example, `f_meas()` is a combination of `recall()` and `precision()`. When calculating a macro averaged `f_meas()`, the weighting must be applied 1 time, at the very end of the calculation. `recall_multiclass()` and `precision_multiclass()` are defined similarly to how `miss_rate_multiclass()` is defined and returns the unweighted vector of calculations. This means we can directly use this in `f_meas()`, and then weight everything once at the end of that calculation. - -Let's try it out now: - - -```r -# two class -miss_rate_vec(two_class_example$truth, two_class_example$predicted) -#> [1] 0.12 - -# multiclass -miss_rate_vec(fold1$obs, fold1$pred) -#> [1] 0.548 -``` - -#### Data frame implementation - -Luckily, the data frame implementation is as simple as the numeric case, we just need to add an extra `estimator` argument and pass that through. - - -```r -miss_rate <- function(data, ...) 
{ - UseMethod("miss_rate") -} - -miss_rate <- new_class_metric(miss_rate, direction = "minimize") - -miss_rate.data.frame <- function(data, - truth, - estimate, - estimator = NULL, - na_rm = TRUE, - case_weights = NULL, - event_level = "first", - ...) { - class_metric_summarizer( - name = "miss_rate", - fn = miss_rate_vec, - data = data, - truth = !!enquo(truth), - estimate = !!enquo(estimate), - estimator = estimator, - na_rm = na_rm, - case_weights = !!enquo(case_weights), - event_level = event_level - ) -} -``` - - -```r -# Macro weighted automatically selected -fold1 %>% - miss_rate(obs, pred) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 miss_rate macro 0.548 - -# Switch to micro -fold1 %>% - miss_rate(obs, pred, estimator = "micro") -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 miss_rate micro 0.726 - -# Macro weighted by resample -hpc_cv %>% - group_by(Resample) %>% - miss_rate(obs, pred, estimator = "macro_weighted") -#> # A tibble: 10 × 4 -#> Resample .metric .estimator .estimate -#> -#> 1 Fold01 miss_rate macro_weighted 0.726 -#> 2 Fold02 miss_rate macro_weighted 0.712 -#> 3 Fold03 miss_rate macro_weighted 0.758 -#> 4 Fold04 miss_rate macro_weighted 0.712 -#> 5 Fold05 miss_rate macro_weighted 0.712 -#> 6 Fold06 miss_rate macro_weighted 0.697 -#> 7 Fold07 miss_rate macro_weighted 0.675 -#> 8 Fold08 miss_rate macro_weighted 0.721 -#> 9 Fold09 miss_rate macro_weighted 0.673 -#> 10 Fold10 miss_rate macro_weighted 0.699 - -# Error handling -miss_rate(hpc_cv, obs, VF) -#> Error in `miss_rate()`: -#> ! `estimate` should be a factor, not a `numeric`. -``` - -## Using custom metrics - -The `metric_set()` function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, `metric_set()` fails. By using `new_numeric_metric()` and `new_class_metric()` in the above custom metrics, they work out of the box without any additional adjustments. 
- - -```r -numeric_mets <- metric_set(mse, rmse) - -numeric_mets(solubility_test, solubility, prediction) -#> # A tibble: 2 × 3 -#> .metric .estimator .estimate -#> -#> 1 mse standard 0.521 -#> 2 rmse standard 0.722 -``` - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Monterey 12.6 -#> system aarch64, darwin20 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2023-04-26 -#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.2.0) -#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.2.0) -#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.2.1) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.2.0) -#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.0) -#> recipes * 1.0.6.9000 2023-04-25 [1] local -#> rlang * 1.1.0 2023-03-14 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.0) -#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.1) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.2.0) -#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.2.0) -#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.2.0) -#> -#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library -#> [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/develop/models/index.Rmarkdown b/content/learn/develop/models/index.Rmarkdown deleted file mode 100644 index 4a563484..00000000 --- a/content/learn/develop/models/index.Rmarkdown +++ /dev/null @@ -1,611 +0,0 @@ ---- -title: 
"How to build a parsnip model" -tags: [parsnip] -categories: [] -type: learn-subsection -weight: 2 -description: | - Create a parsnip model function from an existing model implementation. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(mda) -library(tidymodels) - -pkgs <- c("tidymodels", "mda", "modeldata") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -The parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this: - - * It eliminates a lot of duplicate code. - * Since the expressions are not evaluated until fitting, it eliminates many package dependencies. - -A parsnip model function is itself very general. For example, the `logistic_reg()` function itself doesn't have any model code within it. Instead, each model function is associated with one or more computational _engines_. These might be different R packages or some function in another language (that can be evaluated by R). - -This article describes the process of creating a new model function. Before proceeding, take a minute and read our [guidelines on creating modeling packages](https://tidymodels.github.io/model-implementation-principles/) to understand the general themes and conventions that we use. - -## An example model - -As an example, we'll create a function for _mixture discriminant analysis_. There are [a few packages](http://search.r-project.org/cgi-bin/namazu.cgi?query=%22mixture+discriminant%22&max=100&result=normal&sort=score&idxname=functions) that implement this but we'll focus on `mda::mda`: - -```{r mda-str} -str(mda::mda) -``` - -The main hyperparameter is the number of subclasses. We'll name our function `discrim_mixture`. 
- -## Aspects of models - -Before proceeding, it helps to to review how parsnip categorizes models: - -* The model _type_ is related to the structural aspect of the model. For example, the model type `linear_reg` represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are `nearest_neighbor`, `decision_tree`, and so on. - -* Within a model type is the _mode_, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both models (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression). - -* The computation _engine_ is a combination of the estimation method and the implementation. For example, for linear regression, one engine is `"lm"` which uses ordinary least squares analysis via the `lm()` function. Another engine is `"stan"` which uses the Stan infrastructure to estimate parameters using Bayes rule. - -When adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model. - -## The general process - -The parsnip package stores information about the models in an internal environment object. The environment can be accessed via the function `get_model_env()`. The package includes a variety of functions that can get or set the different aspects of the models. - -If you are adding a new model from your own package, you can use these functions to add new entries into the model environment. - -### Step 1. Register the model, modes, and arguments - -We will add the MDA model using the model type `discrim_mixture`. 
Since this is a classification method, we only have to register a single mode: - -```{r mda-reg} -library(tidymodels) -set_new_model("discrim_mixture") -set_model_mode(model = "discrim_mixture", mode = "classification") -set_model_engine( - "discrim_mixture", - mode = "classification", - eng = "mda" -) -set_dependency("discrim_mixture", eng = "mda", pkg = "mda") -``` - -These functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip: - -```{r mda-show-1} -show_model_info("discrim_mixture") -``` - -The next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in: - - * The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. "penalty" instead of "lambda" for regularized regression). We recommend consulting [the model argument table available here](/find/parsnip/) to see if an existing argument name can be used before creating a new one. - - * The argument name that is used by the underlying modeling function. - - * A function reference for a _constructor_ that will be used to generate tuning parameter values. This should be a character vector with a named element called `fun` that is the constructor function. There is an optional element `pkg` that can be used to call the function using its namespace. If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for `trans` and `range` while qualitative parameters can pass `values` via this list. - - * A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. 
In general this is not the case so one would use `has_submodels = FALSE`. - -For `mda::mda()`, the main tuning parameter is `subclasses` which we will rewrite as `sub_classes`. - -```{r mda-args} -set_model_arg( - model = "discrim_mixture", - eng = "mda", - parsnip = "sub_classes", - original = "subclasses", - func = list(pkg = "foo", fun = "bar"), - has_submodel = FALSE -) -show_model_info("discrim_mixture") -``` - -### Step 2. Create the model function - -This is a fairly simple function that can follow a basic template. The main arguments to our function will be: - - * The mode. If the model can do more than one mode, you might default this to "unknown". In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won't have to specify it. - - * The argument names (`sub_classes` here). These should be defaulted to `NULL`. - -A basic version of the function is: - -```{r model-fun} -discrim_mixture <- - function(mode = "classification", sub_classes = NULL) { - # Check for correct mode - if (mode != "classification") { - rlang::abort("`mode` should be 'classification'") - } - - # Capture the arguments in quosures - args <- list(sub_classes = rlang::enquo(sub_classes)) - - # Save some empty slots for future parts of the specification - new_model_spec( - "discrim_mixture", - args = args, - eng_args = NULL, - mode = mode, - method = NULL, - engine = NULL - ) - } -``` - -This is pretty simple since the data are not exposed to this function. - -{{% warning %}} We strongly suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former return better traceback results and have safer defaults for handling call objects. {{%/ warning %}} - -### Step 3. Add a fit module - -Now that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. 
The elements are: - - * `interface` is a single character value that could be "formula", "data.frame", or "matrix". This defines the type of interface used by the underlying fit function (`mda::mda`, in this case). This helps the translation of the data to be in an appropriate format for the that function. - - * `protect` is an optional list of function arguments that **should not be changeable** by the user. In this case, we probably don't want users to pass data values to these arguments (until the `fit()` function is called). - - * `func` is the package and name of the function that will be called. If you are using a locally defined function, only `fun` is required. - - * `defaults` is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. This isn't needed in this case, but is described later in this document. - -For the first engine: - -```{r fit-mod} -set_fit( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - value = list( - interface = "formula", - protect = c("formula", "data"), - func = c(pkg = "mda", fun = "mda"), - defaults = list() - ) -) - -show_model_info("discrim_mixture") -``` - -We also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly. - - * `predictor_indicators` describes whether and how to create indicator/dummy variables from factor predictors. There are three options: `"none"` (do not expand factor predictors), `"traditional"` (apply the standard `model.matrix()` encodings), and `"one_hot"` (create the complete set including the baseline level for all factors). - - * `compute_intercept` controls whether `model.matrix()` should include the intercept in its formula. This affects more than the inclusion of an intercept column. 
With an intercept, `model.matrix()` computes dummy variables for all but one factor level. Without an intercept, `model.matrix()` computes a full set of indicators for the first factor variable, but an incomplete set for the remainder. - - * `remove_intercept` removes the intercept column *after* `model.matrix()` is finished. This can be useful if the model function (e.g. `lm()`) automatically generates an intercept. - -* `allow_sparse_x` specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning. - -```{r} -set_encoding( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - options = list( - predictor_indicators = "traditional", - compute_intercept = TRUE, - remove_intercept = TRUE, - allow_sparse_x = FALSE - ) -) -``` - - -### Step 4. Add modules for prediction - -Similar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the `class` object contains the details. The elements of the list are: - - * `pre` and `post` are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won't be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a `pre` function with the `args` below. - * `func` is the prediction function (in the same format as above). In many cases, packages have a predict method for their model's class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to `predict()` with no associated package. - * `args` is a list of arguments to pass to the prediction function. These will most likely be wrapped in `rlang::expr()` so that they are not evaluated when defining the method. 
For mda, the code would be `predict(object, newdata, type = "class")`. What is actually given to the function is the parsnip model fit object, which includes a sub-object called `fit()` that houses the mda model object. If the data need to be a matrix or data frame, you could also use `newdata = quote(as.data.frame(newdata))` or similar. - -The parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data. - -To add this method to the model environment, a similar `set()` function is used: - -```{r mds-class} -class_info <- - list( - pre = NULL, - post = NULL, - func = c(fun = "predict"), - args = - # These lists should be of the form: - # {predict.mda argument name} = {values provided from parsnip objects} - list( - # We don't want the first two arguments evaluated right now - # since they don't exist yet. `type` is a simple object that - # doesn't need to have its evaluation deferred. - object = quote(object$fit), - newdata = quote(new_data), - type = "class" - ) - ) - -set_pred( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - type = "class", - value = class_info -) -``` - -A similar call can be used to define the class probability module (if they can be computed). The format is identical to the `class` module but the output is expected to be a tibble with columns for each factor level. - -As an example of the `post` function, the data frame created by `mda:::predict.mda()` will be converted to a tibble. The arguments are `x` (the raw results coming from the predict method) and `object` (the parsnip model fit object). The latter has a sub-object called `lvl` which is a character string of the outcome's factor levels (if any). - -We register the probability module. 
There is a template function that makes this slightly easier to format the objects: - -```{r mda-prob} -prob_info <- - pred_value_template( - post = function(x, object) { - tibble::as_tibble(x) - }, - func = c(fun = "predict"), - # Now everything else is put into the `args` slot - object = quote(object$fit), - newdata = quote(new_data), - type = "posterior" - ) - -set_pred( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - type = "prob", - value = prob_info -) - -show_model_info("discrim_mixture") -``` - -If this model could be used for regression situations, we could also add a "numeric" module. For `pred`, the model requires an unnamed numeric vector output (usually). - -Examples are [here](https://github.com/tidymodels/parsnip/blob/master/R/linear_reg_data.R) and [here](https://github.com/tidymodels/parsnip/blob/master/R/rand_forest_data.R). - - -### Does it work? - -As a developer, one thing that may come in handy is the `translate()` function. This will tell you what the model's eventual syntax will be. - -For example: - -```{r mda-code} -discrim_mixture(sub_classes = 2) %>% - translate(engine = "mda") -``` - -Let's try it on a data set from the modeldata package: - -```{r mda-data} -data("two_class_dat", package = "modeldata") -set.seed(4622) -example_split <- initial_split(two_class_dat, prop = 0.99) -example_train <- training(example_split) -example_test <- testing(example_split) - -mda_spec <- discrim_mixture(sub_classes = 2) %>% - set_engine("mda") - -mda_fit <- mda_spec %>% - fit(Class ~ ., data = example_train, engine = "mda") -mda_fit - -predict(mda_fit, new_data = example_test, type = "prob") %>% - bind_cols(example_test %>% select(Class)) - -predict(mda_fit, new_data = example_test) %>% - bind_cols(example_test %>% select(Class)) -``` - - -## Add an engine - -The process for adding an engine to an existing model is _almost_ the same as building a new model but simpler with fewer steps. 
You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. The code for the `rlm()` function in MASS is pretty similar to `lm()`, so we can copy that code and change the package/function names: - -```{r rlm} -set_model_engine("linear_reg", "regression", eng = "rlm") -set_dependency("linear_reg", eng = "rlm", pkg = "MASS") - -set_fit( - model = "linear_reg", - eng = "rlm", - mode = "regression", - value = list( - interface = "formula", - protect = c("formula", "data", "weights"), - func = c(pkg = "MASS", fun = "rlm"), - defaults = list() - ) -) - -set_encoding( - model = "linear_reg", - eng = "rlm", - mode = "regression", - options = list( - predictor_indicators = "traditional", - compute_intercept = TRUE, - remove_intercept = TRUE, - allow_sparse_x = FALSE - ) -) - -set_pred( - model = "linear_reg", - eng = "rlm", - mode = "regression", - type = "numeric", - value = list( - pre = NULL, - post = NULL, - func = c(fun = "predict"), - args = - list( - object = expr(object$fit), - newdata = expr(new_data), - type = "response" - ) - ) -) - -# testing: -linear_reg() %>% - set_engine("rlm") %>% - fit(mpg ~ ., data = mtcars) -``` - -## Add parsnip models to another package - -The process here is almost the same. All of the previous functions are still required but their execution is a little different. - -For parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the "Depends" category. - -The first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. For our example here, this might look like: - -```{r eval = FALSE} -make_discrim_mixture_mda <- function() { - parsnip::set_new_model("discrim_mixture") - - parsnip::set_model_mode("discrim_mixture", "classification") - - # and so one... 
-} -``` - -This function is then executed when your package is loaded: - -```{r eval = FALSE} -.onLoad <- function(libname, pkgname) { - # This defines discrim_mixture in the model database - make_discrim_mixture_mda() -} -``` - -For an example package that uses parsnip definitions, take a look at the [discrim](https://github.com/tidymodels/discrim) package. - -{{% warning %}} To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. `set_new_model()` and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing. {{%/ warning %}} - -It is also important for parallel processing support to **list the home package as a dependency**. If the `discrim_mixture()` function lived in a package called `mixedup`, include the line: - -```r -set_dependency("discrim_mixture", eng = "mda", pkg = "mixedup") -``` - -Parallel processing requires this explicit dependency setting. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip's internal database (and produce an error). - - -## Your model, tuning parameters, and you - -The tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. `discrim_mixture()` has one main tuning parameter: `sub_classes`. To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. 
- -`tunable()` takes the model specification as its argument and returns a tibble with columns: - -* `name`: The name of the argument. - -* `call_info`: A list that describes how to call a function that returns a dials parameter object. - -* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `"model_spec"`. - -* `component`: A character string with more information about the source. For models, this is just the name of the function (e.g. `"discrim_mixture"`). - -* `component_id`: A character string to indicate where a unique identifier is for the object. For a model, this is indicates the type of model argument (e.g. "main"). - -The main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. - -For example, for a nearest-neighbors `neighbors` parameter, this value is just: - -```{r mtry} -info <- list(pkg = "dials", fun = "neighbors") - -# FYI: how it is used under-the-hood: -new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg) -rlang::eval_tidy(new_param_call) -``` - -For `discrim_mixture()`, a dials object is needed that returns an integer that is the number of sub-classes that should be create. We can create a dials parameter function for this: - -```{r sub-classes} -sub_classes <- function(range = c(1L, 10L), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(sub_classes = "# Sub-Classes"), - finalize = NULL - ) -} -``` - -If this were in the dials package, we could use: - -```{r tunable} -tunable.discrim_mixture <- function(x, ...) 
{ - tibble::tibble( - name = c("sub_classes"), - call_info = list(list(pkg = NULL, fun = "sub_classes")), - source = "model_spec", - component = "discrim_mixture", - component_id = "main" - ) -} -``` - -Once this method is in place, the tuning functions can be used: - -```{r tune-mda, message = FALSE} -mda_spec <- - discrim_mixture(sub_classes = tune()) %>% - set_engine("mda") - -set.seed(452) -cv <- vfold_cv(example_train) -mda_tune_res <- mda_spec %>% - tune_grid(Class ~ ., cv, grid = 4) -show_best(mda_tune_res, metric = "roc_auc") -``` - - - -## Pro-tips, what-ifs, exceptions, FAQ, and minutiae - -There are various things that came to mind while developing this resource. - -**Do I have to return a simple vector for `predict` and `predict_class`?** - -Previously, when discussing the `pred` information: - -> For `pred`, the model requires an unnamed numeric vector output **(usually)**. - -There are some models (e.g. `glmnet`, `plsr`, `Cubist`, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called `values` and have extra columns for any parameters that define the different sub-models. - -For example, if I fit a linear regression model via `glmnet` and get four values of the regularization parameter (`lambda`): - -```{r glmnet, eval = FALSE} -linear_reg() %>% - set_engine("glmnet", nlambda = 4) %>% - fit(mpg ~ ., data = mtcars) %>% - multi_predict(new_data = mtcars[1:3, -1]) -``` - -_However_, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (`lambda`, in this case). 
- -**What do I do about how my model handles factors or categorical data?** - -Some modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using `model.matrix()`), and some do not. Some examples of models that do _not_ create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a `model.matrix()`-like default encoding for categorical data used in a model formula, but you can change this encoding using `set_encoding()`. For example, you can set predictor encodings that say, "leave my data alone," and keep factors as is: - -```{r encodinginfo, eval=FALSE} -set_encoding( - model = "decision_tree", - eng = "rpart", - mode = "regression", - options = list( - predictor_indicators = "none", - compute_intercept = FALSE, - remove_intercept = FALSE - ) -) -``` - -{{% note %}} There are three options for `predictor_indicators`: -- "none" (do not expand factor predictors) -- "traditional" (apply the standard `model.matrix()` encoding) -- "one_hot" (create the complete set including the baseline level for all factors) {{%/ note %}} - -To learn more about encoding categorical predictors, check out [this blog post](https://www.tidyverse.org/blog/2020/07/parsnip-0-1-2/#predictor-encoding-consistency). - -**What is the `defaults` slot and why do I need it?** - -You might want to set defaults that can be overridden by the user. For example, for logistic regression with `glm`, it make sense to default `family = binomial`. However, if someone wants to use a different link function, they should be able to do that. 
For that model/engine definition, it has: - -```{r glm-alt, eval = FALSE} -defaults = list(family = expr(binomial)) -``` - -So that is the default: - -```{r glm-alt-show, eval = FALSE} -logistic_reg() %>% translate(engine = "glm") - -# but you can change it: - -logistic_reg() %>% - set_engine("glm", family = expr(binomial(link = "probit"))) %>% - translate() -``` - -That's what `defaults` are for. - -Note that we wrapped `binomial` inside of `expr()`. If we didn't, it would substitute the results of executing `binomial()` inside of the expression (and that's a mess). - -**What if I want more complex defaults?** - -The `translate` function can be used to check values or set defaults once the model's mode is known. To do this, you can create a model-specific S3 method that first calls the general method (`translate.model_spec()`) and then makes modifications or conducts error traps. - -For example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check: - -```{r rf-trans, eval = FALSE} -# Simplified version -translate.rand_forest <- function (x, engine, ...){ - # Run the general method to get the real arguments in place - x <- translate.default(x, engine, ...) - - # Check and see if they make sense for the engine and/or mode: - if (x$engine == "ranger") { - if (any(names(x$method$fit$args) == "importance")) - if (is.logical(x$method$fit$args$importance)) - rlang::abort("`importance` should be a character value. See ?ranger::ranger.") - } - x -} -``` - -As another example, `nnet::nnet()` has an option for the final layer to be linear (called `linout`). If `mode = "regression"`, that should probably be set to `TRUE`. You couldn't do this with the `args` (described above) since you need the function translated first. - - -**My model fit requires more than one function call. 
So....?** - -The best course of action is to write wrapper so that it can be one call. This was the case with xgboost and keras. - -**Why would I preprocess my data?** - -There might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.) - -This would **not** include making dummy variables and `model.matrix` stuff. The parsnip infrastructure already does that for you. - - -**Why would I post-process my predictions?** - -What comes back from some R functions may be somewhat... arcane or problematic. As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (*narrator: they don't*). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set. - -Another example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure. - -These are the types of problems that the post-processor will solve. - -**Are there other modes?** - -Not yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don't make direct predictions of the outcome. - -If you have a suggestion, please add a [GitHub issue](https://github.com/tidymodels/parsnip/issues) to discuss it. 
- - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - - - diff --git a/content/learn/develop/models/index.markdown b/content/learn/develop/models/index.markdown deleted file mode 100644 index f82fc912..00000000 --- a/content/learn/develop/models/index.markdown +++ /dev/null @@ -1,800 +0,0 @@ ---- -title: "How to build a parsnip model" -tags: [parsnip] -categories: [] -type: learn-subsection -weight: 2 -description: | - Create a parsnip model function from an existing model implementation. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: mda, modeldata, and tidymodels. - -The parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this: - - * It eliminates a lot of duplicate code. - * Since the expressions are not evaluated until fitting, it eliminates many package dependencies. - -A parsnip model function is itself very general. For example, the `logistic_reg()` function itself doesn't have any model code within it. Instead, each model function is associated with one or more computational _engines_. These might be different R packages or some function in another language (that can be evaluated by R). - -This article describes the process of creating a new model function. Before proceeding, take a minute and read our [guidelines on creating modeling packages](https://tidymodels.github.io/model-implementation-principles/) to understand the general themes and conventions that we use. - -## An example model - -As an example, we'll create a function for _mixture discriminant analysis_. 
There are [a few packages](http://search.r-project.org/cgi-bin/namazu.cgi?query=%22mixture+discriminant%22&max=100&result=normal&sort=score&idxname=functions) that implement this but we'll focus on `mda::mda`: - - -```r -str(mda::mda) -#> function (formula = formula(data), data = sys.frame(sys.parent()), subclasses = 3, -#> sub.df = NULL, tot.df = NULL, dimension = sum(subclasses) - 1, eps = 100 * -#> .Machine$double.eps, iter = 5, weights = mda.start(x, g, subclasses, -#> trace, ...), method = polyreg, keep.fitted = (n * dimension < 5000), -#> trace = FALSE, ...) -``` - -The main hyperparameter is the number of subclasses. We'll name our function `discrim_mixture`. - -## Aspects of models - -Before proceeding, it helps to to review how parsnip categorizes models: - -* The model _type_ is related to the structural aspect of the model. For example, the model type `linear_reg` represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are `nearest_neighbor`, `decision_tree`, and so on. - -* Within a model type is the _mode_, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both models (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression). - -* The computation _engine_ is a combination of the estimation method and the implementation. For example, for linear regression, one engine is `"lm"` which uses ordinary least squares analysis via the `lm()` function. Another engine is `"stan"` which uses the Stan infrastructure to estimate parameters using Bayes rule. - -When adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model. - -## The general process - -The parsnip package stores information about the models in an internal environment object. 
The environment can be accessed via the function `get_model_env()`. The package includes a variety of functions that can get or set the different aspects of the models. - -If you are adding a new model from your own package, you can use these functions to add new entries into the model environment. - -### Step 1. Register the model, modes, and arguments - -We will add the MDA model using the model type `discrim_mixture`. Since this is a classification method, we only have to register a single mode: - - -```r -library(tidymodels) -set_new_model("discrim_mixture") -set_model_mode(model = "discrim_mixture", mode = "classification") -set_model_engine( - "discrim_mixture", - mode = "classification", - eng = "mda" -) -set_dependency("discrim_mixture", eng = "mda", pkg = "mda") -``` - -These functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip: - - -```r -show_model_info("discrim_mixture") -#> Information for `discrim_mixture` -#> modes: unknown, classification -#> -#> engines: -#> classification: mdaNA -#> -#> ¹The model can use case weights. -#> -#> no registered arguments. -#> -#> no registered fit modules. -#> -#> no registered prediction modules. -``` - -The next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in: - - * The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. "penalty" instead of "lambda" for regularized regression). We recommend consulting [the model argument table available here](/find/parsnip/) to see if an existing argument name can be used before creating a new one. - - * The argument name that is used by the underlying modeling function. - - * A function reference for a _constructor_ that will be used to generate tuning parameter values. 
This should be a character vector with a named element called `fun` that is the constructor function. There is an optional element `pkg` that can be used to call the function using its namespace. If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for `trans` and `range` while qualitative parameters can pass `values` via this list. - - * A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. In general this is not the case so one would use `has_submodels = FALSE`. - -For `mda::mda()`, the main tuning parameter is `subclasses` which we will rewrite as `sub_classes`. - - -```r -set_model_arg( - model = "discrim_mixture", - eng = "mda", - parsnip = "sub_classes", - original = "subclasses", - func = list(pkg = "foo", fun = "bar"), - has_submodel = FALSE -) -show_model_info("discrim_mixture") -#> Information for `discrim_mixture` -#> modes: unknown, classification -#> -#> engines: -#> classification: mdaNA -#> -#> ¹The model can use case weights. -#> -#> arguments: -#> mda: -#> sub_classes --> subclasses -#> -#> no registered fit modules. -#> -#> no registered prediction modules. -``` - -### Step 2. Create the model function - -This is a fairly simple function that can follow a basic template. The main arguments to our function will be: - - * The mode. If the model can do more than one mode, you might default this to "unknown". In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won't have to specify it. - - * The argument names (`sub_classes` here). These should be defaulted to `NULL`. 
- -A basic version of the function is: - - -```r -discrim_mixture <- - function(mode = "classification", sub_classes = NULL) { - # Check for correct mode - if (mode != "classification") { - rlang::abort("`mode` should be 'classification'") - } - - # Capture the arguments in quosures - args <- list(sub_classes = rlang::enquo(sub_classes)) - - # Save some empty slots for future parts of the specification - new_model_spec( - "discrim_mixture", - args = args, - eng_args = NULL, - mode = mode, - method = NULL, - engine = NULL - ) - } -``` - -This is pretty simple since the data are not exposed to this function. - -{{% warning %}} We strongly suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former return better traceback results and have safer defaults for handling call objects. {{%/ warning %}} - -### Step 3. Add a fit module - -Now that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are: - - * `interface` is a single character value that could be "formula", "data.frame", or "matrix". This defines the type of interface used by the underlying fit function (`mda::mda`, in this case). This helps the translation of the data to be in an appropriate format for that function. - - * `protect` is an optional list of function arguments that **should not be changeable** by the user. In this case, we probably don't want users to pass data values to these arguments (until the `fit()` function is called). - - * `func` is the package and name of the function that will be called. If you are using a locally defined function, only `fun` is required. - - * `defaults` is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. This isn't needed in this case, but is described later in this document. 
- -For the first engine: - - -```r -set_fit( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - value = list( - interface = "formula", - protect = c("formula", "data"), - func = c(pkg = "mda", fun = "mda"), - defaults = list() - ) -) - -show_model_info("discrim_mixture") -#> Information for `discrim_mixture` -#> modes: unknown, classification -#> -#> engines: -#> classification: mda -#> -#> ¹The model can use case weights. -#> -#> arguments: -#> mda: -#> sub_classes --> subclasses -#> -#> fit modules: -#> engine mode -#> mda classification -#> -#> no registered prediction modules. -``` - -We also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly. - - * `predictor_indicators` describes whether and how to create indicator/dummy variables from factor predictors. There are three options: `"none"` (do not expand factor predictors), `"traditional"` (apply the standard `model.matrix()` encodings), and `"one_hot"` (create the complete set including the baseline level for all factors). - - * `compute_intercept` controls whether `model.matrix()` should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, `model.matrix()` computes dummy variables for all but one factor level. Without an intercept, `model.matrix()` computes a full set of indicators for the first factor variable, but an incomplete set for the remainder. - - * `remove_intercept` removes the intercept column *after* `model.matrix()` is finished. This can be useful if the model function (e.g. `lm()`) automatically generates an intercept. - -* `allow_sparse_x` specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning. 
- - -```r -set_encoding( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - options = list( - predictor_indicators = "traditional", - compute_intercept = TRUE, - remove_intercept = TRUE, - allow_sparse_x = FALSE - ) -) -``` - - -### Step 4. Add modules for prediction - -Similar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the `class` object contains the details. The elements of the list are: - - * `pre` and `post` are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won't be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a `pre` function with the `args` below. - * `func` is the prediction function (in the same format as above). In many cases, packages have a predict method for their model's class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to `predict()` with no associated package. - * `args` is a list of arguments to pass to the prediction function. These will most likely be wrapped in `rlang::expr()` so that they are not evaluated when defining the method. For mda, the code would be `predict(object, newdata, type = "class")`. What is actually given to the function is the parsnip model fit object, which includes a sub-object called `fit()` that houses the mda model object. If the data need to be a matrix or data frame, you could also use `newdata = quote(as.data.frame(newdata))` or similar. - -The parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data. 
- -To add this method to the model environment, a similar `set()` function is used: - - -```r -class_info <- - list( - pre = NULL, - post = NULL, - func = c(fun = "predict"), - args = - # These lists should be of the form: - # {predict.mda argument name} = {values provided from parsnip objects} - list( - # We don't want the first two arguments evaluated right now - # since they don't exist yet. `type` is a simple object that - # doesn't need to have its evaluation deferred. - object = quote(object$fit), - newdata = quote(new_data), - type = "class" - ) - ) - -set_pred( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - type = "class", - value = class_info -) -``` - -A similar call can be used to define the class probability module (if they can be computed). The format is identical to the `class` module but the output is expected to be a tibble with columns for each factor level. - -As an example of the `post` function, the data frame created by `mda:::predict.mda()` will be converted to a tibble. The arguments are `x` (the raw results coming from the predict method) and `object` (the parsnip model fit object). The latter has a sub-object called `lvl` which is a character string of the outcome's factor levels (if any). - -We register the probability module. There is a template function that makes this slightly easier to format the objects: - - -```r -prob_info <- - pred_value_template( - post = function(x, object) { - tibble::as_tibble(x) - }, - func = c(fun = "predict"), - # Now everything else is put into the `args` slot - object = quote(object$fit), - newdata = quote(new_data), - type = "posterior" - ) - -set_pred( - model = "discrim_mixture", - eng = "mda", - mode = "classification", - type = "prob", - value = prob_info -) - -show_model_info("discrim_mixture") -#> Information for `discrim_mixture` -#> modes: unknown, classification -#> -#> engines: -#> classification: mda -#> -#> ¹The model can use case weights. 
-#> -#> arguments: -#> mda: -#> sub_classes --> subclasses -#> -#> fit modules: -#> engine mode -#> mda classification -#> -#> prediction modules: -#> mode engine methods -#> classification mda class, prob -``` - -If this model could be used for regression situations, we could also add a "numeric" module. For `pred`, the model requires an unnamed numeric vector output (usually). - -Examples are [here](https://github.com/tidymodels/parsnip/blob/master/R/linear_reg_data.R) and [here](https://github.com/tidymodels/parsnip/blob/master/R/rand_forest_data.R). - - -### Does it work? - -As a developer, one thing that may come in handy is the `translate()` function. This will tell you what the model's eventual syntax will be. - -For example: - - -```r -discrim_mixture(sub_classes = 2) %>% - translate(engine = "mda") -#> discrim mixture Model Specification (classification) -#> -#> Main Arguments: -#> sub_classes = 2 -#> -#> Computational engine: mda -#> -#> Model fit template: -#> mda::mda(formula = missing_arg(), data = missing_arg(), subclasses = 2) -``` - -Let's try it on a data set from the modeldata package: - - -```r -data("two_class_dat", package = "modeldata") -set.seed(4622) -example_split <- initial_split(two_class_dat, prop = 0.99) -example_train <- training(example_split) -example_test <- testing(example_split) - -mda_spec <- discrim_mixture(sub_classes = 2) %>% - set_engine("mda") - -mda_fit <- mda_spec %>% - fit(Class ~ ., data = example_train, engine = "mda") -mda_fit -#> parsnip model object -#> -#> Call: -#> mda::mda(formula = Class ~ ., data = data, subclasses = ~2) -#> -#> Dimension: 2 -#> -#> Percent Between-Group Variance Explained: -#> v1 v2 -#> 82.6 100.0 -#> -#> Degrees of Freedom (per dimension): 3 -#> -#> Training Misclassification Error: 0.172 ( N = 783 ) -#> -#> Deviance: 671 - -predict(mda_fit, new_data = example_test, type = "prob") %>% - bind_cols(example_test %>% select(Class)) -#> # A tibble: 8 × 3 -#> .pred_Class1 .pred_Class2 Class -#> -#> 
1 0.679 0.321 Class1 -#> 2 0.690 0.310 Class1 -#> 3 0.384 0.616 Class2 -#> 4 0.300 0.700 Class1 -#> 5 0.0262 0.974 Class2 -#> 6 0.405 0.595 Class2 -#> 7 0.793 0.207 Class1 -#> 8 0.0949 0.905 Class2 - -predict(mda_fit, new_data = example_test) %>% - bind_cols(example_test %>% select(Class)) -#> # A tibble: 8 × 2 -#> .pred_class Class -#> -#> 1 Class1 Class1 -#> 2 Class1 Class1 -#> 3 Class2 Class2 -#> 4 Class2 Class1 -#> 5 Class2 Class2 -#> 6 Class2 Class2 -#> 7 Class1 Class1 -#> 8 Class2 Class2 -``` - - -## Add an engine - -The process for adding an engine to an existing model is _almost_ the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. The code for the `rlm()` function in MASS is pretty similar to `lm()`, so we can copy that code and change the package/function names: - - -```r -set_model_engine("linear_reg", "regression", eng = "rlm") -set_dependency("linear_reg", eng = "rlm", pkg = "MASS") - -set_fit( - model = "linear_reg", - eng = "rlm", - mode = "regression", - value = list( - interface = "formula", - protect = c("formula", "data", "weights"), - func = c(pkg = "MASS", fun = "rlm"), - defaults = list() - ) -) - -set_encoding( - model = "linear_reg", - eng = "rlm", - mode = "regression", - options = list( - predictor_indicators = "traditional", - compute_intercept = TRUE, - remove_intercept = TRUE, - allow_sparse_x = FALSE - ) -) - -set_pred( - model = "linear_reg", - eng = "rlm", - mode = "regression", - type = "numeric", - value = list( - pre = NULL, - post = NULL, - func = c(fun = "predict"), - args = - list( - object = expr(object$fit), - newdata = expr(new_data), - type = "response" - ) - ) -) - -# testing: -linear_reg() %>% - set_engine("rlm") %>% - fit(mpg ~ ., data = mtcars) -#> parsnip model object -#> -#> Call: -#> rlm(formula = mpg ~ ., data = data) -#> Converged in 
8 iterations -#> -#> Coefficients: -#> (Intercept) cyl disp hp drat wt -#> 17.8225 -0.2788 0.0159 -0.0254 0.4639 -4.1436 -#> qsec vs am gear carb -#> 0.6531 0.2498 1.4341 0.8594 -0.0108 -#> -#> Degrees of freedom: 32 total; 21 residual -#> Scale estimate: 2.15 -``` - -## Add parsnip models to another package - -The process here is almost the same. All of the previous functions are still required but their execution is a little different. - -For parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the "Depends" category. - -The first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. For our example here, this might look like: - - -```r -make_discrim_mixture_mda <- function() { - parsnip::set_new_model("discrim_mixture") - - parsnip::set_model_mode("discrim_mixture", "classification") - - # and so one... -} -``` - -This function is then executed when your package is loaded: - - -```r -.onLoad <- function(libname, pkgname) { - # This defines discrim_mixture in the model database - make_discrim_mixture_mda() -} -``` - -For an example package that uses parsnip definitions, take a look at the [discrim](https://github.com/tidymodels/discrim) package. - -{{% warning %}} To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. `set_new_model()` and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing. {{%/ warning %}} - -It is also important for parallel processing support to **list the home package as a dependency**. If the `discrim_mixture()` function lived in a package called `mixedup`, include the line: - -```r -set_dependency("discrim_mixture", eng = "mda", pkg = "mixedup") -``` - -Parallel processing requires this explicit dependency setting. 
When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters has no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip's internal database (and produce an error). - - -## Your model, tuning parameters, and you - -The tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. `discrim_mixture()` has one main tuning parameter: `sub_classes`. To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. - -`tunable()` takes the model specification as its argument and returns a tibble with columns: - -* `name`: The name of the argument. - -* `call_info`: A list that describes how to call a function that returns a dials parameter object. - -* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `"model_spec"`. - -* `component`: A character string with more information about the source. For models, this is just the name of the function (e.g. `"discrim_mixture"`). - -* `component_id`: A character string to indicate where a unique identifier is for the object. For a model, this indicates the type of model argument (e.g. "main"). - -The main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. 
- -For example, for a nearest-neighbors `neighbors` parameter, this value is just: - - -```r -info <- list(pkg = "dials", fun = "neighbors") - -# FYI: how it is used under-the-hood: -new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg) -rlang::eval_tidy(new_param_call) -#> # Nearest Neighbors (quantitative) -#> Range: [1, 10] -``` - -For `discrim_mixture()`, a dials object is needed that returns an integer that is the number of sub-classes that should be create. We can create a dials parameter function for this: - - -```r -sub_classes <- function(range = c(1L, 10L), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(sub_classes = "# Sub-Classes"), - finalize = NULL - ) -} -``` - -If this were in the dials package, we could use: - - -```r -tunable.discrim_mixture <- function(x, ...) { - tibble::tibble( - name = c("sub_classes"), - call_info = list(list(pkg = NULL, fun = "sub_classes")), - source = "model_spec", - component = "discrim_mixture", - component_id = "main" - ) -} -``` - -Once this method is in place, the tuning functions can be used: - - -```r -mda_spec <- - discrim_mixture(sub_classes = tune()) %>% - set_engine("mda") - -set.seed(452) -cv <- vfold_cv(example_train) -mda_tune_res <- mda_spec %>% - tune_grid(Class ~ ., cv, grid = 4) -show_best(mda_tune_res, metric = "roc_auc") -#> # A tibble: 4 × 7 -#> sub_classes .metric .estimator mean n std_err .config -#> -#> 1 2 roc_auc binary 0.890 10 0.0143 Preprocessor1_Model3 -#> 2 3 roc_auc binary 0.889 10 0.0142 Preprocessor1_Model4 -#> 3 6 roc_auc binary 0.884 10 0.0147 Preprocessor1_Model2 -#> 4 8 roc_auc binary 0.881 10 0.0146 Preprocessor1_Model1 -``` - - - -## Pro-tips, what-ifs, exceptions, FAQ, and minutiae - -There are various things that came to mind while developing this resource. 
- -**Do I have to return a simple vector for `predict` and `predict_class`?** - -Previously, when discussing the `pred` information: - -> For `pred`, the model requires an unnamed numeric vector output **(usually)**. - -There are some models (e.g. `glmnet`, `plsr`, `Cubist`, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called `values` and have extra columns for any parameters that define the different sub-models. - -For example, if I fit a linear regression model via `glmnet` and get four values of the regularization parameter (`lambda`): - - -```r -linear_reg() %>% - set_engine("glmnet", nlambda = 4) %>% - fit(mpg ~ ., data = mtcars) %>% - multi_predict(new_data = mtcars[1:3, -1]) -``` - -_However_, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (`lambda`, in this case). - -**What do I do about how my model handles factors or categorical data?** - -Some modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using `model.matrix()`), and some do not. Some examples of models that do _not_ create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a `model.matrix()`-like default encoding for categorical data used in a model formula, but you can change this encoding using `set_encoding()`. 
For example, you can set predictor encodings that say, "leave my data alone," and keep factors as is: - - -```r -set_encoding( - model = "decision_tree", - eng = "rpart", - mode = "regression", - options = list( - predictor_indicators = "none", - compute_intercept = FALSE, - remove_intercept = FALSE - ) -) -``` - -{{% note %}} There are three options for `predictor_indicators`: -- "none" (do not expand factor predictors) -- "traditional" (apply the standard `model.matrix()` encoding) -- "one_hot" (create the complete set including the baseline level for all factors) {{%/ note %}} - -To learn more about encoding categorical predictors, check out [this blog post](https://www.tidyverse.org/blog/2020/07/parsnip-0-1-2/#predictor-encoding-consistency). - -**What is the `defaults` slot and why do I need it?** - -You might want to set defaults that can be overridden by the user. For example, for logistic regression with `glm`, it makes sense to default `family = binomial`. However, if someone wants to use a different link function, they should be able to do that. For that model/engine definition, it has: - - -```r -defaults = list(family = expr(binomial)) -``` - -So that is the default: - - -```r -logistic_reg() %>% translate(engine = "glm") - -# but you can change it: - -logistic_reg() %>% - set_engine("glm", family = expr(binomial(link = "probit"))) %>% - translate() -``` - -That's what `defaults` are for. - -Note that we wrapped `binomial` inside of `expr()`. If we didn't, it would substitute the results of executing `binomial()` inside of the expression (and that's a mess). - -**What if I want more complex defaults?** - -The `translate` function can be used to check values or set defaults once the model's mode is known. To do this, you can create a model-specific S3 method that first calls the general method (`translate.model_spec()`) and then makes modifications or conducts error traps. 
- -For example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check: - - -```r -# Simplified version -translate.rand_forest <- function (x, engine, ...){ - # Run the general method to get the real arguments in place - x <- translate.default(x, engine, ...) - - # Check and see if they make sense for the engine and/or mode: - if (x$engine == "ranger") { - if (any(names(x$method$fit$args) == "importance")) - if (is.logical(x$method$fit$args$importance)) - rlang::abort("`importance` should be a character value. See ?ranger::ranger.") - } - x -} -``` - -As another example, `nnet::nnet()` has an option for the final layer to be linear (called `linout`). If `mode = "regression"`, that should probably be set to `TRUE`. You couldn't do this with the `args` (described above) since you need the function translated first. - - -**My model fit requires more than one function call. So....?** - -The best course of action is to write a wrapper so that it can be one call. This was the case with xgboost and keras. - -**Why would I preprocess my data?** - -There might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.) - -This would **not** include making dummy variables and `model.matrix` stuff. The parsnip infrastructure already does that for you. - - -**Why would I post-process my predictions?** - -What comes back from some R functions may be somewhat... arcane or problematic. As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (*narrator: they don't*). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set. 
- -Another example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure. - -These are the types of problems that the post-processor will solve. - -**Are there other modes?** - -Not yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don't make direct predictions of the outcome. - -If you have a suggestion, please add a [GitHub issue](https://github.com/tidymodels/parsnip/issues) to discuss it. - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> mda * 0.5-3 2022-05-05 [1] CRAN (R 4.2.0) -#> modeldata * 1.0.1 2022-09-06 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> 
yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - - - diff --git a/content/learn/develop/parameters/index.Rmarkdown b/content/learn/develop/parameters/index.Rmarkdown deleted file mode 100644 index fe644703..00000000 --- a/content/learn/develop/parameters/index.Rmarkdown +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: "How to create a tuning parameter function" -tags: [dials] -categories: [] -type: learn-subsection -weight: 4 -description: | - Build functions to use in tuning both quantitative and qualitative parameters. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(tidymodels) - -pkgs <- c("dials", "scales") -``` - -## Introduction - -`r req_pkgs(pkgs)` - -Some models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using `new_quant_param()` or `new_qual_param()`, respectively. This article is a guide to creating new parameters. - -## Quantitative parameters - -As an example, let's consider the multivariate adaptive regression spline ([MARS](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline)) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method. - -MARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by `earth::earth()` or the number of final terms can be set by the user. The parsnip function `mars()` has a parameter called `num_terms` that defines this. - -What if we want to create a parameter for the number of _initial terms_ included in the model. 
There is no argument in `parsnip::mars()` for this but we will make one now. The argument name in `earth::earth()` is `nk`, which is not very descriptive. Our parameter will be called `num_initial_terms`. - -We use the `new_quant_param()` function since this is a numeric parameter. The main two arguments to a numeric parameter function are `range` and `trans`. - -The `range` specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is - -```{r eart, eval = FALSE} -min(200, max(20, 2 * ncol(x))) + 1 -``` - -where `x` is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let's specify an upper limit of 10 but this will be discussed more in the next section. - -The other argument is `trans`, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a `penalty` parameter that tends to range between zero and some upper bound (let's say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units. - -The `trans` parameter accepts a transformation object from the scales package. For example: - -```{r scales} -library(scales) -lsf.str("package:scales", pattern = "_trans$") -scales::log10_trans() -``` - -A value of `NULL` means that no transformation should be used. - -A quantitative parameter function should have these two arguments and, in the function body, a call `new_quant_param()`. 
There are a few arguments to this function: - -```{r new_quant_param} -library(tidymodels) -args(new_quant_param) -``` - -* Possible types are double precision and integers. The value of `type` should agree with the values of `range` in the function definition. - -* It's OK for our tuning to include the minimum or maximum, so we'll use `c(TRUE, TRUE)` for `inclusive`. If the value cannot include one end of the range, set one or both of these values to `FALSE`. - -* The `label` should be a named character string where the name is the parameter name and the value represents what will be printed automatically. - -* `finalize` is an argument that can set parts of the range. This is discussed more below. - -Here's an example of a basic quantitative parameter object: - -```{r num-initial-terms} -num_initial_terms <- function(range = c(1L, 10L), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(num_initial_terms = "# Initial MARS Terms"), - finalize = NULL - ) -} - -num_initial_terms() - -# Sample from the parameter: -set.seed(4832856) -num_initial_terms() %>% value_sample(5) -``` - -### Finalizing parameters - -It might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of `unknown()` can be added. This will force the user to "finalize" the parameter object for their particular data set. Let's redefine our function with an `unknown()` value: - -```{r num-initial-terms-unk, error = TRUE} -num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(num_initial_terms = "# Initial MARS Terms"), - finalize = NULL - ) -} -num_initial_terms() - -# Can we sample? 
-num_initial_terms() %>% value_sample(5) -``` - -The `finalize` argument of `num_initial_terms()` can take a function that uses data to set the range. For example, the package already includes a few functions for finalization: - -```{r dials-final-funcs} -lsf.str("package:dials", pattern = "^get_") -``` - -These functions generally take a data frame of predictors (in an argument called `x`) and add the range of the parameter object. Using the formula in the earth package, we might use: - -```{r earth-range} -get_initial_mars_terms <- function(object, x) { - upper_bound <- min(200, max(20, 2 * ncol(x))) + 1 - upper_bound <- as.integer(upper_bound) - bounds <- range_get(object) - bounds$upper <- upper_bound - range_set(object, bounds) -} - -# Use the mtcars are the finalize the upper bound: -num_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1]) -``` - -Once we add this function to the object, the general `finalize()` method can be used: - -```{r final-obj} -num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(num_initial_terms = "# Initial MARS Terms"), - finalize = get_initial_mars_terms - ) -} - -num_initial_terms() %>% finalize(x = mtcars[, -1]) -``` - -## Qualitative parameters - -Now let's look at an example of a qualitative parameter. If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be `min`, `max`, `mean`, `sum`, or to not aggregate at all ("none"). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We'll take "character" input (not "logical"), and we must specify the allowed values. By default we won't aggregate, so we are putting `"none"` as the first value. 
- -```{r aggregation} -aggregation <- function(values = c("none", "min", "max", "mean", "sum")) { - new_qual_param( - type = "character", - values = values, - label = c(aggregation = "Aggregation Method") - ) -} -``` - -Within the dials package, the convention is to have the values contained in a separate vector whose name starts with `values_`. For example: - -```{r aggregation-vec} -values_aggregation <- c("none", "min", "max", "mean", "sum") -aggregation <- function(values = values_aggregation) { - new_qual_param( - type = "character", - values = values, - label = c(aggregation = "Aggregation Method") - ) -} -``` - -This step may not make sense if you are using the function in a script and not keeping it within a package. - -We can use our `aggregation` parameters with dials functions. - -```{r aggregation-use} -aggregation() -aggregation() %>% value_sample(3) -``` - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - - diff --git a/content/learn/develop/parameters/index.markdown b/content/learn/develop/parameters/index.markdown deleted file mode 100644 index a9e789dd..00000000 --- a/content/learn/develop/parameters/index.markdown +++ /dev/null @@ -1,283 +0,0 @@ ---- -title: "How to create a tuning parameter function" -tags: [dials] -categories: [] -type: learn-subsection -weight: 4 -description: | - Build functions to use in tuning both quantitative and qualitative parameters. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: dials and scales. - -Some models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using `new_quant_param()` or `new_qual_param()`, respectively. This article is a guide to creating new parameters. 
- -## Quantitative parameters - -As an example, let's consider the multivariate adaptive regression spline ([MARS](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline)) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method. - -MARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by `earth::earth()` or the number of final terms can be set by the user. The parsnip function `mars()` has a parameter called `num_terms` that defines this. - -What if we want to create a parameter for the number of _initial terms_ included in the model. There is no argument in `parsnip::mars()` for this but we will make one now. The argument name in `earth::earth()` is `nk`, which is not very descriptive. Our parameter will be called `num_initial_terms`. - -We use the `new_quant_param()` function since this is a numeric parameter. The main two arguments to a numeric parameter function are `range` and `trans`. - -The `range` specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is - - -```r -min(200, max(20, 2 * ncol(x))) + 1 -``` - -where `x` is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let's specify an upper limit of 10 but this will be discussed more in the next section. - -The other argument is `trans`, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a `penalty` parameter that tends to range between zero and some upper bound (let's say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. 
In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units. - -The `trans` parameter accepts a transformation object from the scales package. For example: - - -```r -library(scales) -lsf.str("package:scales", pattern = "_trans$") -#> asn_trans : function () -#> atanh_trans : function () -#> boxcox_trans : function (p, offset = 0) -#> compose_trans : function (...) -#> date_trans : function () -#> exp_trans : function (base = exp(1)) -#> hms_trans : function () -#> identity_trans : function () -#> log_trans : function (base = exp(1)) -#> log10_trans : function () -#> log1p_trans : function () -#> log2_trans : function () -#> logit_trans : function () -#> modulus_trans : function (p, offset = 1) -#> probability_trans : function (distribution, ...) -#> probit_trans : function () -#> pseudo_log_trans : function (sigma = 1, base = exp(1)) -#> reciprocal_trans : function () -#> reverse_trans : function () -#> sqrt_trans : function () -#> time_trans : function (tz = NULL) -#> yj_trans : function (p) -scales::log10_trans() -#> Transformer: log-10 [1e-100, Inf] -``` - -A value of `NULL` means that no transformation should be used. - -A quantitative parameter function should have these two arguments and, in the function body, a call `new_quant_param()`. There are a few arguments to this function: - - -```r -library(tidymodels) -args(new_quant_param) -#> function (type = c("double", "integer"), range = NULL, inclusive = NULL, -#> default = deprecated(), trans = NULL, values = NULL, label = NULL, -#> finalize = NULL) -#> NULL -``` - -* Possible types are double precision and integers. The value of `type` should agree with the values of `range` in the function definition. 
- -* It's OK for our tuning to include the minimum or maximum, so we'll use `c(TRUE, TRUE)` for `inclusive`. If the value cannot include one end of the range, set one or both of these values to `FALSE`. - -* The `label` should be a named character string where the name is the parameter name and the value represents what will be printed automatically. - -* `finalize` is an argument that can set parts of the range. This is discussed more below. - -Here's an example of a basic quantitative parameter object: - - -```r -num_initial_terms <- function(range = c(1L, 10L), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(num_initial_terms = "# Initial MARS Terms"), - finalize = NULL - ) -} - -num_initial_terms() -#> # Initial MARS Terms (quantitative) -#> Range: [1, 10] - -# Sample from the parameter: -set.seed(4832856) -num_initial_terms() %>% value_sample(5) -#> [1] 6 4 9 10 4 -``` - -### Finalizing parameters - -It might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of `unknown()` can be added. This will force the user to "finalize" the parameter object for their particular data set. Let's redefine our function with an `unknown()` value: - - -```r -num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(num_initial_terms = "# Initial MARS Terms"), - finalize = NULL - ) -} -num_initial_terms() -#> # Initial MARS Terms (quantitative) -#> Range: [1, ?] - -# Can we sample? -num_initial_terms() %>% value_sample(5) -#> Error in `range_validate()`: -#> ! Cannot validate ranges when they contains 1+ unknown values. 
-``` - -The `finalize` argument of `num_initial_terms()` can take a function that uses data to set the range. For example, the package already includes a few functions for finalization: - - -```r -lsf.str("package:dials", pattern = "^get_") -#> get_batch_sizes : function (object, x, frac = c(1/10, 1/3), ...) -#> get_log_p : function (object, x, ...) -#> get_n : function (object, x, log_vals = FALSE, ...) -#> get_n_frac : function (object, x, log_vals = FALSE, frac = 1/3, ...) -#> get_n_frac_range : function (object, x, log_vals = FALSE, frac = c(1/10, 5/10), ...) -#> get_p : function (object, x, log_vals = FALSE, ...) -#> get_rbf_range : function (object, x, seed = sample.int(10^5, 1), ...) -``` - -These functions generally take a data frame of predictors (in an argument called `x`) and add the range of the parameter object. Using the formula in the earth package, we might use: - - -```r -get_initial_mars_terms <- function(object, x) { - upper_bound <- min(200, max(20, 2 * ncol(x))) + 1 - upper_bound <- as.integer(upper_bound) - bounds <- range_get(object) - bounds$upper <- upper_bound - range_set(object, bounds) -} - -# Use the mtcars are the finalize the upper bound: -num_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1]) -#> # Initial MARS Terms (quantitative) -#> Range: [1, 21] -``` - -Once we add this function to the object, the general `finalize()` method can be used: - - -```r -num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) { - new_quant_param( - type = "integer", - range = range, - inclusive = c(TRUE, TRUE), - trans = trans, - label = c(num_initial_terms = "# Initial MARS Terms"), - finalize = get_initial_mars_terms - ) -} - -num_initial_terms() %>% finalize(x = mtcars[, -1]) -#> # Initial MARS Terms (quantitative) -#> Range: [1, 21] -``` - -## Qualitative parameters - -Now let's look at an example of a qualitative parameter. 
If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be `min`, `max`, `mean`, `sum`, or to not aggregate at all ("none"). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We'll take "character" input (not "logical"), and we must specify the allowed values. By default we won't aggregate, so we are putting `"none"` as the first value. - - -```r -aggregation <- function(values = c("none", "min", "max", "mean", "sum")) { - new_qual_param( - type = "character", - values = values, - label = c(aggregation = "Aggregation Method") - ) -} -``` - -Within the dials package, the convention is to have the values contained in a separate vector whose name starts with `values_`. For example: - - -```r -values_aggregation <- c("none", "min", "max", "mean", "sum") -aggregation <- function(values = values_aggregation) { - new_qual_param( - type = "character", - values = values, - label = c(aggregation = "Aggregation Method") - ) -} -``` - -This step may not make sense if you are using the function in a script and not keeping it within a package. - -We can use our `aggregation` parameters with dials functions. - - -```r -aggregation() -#> Aggregation Method (qualitative) -#> 5 possible values include: -#> 'none', 'min', 'max', 'mean' and 'sum' -aggregation() %>% value_sample(3) -#> [1] "min" "sum" "mean" -``` - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - - diff --git a/content/learn/develop/recipes/index.Rmarkdown b/content/learn/develop/recipes/index.Rmarkdown deleted file mode 100755 index 1f3b15c4..00000000 --- a/content/learn/develop/recipes/index.Rmarkdown +++ /dev/null @@ -1,503 +0,0 @@ ---- -title: "Create your own recipe step function" -tags: [recipes] -categories: [] -type: learn-subsection -weight: 1 -description: | - Write a new recipe step for data preprocessing. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r ex_setup, include=FALSE} -library(tidymodels) -library(modeldata) -pkgs <- c("tidymodels", "modeldata") -theme_set(theme_bw() + theme(legend.position = "top")) -set.seed(1234) # for recipe step ID -``` - -## Introduction - -`r req_pkgs(pkgs)` - -There are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages [can be found here](/find/recipes/). However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for [centering](https://github.com/tidymodels/recipes/blob/master/R/center.R) or [PCA](https://github.com/tidymodels/recipes/blob/master/R/pca.R) to start. - -For check operations (e.g. `check_class()`), the process is very similar. Notes on this are available at the end of this article. - -The general process to follow is to: - -1. Define a step constructor function. - -2. Create the minimal S3 methods for `prep()`, `bake()`, and `print()`. - -3. Optionally add some extra methods to work with other tidymodels packages, such as `tunable()` and `tidy()`. - -As an example, we will create a step for converting data into percentiles. - -## A new step definition - -Let's create a step that replaces the value of a variable with its percentile from the training set. The example data we'll use is from the modeldata package: - -```{r initial} -library(modeldata) -data(biomass) -str(biomass) - -biomass_tr <- biomass[biomass$dataset == "Training",] -biomass_te <- biomass[biomass$dataset == "Testing",] -``` - -To illustrate the transformation with the `carbon` variable, note the training set distribution of this variable with a vertical line below for the first value of the test set. 
- -```{r carbon_dist, fig.width=6, fig.height=4.25, out.width = '100%'} -library(ggplot2) -theme_set(theme_bw()) -ggplot(biomass_tr, aes(x = carbon)) + - geom_histogram(binwidth = 5, col = "blue", fill = "blue", alpha = .5) + - geom_vline(xintercept = biomass_te$carbon[1], lty = 2) -``` - -Based on the training set, `r round(mean(biomass_tr$carbon <= biomass_te$carbon[1])*100, 1)`% of the data are less than a value of `r biomass_te$carbon[1]`. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values. - -Our new step will do this computation for any numeric variables of interest. We will call this new recipe step `step_percentile()`. The code below is designed for illustration and not speed or best practices. We've left out a lot of error trapping that we would want in a real implementation. - -## Create the function - -To start, there is a _user-facing_ function. Let's call that `step_percentile()`. This is just a simple wrapper around a _constructor function_, which defines the rules for any step object that defines a percentile transformation. We'll call this constructor `step_percentile_new()`. - -The function `step_percentile()` takes the same arguments as your function and simply adds it to a new recipe. The `...` signifies the variable selectors that can be used. - -```{r initial_def} -step_percentile <- function( - recipe, - ..., - role = NA, - trained = FALSE, - ref_dist = NULL, - options = list(probs = (0:100)/100, names = TRUE), - skip = FALSE, - id = rand_id("percentile") - ) { - - add_step( - recipe, - step_percentile_new( - terms = enquos(...), - trained = trained, - role = role, - ref_dist = ref_dist, - options = options, - skip = skip, - id = id - ) - ) -} -``` - -You should always keep the first four arguments (`recipe` though `trained`) the same as listed above. 
Some notes: - - * the `role` argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using `role = NA` will leave the existing role intact. - * `trained` is set by the package when the estimation step has been run. You should default your function definition's argument to `FALSE`. - * `skip` is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to `bake()` is used. For example, if a step is applied to the variables with roles of "outcomes", these data would not be available for new samples. - * `id` is a character string that can be used to identify steps in package code. `rand_id()` will create an ID that has the prefix and a random character sequence. - -We can estimate the percentiles of new data points based on the percentiles from the training set with `approx()`. Our `step_percentile` contains a `ref_dist` object to store these percentiles (pre-computed from the training set in `prep()`) for later use in `bake()`. - -We will use `stats::quantile()` to compute the grid. However, we might also want to have control over the granularity of this grid, so the `options` argument will be used to define how that calculation is done. We could use the ellipses (aka `...`) so that any options passed to `step_percentile()` that are not one of its arguments will then be passed to `stats::quantile()`. However, we recommend making a separate list object with the options and use these inside the function because `...` is already used to define the variable selection. - -It is also important to consider if there are any _main arguments_ to the step. For example, for spline-related steps such as `step_ns()`, users typically want to adjust the argument for the degrees of freedom in the spline (e.g. `splines::ns(x, df)`). 
Rather than letting users add `df` to the `options` argument: - -* Allow the important arguments to be main arguments to the step function. - -* Follow the tidymodels [conventions for naming arguments](https://tidymodels.github.io/model-implementation-principles/standardized-argument-names.html). Whenever possible, avoid jargon and keep common argument names. - -There are benefits to following these principles (as shown below). - -## Initialize a new object - -Now, the constructor function can be created. - -The function cascade is: - -``` -step_percentile() calls recipes::add_step() -└──> recipes::add_step() calls step_percentile_new() - └──> step_percentile_new() calls recipes::step() -``` - -`step()` is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using `subclass = "percentile"` will set the class of new objects to `"step_percentile"`. - -```{r initialize} -step_percentile_new <- - function(terms, role, trained, ref_dist, options, skip, id) { - step( - subclass = "percentile", - terms = terms, - role = role, - trained = trained, - ref_dist = ref_dist, - options = options, - skip = skip, - id = id - ) - } -``` - -This constructor function should have no default argument values. Defaults should be set in the user-facing step object. - -## Create the `prep` method - -You will need to create a new `prep()` method for your step's class. To do this, three arguments that the method should have are: - -```r -function(x, training, info = NULL) -``` - -where - - * `x` will be the `step_percentile` object, - * `training` will be a _tibble_ that has the training set data, and - * `info` will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific `prep()` method so it may not have the variables from the original data. 
The columns in this tibble are `variable` (the variable name), `type` (currently either "numeric" or "nominal"), `role` (defining the variable's role), and `source` (either "original" or "derived" depending on where it originated). - -You can define other arguments as well. - -The first thing that you might want to do in the `prep()` function is to translate the specification listed in the `terms` argument to column names in the current data. There is a function called `recipes_eval_select()` that can be used to obtain this. - -{{% warning %}} The `recipes_eval_select()` function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps. {{%/ warning %}} - -```{r prep_1, eval = FALSE} -prep.step_percentile <- function(x, training, info = NULL, ...) { - col_names <- recipes_eval_select(x$terms, training, info) - # TODO finish the rest of the function -} -``` - -After this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See `recipes::check_type()` to do this for basic types. - -Once we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run `rlang::exec()` to splice in any extra arguments contained in the `options` list to the call to `quantile()`: - -```{r splice} -get_train_pctl <- function(x, args = NULL) { - res <- rlang::exec("quantile", x = x, !!!args) - # Remove duplicate percentile values - res[!duplicated(res)] -} - -# For example: -get_train_pctl(biomass_tr$carbon, list(probs = 0:1)) -get_train_pctl(biomass_tr$carbon) -``` - -Now, the `prep()` method can be created: - -```{r prep-2} -prep.step_percentile <- function(x, training, info = NULL, ...) { - col_names <- recipes_eval_select(x$terms, training, info) - ## You can add error trapping for non-numeric data here and so on. 
- - ## We'll use the names later so make sure they are available - if (x$options$names == FALSE) { - rlang::abort("`names` should be set to TRUE") - } - - if (!any(names(x$options) == "probs")) { - x$options$probs <- (0:100)/100 - } else { - x$options$probs <- sort(unique(x$options$probs)) - } - - # Compute percentile grid - ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options) - - ## Use the constructor function to return the updated object. - ## Note that `trained` is now set to TRUE - - step_percentile_new( - terms = x$terms, - trained = TRUE, - role = x$role, - ref_dist = ref_dist, - options = x$options, - skip = x$skip, - id = x$id - ) -} -``` - -We suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former can be used for better traceback results. - - -## Create the `bake` method - -Remember that the `prep()` function does not _apply_ the step to the data; it only estimates any required values such as `ref_dist`. We will need to create a new method for our `step_percentile()` class. The minimum arguments for this are - -```r -function(object, new_data, ...) -``` - -where `object` is the updated step function that has been through the corresponding `prep()` code and `new_data` is a tibble of data to be processed. The output of this function needs to be a tibble of the modified version of `new_data`. - -Here is the code to convert the new data to percentiles. 
The input data (`x` below) comes in as a numeric vector and the output is a vector of approximate percentiles: - -```{r bake-helpers} -pctl_by_approx <- function(x, ref) { - # In case duplicates were removed, get the percentiles from - # the names of the reference object - grid <- as.numeric(gsub("%$", "", names(ref))) - approx(x = ref, y = grid, xout = x)$y/100 -} -``` - -These computations are done column-wise using `purrr::map2_dfc()` to modify the new data in-place: - -```{r bake-method} -bake.step_percentile <- function(object, new_data, ...) { - ## For illustration (and not speed), we will loop through the affected variables - ## and do the computations - vars <- names(object$ref_dist) - - new_data[, vars] <- - purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx) - - new_data -} -``` - -`tibble::as_tibble()` can be used at the end of the function to turn `new_data` back into a tibble if needed. We didn't use `tibble::as_tibble()` above since `new_data` stayed as a tibble throughout the whole function. - -{{% note %}} You need to import `recipes::prep()` and `recipes::bake()` to create your own step function in a package. 
{{%/ note %}} - -## Run the example - -Let's use the example data to make sure that it works: - -```{r example} -rec_obj <- - recipe(HHV ~ ., data = biomass_tr) %>% - step_percentile(ends_with("gen")) %>% - prep(training = biomass_tr) - -biomass_te %>% select(ends_with("gen")) %>% slice(1:2) -bake(rec_obj, biomass_te %>% slice(1:2), ends_with("gen")) - -# Checking to get approximate result: -mean(biomass_tr$hydrogen <= biomass_te$hydrogen[1]) -mean(biomass_tr$oxygen <= biomass_te$oxygen[1]) -``` - -The plot below shows how the original hydrogen percentiles line up with the estimated values: - -```{r cdf_plot,} -hydrogen_values <- - bake(rec_obj, biomass_te, hydrogen) %>% - bind_cols(biomass_te %>% select(original = hydrogen)) - -ggplot(biomass_tr, aes(x = hydrogen)) + - # Plot the empirical distribution function of the - # hydrogen training set values as a black line - stat_ecdf() + - # Overlay the estimated percentiles for the new data: - geom_point(data = hydrogen_values, - aes(x = original, y = hydrogen), - col = "red", alpha = .5, cex = 2) + - labs(x = "New Hydrogen Values", y = "Percentile Based on Training Set") -``` - -These line up very nicely! - -## Custom check operations - -The process here is exactly the same as steps; the internal functions have a similar naming convention: - - * `add_check()` instead of `add_step()` - * `check()` instead of `step()`, and so on. - -It is strongly recommended that: - - 1. The operations start with `check_` (i.e. `check_range()` and `check_range_new()`) - 1. The check uses `rlang::abort(paste0(...))` when the conditions are not met - 1. The original data are returned (unaltered) by the check when the conditions are satisfied. - -## Other step methods - -There are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set. 
- -### A print method - -If you don't add a print method for `step_percentile`, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called `printer()` that should be useful in most cases. We are using it here for the custom print method for `step_percentile`. It requires the original terms specification and the column names this specification is evaluated to by `prep()`. For the former, our step object is structured so that the list object `ref_dist` has the names of the selected variables: - -```{r print-method} -print.step_percentile <- - function(x, width = max(20, options()$width - 35), ...) { - cat("Percentile transformation on ", sep = "") - printer( - # Names before prep (could be selectors) - untr_obj = x$terms, - # Names after prep: - tr_obj = names(x$ref_dist), - # Has it been prepped? - trained = x$trained, - # An estimate of how many characters to print on a line: - width = width - ) - invisible(x) - } - -# Results before `prep()`: -recipe(HHV ~ ., data = biomass_tr) %>% - step_percentile(ends_with("gen")) - -# Results after `prep()`: -rec_obj -``` - -### Methods for declaring required packages - -Some recipe steps use functions from other packages. When this is the case, the `step_*()` function should check to see if the package is installed. The function `recipes::recipes_pkg_check()` will do this. For example: - -``` -> recipes::recipes_pkg_check("some_package") -1 package is needed for this step and is not installed. (some_package). Start -a clean R session then run: install.packages("some_package") -``` - -There is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the `hypothetical` package, this might look like: - -```{r eval = FALSE} -required_pkgs.step_hypothetical <- function(x, ...) 
{ - c("hypothetical", "myrecipespkg") -} -``` - -In this example, `myrecipespkg` is the package where the step resides (if it is in a package). - -{{% note %}} If you are writing steps for an extension package, then every step should have a `required_pkgs()` method. {{%/ note %}} - -The reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the `prep()` methods cannot be found and an error occurs. - -If this S3 method is used for your step, you can rely on this for checking the installation: - -```{r eval = FALSE} -recipes::recipes_pkg_check(required_pkgs.step_hypothetical()) -``` - -If you'd like an example of this in a package, please take a look at the [embed](https://github.com/tidymodels/embed/) or [themis](https://github.com/tidymodels/themis/) package. - -### A tidy method - -The `broom::tidy()` method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values. - -When the recipe has been prepped, those data are in the list `ref_dist`. A small function can be used to reformat that data into a tibble. It is customary to return the main values as `value`: - -```{r tidy-calcs} -format_pctl <- function(x) { - tibble::tibble( - value = unname(x), - percentile = as.numeric(gsub("%$", "", names(x))) - ) -} - -# For example: -pctl_step_object <- rec_obj$steps[[1]] -pctl_step_object -format_pctl(pctl_step_object$ref_dist[["hydrogen"]]) -``` - -The tidy method could return these values for each selected column. Before `prep()`, missing values can be used as placeholders. 
- -```{r tidy} -tidy.step_percentile <- function(x, ...) { - if (is_trained(x)) { - res <- map_dfr(x$ref_dist, format_pctl, .id = "term") - } - else { - term_names <- sel2char(x$terms) - res <- - tibble( - terms = term_names, - value = rlang::na_dbl, - percentile = rlang::na_dbl - ) - } - # Always return the step id: - res$id <- x$id - res -} - -tidy(rec_obj, number = 1) -``` - -### Methods for tuning parameters - -The tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn't really have any tunable parameters, so we will demonstrate using `step_poly()`, which returns a polynomial expansion of selected columns. Its function definition has the arguments: - -```{r poly-args} -args(step_poly) -``` - -The argument `degree` is tunable. - -To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. - -`tunable()` takes the step object as its argument and returns a tibble with columns: - -* `name`: The name of the argument. - -* `call_info`: A list that describes how to call a function that returns a dials parameter object. - -* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `"recipe"`. - -* `component`: A character string with more information about the source. For recipes, this is just the name of the step (e.g. `"step_poly"`). - -* `component_id`: A character string to indicate where a unique identifier is for the object. For recipes, this is just the `id` value of the step object. - -The main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. 
- -For example, for a nearest-neighbors `neighbors` parameter, this value is just: - -```{r mtry} -info <- list(pkg = "dials", fun = "neighbors") - -# FYI: how it is used under-the-hood: -new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg) -rlang::eval_tidy(new_param_call) -``` - -For `step_poly()`, a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree: - -```r -> lsf.str("package:dials", pattern = "degree") -degree : function (range = c(1, 3), trans = NULL) -degree_int : function (range = c(1L, 3L), trans = NULL) -prod_degree : function (range = c(1L, 2L), trans = NULL) -spline_degree : function (range = c(3L, 10L), trans = NULL) -``` - -Looking at the `range` values, some return doubles and others return integers. For our problem, `degree_int()` would be a good choice. - -For `step_poly()` the `tunable()` S3 method could be: - -```{r tunable, eval = FALSE} -tunable.step_poly <- function (x, ...) { - tibble::tibble( - name = c("degree"), - call_info = list(list(pkg = "dials", fun = "degree_int")), - source = "recipe", - component = "step_poly", - component_id = x$id - ) -} -``` - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - - diff --git a/content/learn/develop/recipes/index.markdown b/content/learn/develop/recipes/index.markdown deleted file mode 100644 index 3fd4057e..00000000 --- a/content/learn/develop/recipes/index.markdown +++ /dev/null @@ -1,634 +0,0 @@ ---- -title: "Create your own recipe step function" -tags: [recipes] -categories: [] -type: learn-subsection -weight: 1 -description: | - Write a new recipe step for data preprocessing. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: modeldata and tidymodels. - -There are many existing recipe steps in packages like recipes, themis, textrecipes, and others. 
A full list of steps in CRAN packages [can be found here](/find/recipes/). However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for [centering](https://github.com/tidymodels/recipes/blob/master/R/center.R) or [PCA](https://github.com/tidymodels/recipes/blob/master/R/pca.R) to start. - -For check operations (e.g. `check_class()`), the process is very similar. Notes on this are available at the end of this article. - -The general process to follow is to: - -1. Define a step constructor function. - -2. Create the minimal S3 methods for `prep()`, `bake()`, and `print()`. - -3. Optionally add some extra methods to work with other tidymodels packages, such as `tunable()` and `tidy()`. - -As an example, we will create a step for converting data into percentiles. - -## A new step definition - -Let's create a step that replaces the value of a variable with its percentile from the training set. The example data we'll use is from the modeldata package: - - -```r -library(modeldata) -data(biomass) -str(biomass) -#> 'data.frame': 536 obs. of 8 variables: -#> $ sample : chr "Akhrot Shell" "Alabama Oak Wood Waste" "Alder" "Alfalfa" ... -#> $ dataset : chr "Training" "Training" "Training" "Training" ... -#> $ carbon : num 49.8 49.5 47.8 45.1 46.8 ... -#> $ hydrogen: num 5.64 5.7 5.8 4.97 5.4 5.75 5.99 5.7 5.5 5.9 ... -#> $ oxygen : num 42.9 41.3 46.2 35.6 40.7 ... -#> $ nitrogen: num 0.41 0.2 0.11 3.3 1 2.04 2.68 1.7 0.8 1.2 ... -#> $ sulfur : num 0 0 0.02 0.16 0.02 0.1 0.2 0.2 0 0.1 ... -#> $ HHV : num 20 19.2 18.3 18.2 18.4 ... - -biomass_tr <- biomass[biomass$dataset == "Training",] -biomass_te <- biomass[biomass$dataset == "Testing",] -``` - -To illustrate the transformation with the `carbon` variable, note the training set distribution of this variable with a vertical line below for the first value of the test set. 
- - -```r -library(ggplot2) -theme_set(theme_bw()) -ggplot(biomass_tr, aes(x = carbon)) + - geom_histogram(binwidth = 5, col = "blue", fill = "blue", alpha = .5) + - geom_vline(xintercept = biomass_te$carbon[1], lty = 2) -``` - - - -Based on the training set, 42.1% of the data are less than a value of 46.35. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values. - -Our new step will do this computation for any numeric variables of interest. We will call this new recipe step `step_percentile()`. The code below is designed for illustration and not speed or best practices. We've left out a lot of error trapping that we would want in a real implementation. - -## Create the function - -To start, there is a _user-facing_ function. Let's call that `step_percentile()`. This is just a simple wrapper around a _constructor function_, which defines the rules for any step object that defines a percentile transformation. We'll call this constructor `step_percentile_new()`. - -The function `step_percentile()` takes the same arguments as your function and simply adds it to a new recipe. The `...` signifies the variable selectors that can be used. - - -```r -step_percentile <- function( - recipe, - ..., - role = NA, - trained = FALSE, - ref_dist = NULL, - options = list(probs = (0:100)/100, names = TRUE), - skip = FALSE, - id = rand_id("percentile") - ) { - - add_step( - recipe, - step_percentile_new( - terms = enquos(...), - trained = trained, - role = role, - ref_dist = ref_dist, - options = options, - skip = skip, - id = id - ) - ) -} -``` - -You should always keep the first four arguments (`recipe` though `trained`) the same as listed above. Some notes: - - * the `role` argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. 
The latter is what we will be doing and using `role = NA` will leave the existing role intact. - * `trained` is set by the package when the estimation step has been run. You should default your function definition's argument to `FALSE`. - * `skip` is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to `bake()` is used. For example, if a step is applied to the variables with roles of "outcomes", these data would not be available for new samples. - * `id` is a character string that can be used to identify steps in package code. `rand_id()` will create an ID that has the prefix and a random character sequence. - -We can estimate the percentiles of new data points based on the percentiles from the training set with `approx()`. Our `step_percentile` contains a `ref_dist` object to store these percentiles (pre-computed from the training set in `prep()`) for later use in `bake()`. - -We will use `stats::quantile()` to compute the grid. However, we might also want to have control over the granularity of this grid, so the `options` argument will be used to define how that calculation is done. We could use the ellipses (aka `...`) so that any options passed to `step_percentile()` that are not one of its arguments will then be passed to `stats::quantile()`. However, we recommend making a separate list object with the options and use these inside the function because `...` is already used to define the variable selection. - -It is also important to consider if there are any _main arguments_ to the step. For example, for spline-related steps such as `step_ns()`, users typically want to adjust the argument for the degrees of freedom in the spline (e.g. `splines::ns(x, df)`). Rather than letting users add `df` to the `options` argument: - -* Allow the important arguments to be main arguments to the step function. 
- -* Follow the tidymodels [conventions for naming arguments](https://tidymodels.github.io/model-implementation-principles/standardized-argument-names.html). Whenever possible, avoid jargon and keep common argument names. - -There are benefits to following these principles (as shown below). - -## Initialize a new object - -Now, the constructor function can be created. - -The function cascade is: - -``` -step_percentile() calls recipes::add_step() -└──> recipes::add_step() calls step_percentile_new() - └──> step_percentile_new() calls recipes::step() -``` - -`step()` is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using `subclass = "percentile"` will set the class of new objects to `"step_percentile"`. - - -```r -step_percentile_new <- - function(terms, role, trained, ref_dist, options, skip, id) { - step( - subclass = "percentile", - terms = terms, - role = role, - trained = trained, - ref_dist = ref_dist, - options = options, - skip = skip, - id = id - ) - } -``` - -This constructor function should have no default argument values. Defaults should be set in the user-facing step object. - -## Create the `prep` method - -You will need to create a new `prep()` method for your step's class. To do this, three arguments that the method should have are: - -```r -function(x, training, info = NULL) -``` - -where - - * `x` will be the `step_percentile` object, - * `training` will be a _tibble_ that has the training set data, and - * `info` will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific `prep()` method so it may not have the variables from the original data. 
The columns in this tibble are `variable` (the variable name), `type` (currently either "numeric" or "nominal"), `role` (defining the variable's role), and `source` (either "original" or "derived" depending on where it originated). - -You can define other arguments as well. - -The first thing that you might want to do in the `prep()` function is to translate the specification listed in the `terms` argument to column names in the current data. There is a function called `recipes_eval_select()` that can be used to obtain this. - -{{% warning %}} The `recipes_eval_select()` function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps. {{%/ warning %}} - - -```r -prep.step_percentile <- function(x, training, info = NULL, ...) { - col_names <- recipes_eval_select(x$terms, training, info) - # TODO finish the rest of the function -} -``` - -After this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See `recipes::check_type()` to do this for basic types. - -Once we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run `rlang::exec()` to splice in any extra arguments contained in the `options` list to the call to `quantile()`: - - -```r -get_train_pctl <- function(x, args = NULL) { - res <- rlang::exec("quantile", x = x, !!!args) - # Remove duplicate percentile values - res[!duplicated(res)] -} - -# For example: -get_train_pctl(biomass_tr$carbon, list(probs = 0:1)) -#> 0% 100% -#> 14.6 97.2 -get_train_pctl(biomass_tr$carbon) -#> 0% 25% 50% 75% 100% -#> 14.6 44.7 47.1 49.7 97.2 -``` - -Now, the `prep()` method can be created: - - -```r -prep.step_percentile <- function(x, training, info = NULL, ...) { - col_names <- recipes_eval_select(x$terms, training, info) - ## You can add error trapping for non-numeric data here and so on. 
- - ## We'll use the names later so make sure they are available - if (x$options$names == FALSE) { - rlang::abort("`names` should be set to TRUE") - } - - if (!any(names(x$options) == "probs")) { - x$options$probs <- (0:100)/100 - } else { - x$options$probs <- sort(unique(x$options$probs)) - } - - # Compute percentile grid - ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options) - - ## Use the constructor function to return the updated object. - ## Note that `trained` is now set to TRUE - - step_percentile_new( - terms = x$terms, - trained = TRUE, - role = x$role, - ref_dist = ref_dist, - options = x$options, - skip = x$skip, - id = x$id - ) -} -``` - -We suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former can be used for better traceback results. - - -## Create the `bake` method - -Remember that the `prep()` function does not _apply_ the step to the data; it only estimates any required values such as `ref_dist`. We will need to create a new method for our `step_percentile()` class. The minimum arguments for this are - -```r -function(object, new_data, ...) -``` - -where `object` is the updated step function that has been through the corresponding `prep()` code and `new_data` is a tibble of data to be processed. The output of this function needs to be a tibble of the modified version of `new_data`. - -Here is the code to convert the new data to percentiles. The input data (`x` below) comes in as a numeric vector and the output is a vector of approximate percentiles: - - -```r -pctl_by_approx <- function(x, ref) { - # In case duplicates were removed, get the percentiles from - # the names of the reference object - grid <- as.numeric(gsub("%$", "", names(ref))) - approx(x = ref, y = grid, xout = x)$y/100 -} -``` - -These computations are done column-wise using `purrr::map2_dfc()` to modify the new data in-place: - - -```r -bake.step_percentile <- function(object, new_data, ...) 
{ - ## For illustration (and not speed), we will loop through the affected variables - ## and do the computations - vars <- names(object$ref_dist) - - new_data[, vars] <- - purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx) - - new_data -} -``` - -`tibble::as_tibble()` can be used at the end of the function to turn `new_data` back into a tibble if needed. We didn't use `tibble::as_tibble()` above since `new_data` stayed as a tibble throughout the whole function. - -{{% note %}} You need to import `recipes::prep()` and `recipes::bake()` to create your own step function in a package. {{%/ note %}} - -## Run the example - -Let's use the example data to make sure that it works: - - -```r -rec_obj <- - recipe(HHV ~ ., data = biomass_tr) %>% - step_percentile(ends_with("gen")) %>% - prep(training = biomass_tr) - -biomass_te %>% select(ends_with("gen")) %>% slice(1:2) -#> hydrogen oxygen nitrogen -#> 1 5.67 47.2 0.30 -#> 2 5.50 48.1 2.85 -bake(rec_obj, biomass_te %>% slice(1:2), ends_with("gen")) -#> # A tibble: 2 × 3 -#> hydrogen oxygen nitrogen -#> -#> 1 0.45 0.903 0.21 -#> 2 0.38 0.922 0.928 - -# Checking to get approximate result: -mean(biomass_tr$hydrogen <= biomass_te$hydrogen[1]) -#> [1] 0.452 -mean(biomass_tr$oxygen <= biomass_te$oxygen[1]) -#> [1] 0.901 -``` - -The plot below shows how the original hydrogen percentiles line up with the estimated values: - - -```r -hydrogen_values <- - bake(rec_obj, biomass_te, hydrogen) %>% - bind_cols(biomass_te %>% select(original = hydrogen)) - -ggplot(biomass_tr, aes(x = hydrogen)) + - # Plot the empirical distribution function of the - # hydrogen training set values as a black line - stat_ecdf() + - # Overlay the estimated percentiles for the new data: - geom_point(data = hydrogen_values, - aes(x = original, y = hydrogen), - col = "red", alpha = .5, cex = 2) + - labs(x = "New Hydrogen Values", y = "Percentile Based on Training Set") -``` - - - -These line up very nicely! 
- -## Custom check operations - -The process here is exactly the same as steps; the internal functions have a similar naming convention: - - * `add_check()` instead of `add_step()` - * `check()` instead of `step()`, and so on. - -It is strongly recommended that: - - 1. The operations start with `check_` (i.e. `check_range()` and `check_range_new()`) - 1. The check uses `rlang::abort(paste0(...))` when the conditions are not met - 1. The original data are returned (unaltered) by the check when the conditions are satisfied. - -## Other step methods - -There are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set. - -### A print method - -If you don't add a print method for `step_percentile`, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called `printer()` that should be useful in most cases. We are using it here for the custom print method for `step_percentile`. It requires the original terms specification and the column names this specification is evaluated to by `prep()`. For the former, our step object is structured so that the list object `ref_dist` has the names of the selected variables: - - -```r -print.step_percentile <- - function(x, width = max(20, options()$width - 35), ...) { - cat("Percentile transformation on ", sep = "") - printer( - # Names before prep (could be selectors) - untr_obj = x$terms, - # Names after prep: - tr_obj = names(x$ref_dist), - # Has it been prepped? 
- trained = x$trained, - # An estimate of how many characters to print on a line: - width = width - ) - invisible(x) - } - -# Results before `prep()`: -recipe(HHV ~ ., data = biomass_tr) %>% - step_percentile(ends_with("gen")) -#> Recipe -#> -#> Inputs: -#> -#> role #variables -#> outcome 1 -#> predictor 7 -#> -#> Operations: -#> -#> Percentile transformation on ends_with("gen") - -# Results after `prep()`: -rec_obj -#> Recipe -#> -#> Inputs: -#> -#> role #variables -#> outcome 1 -#> predictor 7 -#> -#> Training data contained 456 data points and no missing data. -#> -#> Operations: -#> -#> Percentile transformation on hydrogen, oxygen, nitrogen [trained] -``` - -### Methods for declaring required packages - -Some recipe steps use functions from other packages. When this is the case, the `step_*()` function should check to see if the package is installed. The function `recipes::recipes_pkg_check()` will do this. For example: - -``` -> recipes::recipes_pkg_check("some_package") -1 package is needed for this step and is not installed. (some_package). Start -a clean R session then run: install.packages("some_package") -``` - -There is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the `hypothetical` package, this might look like: - - -```r -required_pkgs.step_hypothetical <- function(x, ...) { - c("hypothetical", "myrecipespkg") -} -``` - -In this example, `myrecipespkg` is the package where the step resides (if it is in a package). - -{{% note %}} If you are writing steps for an extension package, then every step should have a `required_pkgs()` method. {{%/ note %}} - -The reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. 
However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the `prep()` methods cannot be found and an error occurs. - -If this S3 method is used for your step, you can rely on this for checking the installation: - - -```r -recipes::recipes_pkg_check(required_pkgs.step_hypothetical()) -``` - -If you'd like an example of this in a package, please take a look at the [embed](https://github.com/tidymodels/embed/) or [themis](https://github.com/tidymodels/themis/) package. - -### A tidy method - -The `broom::tidy()` method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values. - -When the recipe has been prepped, those data are in the list `ref_dist`. A small function can be used to reformat that data into a tibble. It is customary to return the main values as `value`: - - -```r -format_pctl <- function(x) { - tibble::tibble( - value = unname(x), - percentile = as.numeric(gsub("%$", "", names(x))) - ) -} - -# For example: -pctl_step_object <- rec_obj$steps[[1]] -pctl_step_object -#> Percentile transformation on hydrogen, oxygen, nitrogen [trained] -format_pctl(pctl_step_object$ref_dist[["hydrogen"]]) -#> # A tibble: 87 × 2 -#> value percentile -#> -#> 1 0.03 0 -#> 2 0.934 1 -#> 3 1.60 2 -#> 4 2.07 3 -#> 5 2.45 4 -#> 6 2.74 5 -#> 7 3.15 6 -#> 8 3.49 7 -#> 9 3.71 8 -#> 10 3.99 9 -#> # … with 77 more rows -``` - -The tidy method could return these values for each selected column. Before `prep()`, missing values can be used as placeholders. - - -```r -tidy.step_percentile <- function(x, ...) 
{ - if (is_trained(x)) { - res <- map_dfr(x$ref_dist, format_pctl, .id = "term") - } - else { - term_names <- sel2char(x$terms) - res <- - tibble( - terms = term_names, - value = rlang::na_dbl, - percentile = rlang::na_dbl - ) - } - # Always return the step id: - res$id <- x$id - res -} - -tidy(rec_obj, number = 1) -#> # A tibble: 274 × 4 -#> term value percentile id -#> -#> 1 hydrogen 0.03 0 percentile_Bp5vK -#> 2 hydrogen 0.934 1 percentile_Bp5vK -#> 3 hydrogen 1.60 2 percentile_Bp5vK -#> 4 hydrogen 2.07 3 percentile_Bp5vK -#> 5 hydrogen 2.45 4 percentile_Bp5vK -#> 6 hydrogen 2.74 5 percentile_Bp5vK -#> 7 hydrogen 3.15 6 percentile_Bp5vK -#> 8 hydrogen 3.49 7 percentile_Bp5vK -#> 9 hydrogen 3.71 8 percentile_Bp5vK -#> 10 hydrogen 3.99 9 percentile_Bp5vK -#> # … with 264 more rows -``` - -### Methods for tuning parameters - -The tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn't really have any tunable parameters, so we will demonstrate using `step_poly()`, which returns a polynomial expansion of selected columns. Its function definition has the arguments: - - -```r -args(step_poly) -#> function (recipe, ..., role = "predictor", trained = FALSE, objects = NULL, -#> degree = 2, options = list(), skip = FALSE, id = rand_id("poly")) -#> NULL -``` - -The argument `degree` is tunable. - -To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. - -`tunable()` takes the step object as its argument and returns a tibble with columns: - -* `name`: The name of the argument. - -* `call_info`: A list that describes how to call a function that returns a dials parameter object. - -* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `"recipe"`. 
- -* `component`: A character string with more information about the source. For recipes, this is just the name of the step (e.g. `"step_poly"`). - -* `component_id`: A character string to indicate where a unique identifier is for the object. For recipes, this is just the `id` value of the step object. - -The main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. - -For example, for a nearest-neighbors `neighbors` parameter, this value is just: - - -```r -info <- list(pkg = "dials", fun = "neighbors") - -# FYI: how it is used under-the-hood: -new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg) -rlang::eval_tidy(new_param_call) -#> # Nearest Neighbors (quantitative) -#> Range: [1, 10] -``` - -For `step_poly()`, a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree: - -```r -> lsf.str("package:dials", pattern = "degree") -degree : function (range = c(1, 3), trans = NULL) -degree_int : function (range = c(1L, 3L), trans = NULL) -prod_degree : function (range = c(1L, 2L), trans = NULL) -spline_degree : function (range = c(3L, 10L), trans = NULL) -``` - -Looking at the `range` values, some return doubles and others return integers. For our problem, `degree_int()` would be a good choice. - -For `step_poly()` the `tunable()` S3 method could be: - - -```r -tunable.step_poly <- function (x, ...) 
{ - tibble::tibble( - name = c("degree"), - call_info = list(list(pkg = "dials", fun = "degree_int")), - source = "recipe", - component = "step_poly", - component_id = x$id - ) -} -``` - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> modeldata * 1.0.1 2022-09-06 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - - diff --git a/content/learn/develop/recipes/index_files/figure-html/carbon_dist-1.png b/content/learn/develop/recipes/index_files/figure-html/carbon_dist-1.png deleted file mode 100644 index fdd32343..00000000 Binary files 
a/content/learn/develop/recipes/index_files/figure-html/carbon_dist-1.png and /dev/null differ diff --git a/content/learn/develop/recipes/index_files/figure-html/cdf_plot-1.png b/content/learn/develop/recipes/index_files/figure-html/cdf_plot-1.png deleted file mode 100644 index 4d2dc139..00000000 Binary files a/content/learn/develop/recipes/index_files/figure-html/cdf_plot-1.png and /dev/null differ diff --git a/content/learn/models/_index.md b/content/learn/models/_index.md deleted file mode 100644 index 8796ce16..00000000 --- a/content/learn/models/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: Create robust models -weight: 2 -type: learn-subsection ---- - diff --git a/content/learn/models/calibration/figs/beta-cal-1.svg b/content/learn/models/calibration/figs/beta-cal-1.svg deleted file mode 100644 index 9299bd7b..00000000 --- a/content/learn/models/calibration/figs/beta-cal-1.svg +++ /dev/null @@ -1,1646 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Window Midpoint -Event Rate -Beta calibration - - diff --git a/content/learn/models/calibration/figs/break-logistic-1.svg b/content/learn/models/calibration/figs/break-logistic-1.svg deleted file mode 100644 index ac870a36..00000000 --- a/content/learn/models/calibration/figs/break-logistic-1.svg +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Probability -Predicted Event Rate - - diff --git a/content/learn/models/calibration/figs/break-plot-1.svg b/content/learn/models/calibration/figs/break-plot-1.svg deleted file mode 100644 index 67bb4de8..00000000 --- a/content/learn/models/calibration/figs/break-plot-1.svg +++ /dev/null @@ -1,101 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Bin Midpoint -Event Rate - - diff --git a/content/learn/models/calibration/figs/break-windowed-1.svg b/content/learn/models/calibration/figs/break-windowed-1.svg deleted file mode 100644 index 69654225..00000000 --- a/content/learn/models/calibration/figs/break-windowed-1.svg +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Window Midpoint -Event Rate - - diff --git a/content/learn/models/calibration/figs/calibrated-res-1.svg 
b/content/learn/models/calibration/figs/calibrated-res-1.svg deleted file mode 100644 index 857496d1..00000000 --- a/content/learn/models/calibration/figs/calibrated-res-1.svg +++ /dev/null @@ -1,636 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Window Midpoint -Event Rate - - diff --git a/content/learn/models/calibration/figs/isoreg-cal-1.svg b/content/learn/models/calibration/figs/isoreg-cal-1.svg deleted file mode 100644 index 9f553dd3..00000000 --- a/content/learn/models/calibration/figs/isoreg-cal-1.svg +++ /dev/null @@ -1,1647 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Window Midpoint -Event Rate -Isotonic regression calibration - - diff --git a/content/learn/models/calibration/figs/logistic-cal-1.svg b/content/learn/models/calibration/figs/logistic-cal-1.svg deleted file mode 100644 index b9cdc903..00000000 --- a/content/learn/models/calibration/figs/logistic-cal-1.svg +++ /dev/null @@ -1,1635 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -Window Midpoint -Event Rate -Logistic calibration via GAM - - diff --git a/content/learn/models/calibration/figs/prob-hist-1.svg b/content/learn/models/calibration/figs/prob-hist-1.svg deleted file mode 100644 index 98d863ab..00000000 --- a/content/learn/models/calibration/figs/prob-hist-1.svg +++ /dev/null @@ -1,1726 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -WS - - - - - - - - - - -PS - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -0 -200 -400 -600 - - - - -0 -200 -400 -600 - - - - -Probability Estimate of PS -count - - diff --git a/content/learn/models/calibration/index.Rmarkdown b/content/learn/models/calibration/index.Rmarkdown deleted file mode 100644 index f91aa70a..00000000 --- a/content/learn/models/calibration/index.Rmarkdown +++ /dev/null @@ -1,341 +0,0 @@ ---- -title: "An introduction to calibration with tidymodels" -tags: [probably,yardstick] -categories: [classification,calibration] -type: learn-subsection -weight: 5 -description: | - Learn how the probably package can improve classification and regression models. ---- - -```{r} -#| label: startup -#| include: false - -library(tidymodels) -library(probably) -library(discrim) -library(doMC) - -registerDoMC(cores = parallel::detectCores()) - -source(here::here("content/learn/common.R")) - -pkgs <- c("tidymodels", "probably", "discrim", "klaR") -``` - - -`r req_pkgs(pkgs)` The probably package should be version 1.0.0 or greater. - -There are essentially three different parts to a predictive model: - - - the pre-processing stage (e.g., feature engineering, normalization, etc.) - - model fitting (actually training the model) - - post-processing (such as optimizing a probability threshold) - -This article demonstrates a post-processing tool called model calibration. After the model fit, we might be able to improve a model by altering the predicted values. 
- -A classification model is well-calibrated if its probability estimate is consistent with the rate that the event occurs "in the wild." If you are not familiar with calibration, there are references at the end of this article. - -To get started, load some packages: - -```{r} -#| label: startup2 - -library(tidymodels) -library(probably) -library(discrim) - -tidymodels_prefer() -theme_set(theme_bw()) -options(pillar.advice = FALSE, pillar.min_title_chars = Inf) -``` - - -## An example: predicting cell segmentation quality - -The modeldata package contains a data set called `cells`. Initially distributed by [Hill and Haney (2007)](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-8-340), they showed how to create models that predict the _quality_ of the image analysis of cells. The outcome has two levels `"PS"` (for poorly segmented images) or `"WS"` (well-segmented). There are 56 image features that can be used to build a classifier. - -Let's load the data, remove an unwanted column, and look at the outcome frequencies: - -```{r} -#| label: cell-data-start - -data(cells) -cells$case <- NULL - -dim(cells) -cells %>% count(class) -``` - -There is a class imbalance but that will not affect our work here. - -Let's make a 75% to 25% split of the data into training and testing using `initial_split()`. We'll also create a set of 10-fold cross-validation indices for model resampling. - -```{r} -#| label: cell-data-obj - -set.seed(8928) -split <- initial_split(cells, strata = class) -cells_tr <- training(split) -cells_te <- testing(split) - -cells_rs <- vfold_cv(cells_tr, strata = class) -``` - -Now that there are data to be modeled, let's get to it! - -## A naive Bayes model - -We'll show the utility of calibration tools by using a type of model that, in this instance, is likely to produce a poorly calibrated model. 
The naive Bayes classifier is a well-established model that assumes that the predictors are statistically _independent_ of one another (to simplify the calculations). While that is certainly not the case for these data, the model can be effective at discriminating between the classes. Unfortunately, when there are many predictors in the model, it has a tendency to produce class probability distributions that are pathological. The predictions tend to gravitate to values near zero or one, producing distributions that are "U"-shaped ([Kuhn and Johnson, 2013](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22Applied+Predictive+Modeling%22&btnG=)). - -To demonstrate, let's set up the model: - -```{r} -#| label: bayes-setup - -bayes_wflow <- - workflow() %>% - add_formula(class ~ .) %>% - add_model(naive_Bayes()) -``` - -We'll resample the model first so that we can get a good assessment of the results. During the resampling process, two metrics are used to judge how well the model worked. First, the area under the ROC curve is used to measure the ability of the model to separate the classes (using probability predictions). Second, the Brier score can measure how close the probability estimates are to the actual outcome values (zero or one). The `collect_metrics()` function shows the resampling estimates: - -```{r} -#| label: bayes-resample -#| warning: false - -cls_met <- metric_set(roc_auc, brier_class) -# We'll save the out-of-sample predictions to visualize them. -ctrl <- control_resamples(save_pred = TRUE) - -bayes_res <- - bayes_wflow %>% - fit_resamples(cells_rs, metrics = cls_met, control = ctrl) - -collect_metrics(bayes_res) -``` - -The ROC score is impressive! However, the Brier value indicates that the probability values, while discriminating well, are not very realistic. A value of 0.25 is the "bad model" threshold when there are two classes (a value of zero being the best possible result). - -### But is it calibrated? - -Spoilers: no. It is not. 
- -The first clue is the extremely U-shaped distribution of the probability scores (facetted by the true class value): - -```{r} -#| label: prob-hist -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -collect_predictions(bayes_res) %>% - ggplot(aes(.pred_PS)) + - geom_histogram(col = "white", bins = 40) + - facet_wrap(~ class, ncol = 1) + - geom_rug(col = "blue", alpha = 1 / 2) + - labs(x = "Probability Estimate of PS") -``` - -There are almost no cells with moderate probability estimates. Furthermore, when the model is incorrect, it is "confidently incorrect". - -The probably package has tools for visualizing and correcting models with poor calibration properties. - -The most common plot is to break the predictions into about ten equally sized buckets and compute the actual event rate within each. For example, if a bin captures the samples predicted to be poorly segmented with probabilities between 20% and 30%, we should expect about a 25% event rate (i.e., the bin midpoint) within that partition. Here's a plot with ten bins: - -```{r} -#| label: break-plot -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -cal_plot_breaks(bayes_res) -``` - -The probabilities are not showing very good accuracy. - -There is also a similar function that can use moving windows with overlapping partitions. This provides a little more detail: - -```{r} -#| label: break-windowed -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -cal_plot_windowed(bayes_res, step_size = 0.025) -``` - -Bad. Still bad. - -Finally, for two class outcomes, we can fit a logistic generalized additive model (GAM) and examine the trend. - -```{r} -#| label: break-logistic -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -cal_plot_logistic(bayes_res) -``` - -Ooof. - -## Remediation - -The good news is that we can do something about this. 
There are tools to "fix" the probability estimates so that they have better properties, such as falling along the diagonal lines in the diagnostic plots shown above. Different methods improve the predictions in different ways. - -The most common approach is the fit a logistic regression model to the data (with the probability estimates as the predictor). The probability predictions from this model are then used as the calibrated estimate. By default, a generalized additive model is used for this fit, but the `smooth = FALSE` argument can use simple linear effects. - -If effect, the GAM model estimates the probability regions where the model is off (as shown in the diagnostic plot). For example, suppose that when the model predicts a 2% event rate, the GAM model estimates that it under-predicts the probability by 5% (relative to the observed data). Given this gap, new predictions are adjusted up so that the probability estimates are more in-line with the data. - -How do we know if this works? There are a set of `cal_validate_*()` functions that can use holdout data to resample the model with and without the calibration tool of choice. Since we already resampled the model, we'll use those results to estimate 10 more logistic regressions and use the out-of-sample data to estimate performance. - -`collect_metrics()` can again be used to see the performance statistics. 
We'll also use `cal_plot_windowed()` on the calibrated holdout data to get a visual assessment: - -```{r} -#| label: logistic-cal -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -logit_val <- cal_validate_logistic(bayes_res, metrics = cls_met, save_pred = TRUE) -collect_metrics(logit_val) - -collect_predictions(logit_val) %>% - filter(.type == "calibrated") %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) + - ggtitle("Logistic calibration via GAM") -``` - -That's a lot better but it is problematic that the calibrated predictions do not reach zero or one. - -A different approach is to use isotonic regression. This method can result in very few unique probability estimates. The probably package has a version of isotonic regression that resamples the process to produce more unique probabilities: - -```{r} -#| label: isoreg-cal -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -set.seed(1212) -iso_val <- cal_validate_isotonic_boot(bayes_res, metrics = cls_met, - save_pred = TRUE, times = 25) -collect_metrics(iso_val) - -collect_predictions(iso_val) %>% - filter(.type == "calibrated") %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) + - ggtitle("Isotonic regression calibration") -``` - -Much better. However, there is a slight bias since the estimated points are consistently above the identity line on the 45-degree angle. 
- -Finally, we can also test out [Beta calibration](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22Beyond+sigmoids%22+calibration&btnG=): - -```{r} -#| label: beta-cal -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -beta_val <- cal_validate_beta(bayes_res, metrics = cls_met, save_pred = TRUE) -collect_metrics(beta_val) - -collect_predictions(beta_val) %>% - filter(.type == "calibrated") %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) + - ggtitle("Beta calibration") -``` - -Also a big improvement but it does poorly at the lower end of the scale. - -Beta calibration appears to have the best results. We'll save a model that is trained using all of the out-of-sample predictions from the original naive Bayes resampling results. - -We can also fit the final naive Bayes model to predict the test set: - -```{r} -#| label: finalize -#| warning: false - -cell_cal <- cal_estimate_beta(bayes_res) -bayes_fit <- bayes_wflow %>% fit(data = cells_tr) -``` - -The `cell_cal` object can be used to enact the calibration for new predictions (as we'll see in a minute). - -## Test set results - -First, we make our ordinary predictions: - -```{r} -#| label: uncalibrated -#| warning: false - -cell_test_pred <- augment(bayes_fit, new_data = cells_te) -cell_test_pred %>% cls_met(class, .pred_PS) -``` - -These metric estimates are very consistent with the resampled performance estimates. - -We can then use our `cell_cal` object with the `cal_apply()` function: - -```{r} -#| label: calibrated -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -cell_test_cal_pred <- - cell_test_pred %>% - cal_apply(cell_cal) -cell_test_cal_pred %>% dplyr::select(class, starts_with(".pred_")) -``` - -Note that `cal_apply()` recomputed the hard class predictions in the `.pred_class` column. It is possible that the changes in the probability estimates could invalidate the original hard class estimates. 
- -What do the calibrated test set results show? - -```{r} -#| label: calibrated-res -#| out.width: 60% -#| fig.width: 5 -#| fig.height: 5 -#| fig.align: center - -cell_test_cal_pred %>% cls_met(class, .pred_PS) -cell_test_cal_pred %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) -``` - -Much better. The test set results also agree with the results from `cal_validate_beta().` - -## Other model types - -probably can also calibrate classification models with more than two outcome levels. The functions `cal_*_multinomial()` use a multinomial model in the same spirit as the logistic regression model. Isotonic and Beta calibration can also be used via a "one versus all" approach that builds a set of binary calibrators and normalizes their results at the end (to ensure that they add to one). - -For regression models, there is `cal_plot_regression()` and `cal_*_linear()`. The latter uses `lm()` or `mgcv::gam()` to create a calibrator object. - -## Some background references - - - - Kull, Meelis, Telmo M. Silva Filho, and Peter Flach. "[Beyond sigmoids: How to obtain well-calibrated probabilities from binary classifiers with beta calibration.](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22Beyond+sigmoids%22+calibration&btnG=)" (2017): 5052-5080 - -- Niculescu-Mizil, Alexandru, and Rich Caruana. "[Predicting good probabilities with supervised learning](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%E2%80%9CPredicting+Good+Probabilities+with+Supervised+Learning%E2%80%9D&btnG=)." In _Proceedings of the 22nd international conference on Machine learning_, pp. 625-632. 2005. 
- - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/models/calibration/index.markdown b/content/learn/models/calibration/index.markdown deleted file mode 100644 index 8e99c179..00000000 --- a/content/learn/models/calibration/index.markdown +++ /dev/null @@ -1,381 +0,0 @@ ---- -title: "An introduction to calibration with tidymodels" -tags: [probably,yardstick] -categories: [classification,calibration] -type: learn-subsection -weight: 5 -description: | - Learn how the probably package can improve classification and regression models. ---- - - - - -To use the code in this article, you will need to install the following packages: discrim, klaR, probably, and tidymodels. The probably package should be version 1.0.0 or greater. - -There are essentially three different parts to a predictive model: - - - the pre-processing stage (e.g., feature engineering, normalization, etc.) - - model fitting (actually training the model) - - post-processing (such as optimizing a probability threshold) - -This article demonstrates a post-processing tool called model calibration. After the model fit, we might be able to improve a model by altering the predicted values. - -A classification model is well-calibrated if its probability estimate is consistent with the rate that the event occurs "in the wild." If you are not familiar with calibration, there are references at the end of this article. - -To get started, load some packages: - - -```r -library(tidymodels) -library(probably) -library(discrim) - -tidymodels_prefer() -theme_set(theme_bw()) -options(pillar.advice = FALSE, pillar.min_title_chars = Inf) -``` - - -## An example: predicting cell segmentation quality - -The modeldata package contains a data set called `cells`. 
Initially distributed by [Hill and Haney (2007)](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-8-340), they showed how to create models that predict the _quality_ of the image analysis of cells. The outcome has two levels `"PS"` (for poorly segmented images) or `"WS"` (well-segmented). There are 56 image features that can be used to build a classifier. - -Let's load the data, remove an unwanted column, and look at the outcome frequencies: - - -```r -data(cells) -cells$case <- NULL - -dim(cells) -#> [1] 2019 57 -cells %>% count(class) -#> # A tibble: 2 × 2 -#> class n -#> -#> 1 PS 1300 -#> 2 WS 719 -``` - -There is a class imbalance but that will not affect our work here. - -Let's make a 75% to 25% split of the data into training and testing using `initial_split()`. We'll also create a set of 10-fold cross-validation indices for model resampling. - - -```r -set.seed(8928) -split <- initial_split(cells, strata = class) -cells_tr <- training(split) -cells_te <- testing(split) - -cells_rs <- vfold_cv(cells_tr, strata = class) -``` - -Now that there are data to be modeled, let's get to it! - -## A naive Bayes model - -We'll show the utility of calibration tools by using a type of model that, in this instance, is likely to produce a poorly calibrated model. The naive Bayes classifier is a well-established model that assumes that the predictors are statistically _independent_ of one another (to simplify the calculations). While that is certainly not the case for these data, the model can be effective at discriminating between the classes. Unfortunately, when there are many predictors in the model, it has a tendency to produce class probability distributions that are pathological. The predictions tend to gravitate to values near zero or one, producing distributions that are "U"-shaped ([Kuhn and Johnson, 2013](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22Applied+Predictive+Modeling%22&btnG=)). 
- -To demonstrate, let's set up the model: - - -```r -bayes_wflow <- - workflow() %>% - add_formula(class ~ .) %>% - add_model(naive_Bayes()) -``` - -We'll resample the model first so that we can get a good assessment of the results. During the resampling process, two metrics are used to judge how well the model worked. First, the area under the ROC curve is used to measure the ability of the model to separate the classes (using probability predictions). Second, the Brier score can measure how close the probability estimates are to the actual outcome values (zero or one). The `collect_metrics()` function shows the resampling estimates: - - -```r -cls_met <- metric_set(roc_auc, brier_class) -# We'll save the out-of-sample predictions to visualize them. -ctrl <- control_resamples(save_pred = TRUE) - -bayes_res <- - bayes_wflow %>% - fit_resamples(cells_rs, metrics = cls_met, control = ctrl) - -collect_metrics(bayes_res) -#> # A tibble: 2 × 6 -#> .metric .estimator mean n std_err .config -#> -#> 1 brier_class binary 0.202 10 0.0100 Preprocessor1_Model1 -#> 2 roc_auc binary 0.856 10 0.00936 Preprocessor1_Model1 -``` - -The ROC score is impressive! However, the Brier value indicates that the probability values, while discriminating well, are not very realistic. A value of 0.25 is the "bad model" threshold when there are two classes (a value of zero being the best possible result). - -### But is it calibrated? - -Spoilers: no. It is not. - -The first clue is the extremely U-shaped distribution of the probability scores (facetted by the true class value): - - -```r -collect_predictions(bayes_res) %>% - ggplot(aes(.pred_PS)) + - geom_histogram(col = "white", bins = 40) + - facet_wrap(~ class, ncol = 1) + - geom_rug(col = "blue", alpha = 1 / 2) + - labs(x = "Probability Estimate of PS") -``` - - - -There are almost no cells with moderate probability estimates. Furthermore, when the model is incorrect, it is "confidently incorrect". 
- -The probably package has tools for visualizing and correcting models with poor calibration properties. - -The most common plot is to break the predictions into about ten equally sized buckets and compute the actual event rate within each. For example, if a bin captures the samples predicted to be poorly segmented with probabilities between 20% and 30%, we should expect about a 25% event rate (i.e., the bin midpoint) within that partition. Here's a plot with ten bins: - - -```r -cal_plot_breaks(bayes_res) -``` - - - -The probabilities are not showing very good accuracy. - -There is also a similar function that can use moving windows with overlapping partitions. This provides a little more detail: - - -```r -cal_plot_windowed(bayes_res, step_size = 0.025) -``` - - - -Bad. Still bad. - -Finally, for two class outcomes, we can fit a logistic generalized additive model (GAM) and examine the trend. - - -```r -cal_plot_logistic(bayes_res) -``` - - - -Ooof. - -## Remediation - -The good news is that we can do something about this. There are tools to "fix" the probability estimates so that they have better properties, such as falling along the diagonal lines in the diagnostic plots shown above. Different methods improve the predictions in different ways. - -The most common approach is to fit a logistic regression model to the data (with the probability estimates as the predictor). The probability predictions from this model are then used as the calibrated estimate. By default, a generalized additive model is used for this fit, but the `smooth = FALSE` argument can use simple linear effects. - -In effect, the GAM model estimates the probability regions where the model is off (as shown in the diagnostic plot). For example, suppose that when the model predicts a 2% event rate, the GAM model estimates that it under-predicts the probability by 5% (relative to the observed data). 
Given this gap, new predictions are adjusted up so that the probability estimates are more in-line with the data. - -How do we know if this works? There are a set of `cal_validate_*()` functions that can use holdout data to resample the model with and without the calibration tool of choice. Since we already resampled the model, we'll use those results to estimate 10 more logistic regressions and use the out-of-sample data to estimate performance. - -`collect_metrics()` can again be used to see the performance statistics. We'll also use `cal_plot_windowed()` on the calibrated holdout data to get a visual assessment: - - -```r -logit_val <- cal_validate_logistic(bayes_res, metrics = cls_met, save_pred = TRUE) -collect_metrics(logit_val) -#> # A tibble: 4 × 7 -#> .metric .type .estimator mean n std_err .config -#> -#> 1 brier_class uncalibrated binary 0.202 10 0.0100 config -#> 2 roc_auc uncalibrated binary 0.856 10 0.00936 config -#> 3 brier_class calibrated binary 0.154 10 0.00608 config -#> 4 roc_auc calibrated binary 0.855 10 0.00968 config - -collect_predictions(logit_val) %>% - filter(.type == "calibrated") %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) + - ggtitle("Logistic calibration via GAM") -``` - - - -That's a lot better but it is problematic that the calibrated predictions do not reach zero or one. - -A different approach is to use isotonic regression. This method can result in very few unique probability estimates. 
The probably package has a version of isotonic regression that resamples the process to produce more unique probabilities: - - -```r -set.seed(1212) -iso_val <- cal_validate_isotonic_boot(bayes_res, metrics = cls_met, - save_pred = TRUE, times = 25) -collect_metrics(iso_val) -#> # A tibble: 4 × 7 -#> .metric .type .estimator mean n std_err .config -#> -#> 1 brier_class uncalibrated binary 0.202 10 0.0100 config -#> 2 roc_auc uncalibrated binary 0.856 10 0.00936 config -#> 3 brier_class calibrated binary 0.150 10 0.00504 config -#> 4 roc_auc calibrated binary 0.856 10 0.00928 config - -collect_predictions(iso_val) %>% - filter(.type == "calibrated") %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) + - ggtitle("Isotonic regression calibration") -``` - - - -Much better. However, there is a slight bias since the estimated points are consistently above the identity line on the 45-degree angle. - -Finally, we can also test out [Beta calibration](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22Beyond+sigmoids%22+calibration&btnG=): - - -```r -beta_val <- cal_validate_beta(bayes_res, metrics = cls_met, save_pred = TRUE) -collect_metrics(beta_val) -#> # A tibble: 4 × 7 -#> .metric .type .estimator mean n std_err .config -#> -#> 1 brier_class uncalibrated binary 0.202 10 0.0100 config -#> 2 roc_auc uncalibrated binary 0.856 10 0.00936 config -#> 3 brier_class calibrated binary 0.145 10 0.00439 config -#> 4 roc_auc calibrated binary 0.856 10 0.00933 config - -collect_predictions(beta_val) %>% - filter(.type == "calibrated") %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) + - ggtitle("Beta calibration") -``` - - - -Also a big improvement but it does poorly at the lower end of the scale. - -Beta calibration appears to have the best results. We'll save a model that is trained using all of the out-of-sample predictions from the original naive Bayes resampling results. 
- -We can also fit the final naive Bayes model to predict the test set: - - -```r -cell_cal <- cal_estimate_beta(bayes_res) -bayes_fit <- bayes_wflow %>% fit(data = cells_tr) -``` - -The `cell_cal` object can be used to enact the calibration for new predictions (as we'll see in a minute). - -## Test set results - -First, we make our ordinary predictions: - - -```r -cell_test_pred <- augment(bayes_fit, new_data = cells_te) -cell_test_pred %>% cls_met(class, .pred_PS) -#> # A tibble: 2 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 0.839 -#> 2 brier_class binary 0.226 -``` - -These metric estimates are very consistent with the resampled performance estimates. - -We can then use our `cell_cal` object with the `cal_apply()` function: - - -```r -cell_test_cal_pred <- - cell_test_pred %>% - cal_apply(cell_cal) -cell_test_cal_pred %>% dplyr::select(class, starts_with(".pred_")) -#> # A tibble: 505 × 4 -#> class .pred_class .pred_PS .pred_WS -#> -#> 1 PS PS 0.884 0.116 -#> 2 WS WS 0.212 0.788 -#> 3 WS WS 0.0742 0.926 -#> 4 PS PS 0.835 0.165 -#> 5 PS PS 0.948 0.0523 -#> 6 WS WS 0.206 0.794 -#> 7 PS PS 0.854 0.146 -#> 8 PS PS 0.726 0.274 -#> 9 WS WS 0.339 0.661 -#> 10 WS PS 0.604 0.396 -#> # ℹ 495 more rows -``` - -Note that `cal_apply()` recomputed the hard class predictions in the `.pred_class` column. It is possible that the changes in the probability estimates could invalidate the original hard class estimates. - -What do the calibrated test set results show? - - -```r -cell_test_cal_pred %>% cls_met(class, .pred_PS) -#> # A tibble: 2 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 0.839 -#> 2 brier_class binary 0.154 -cell_test_cal_pred %>% - cal_plot_windowed(truth = class, estimate = .pred_PS, step_size = 0.025) -``` - - - -Much better. The test set results also agree with the results from `cal_validate_beta().` - -## Other model types - -probably can also calibrate classification models with more than two outcome levels. 
The functions `cal_*_multinomial()` use a multinomial model in the same spirit as the logistic regression model. Isotonic and Beta calibration can also be used via a "one versus all" approach that builds a set of binary calibrators and normalizes their results at the end (to ensure that they add to one). - -For regression models, there is `cal_plot_regression()` and `cal_*_linear()`. The latter uses `lm()` or `mgcv::gam()` to create a calibrator object. - -## Some background references - - - - Kull, Meelis, Telmo M. Silva Filho, and Peter Flach. "[Beyond sigmoids: How to obtain well-calibrated probabilities from binary classifiers with beta calibration.](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22Beyond+sigmoids%22+calibration&btnG=)" (2017): 5052-5080 - -- Niculescu-Mizil, Alexandru, and Rich Caruana. "[Predicting good probabilities with supervised learning](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%E2%80%9CPredicting+Good+Probabilities+with+Supervised+Learning%E2%80%9D&btnG=)." In _Proceedings of the 22nd international conference on Machine learning_, pp. 625-632. 2005. 
- - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.0 (2022-04-22) -#> os macOS Monterey 12.6.1 -#> system aarch64, darwin20 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/New_York -#> date 2023-06-01 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.2.0) -#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.2.0) -#> discrim * 1.0.1 2023-03-08 [1] CRAN (R 4.2.0) -#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.0) -#> klaR * 1.7-2 2023-03-17 [1] CRAN (R 4.2.0) -#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.2.0) -#> probably * 1.0.0 2023-05-31 [1] local -#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.0) -#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.2.0) -#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.0) -#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.0) -#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.2.0) -#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.2.0) -#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.2.0) -#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/models/coefficients/index.Rmarkdown b/content/learn/models/coefficients/index.Rmarkdown deleted file mode 100644 index 6d7b841e..00000000 --- a/content/learn/models/coefficients/index.Rmarkdown +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: "Working with model coefficients" -tags: [parsnip,tune,broom,workflows] -categories: [model fitting] -type: 
learn-subsection -weight: 5 -description: | - Create models that use coefficients, extract them from fitted models, and visualize them. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -pkgs <- c("tidymodels", "glmnet") -library(Matrix) -library(glmnet) -``` - -## Introduction - -There are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients. - -This article describes how to retrieve the estimated coefficients from models fit using tidymodels. `r req_pkgs(pkgs)` - -## Linear regression - -Let's start with a linear regression model: - -$$\hat{y} = \hat{\beta}_0 + \hat{\beta}_1x_1 + \ldots + \hat{\beta}_px_p$$ - -The $\beta$ values are the coefficients and the $x_j$ are model predictors, or features. - -Let's use the [Chicago train data](https://bookdown.org/max/FES/chicago-intro.html) where we predict the ridership at the Clark and Lake station (column name: `ridership`) with the previous ridership data 14 days prior at three of the stations. - -The data are in the modeldata package: - -```{r setup-tm, message = FALSE, warning=FALSE} -library(tidymodels) -tidymodels_prefer() -theme_set(theme_bw()) - -data(Chicago) - -Chicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem) -``` - -### A single model - -Let's start by fitting only a single parsnip model object. We'll create a model specification using `linear_reg()`. - -{{% note %}} The default engine is `"lm"` so no call to `set_engine()` is required. {{%/ note %}} - -The `fit()` function estimates the model coefficients, given a formula and data set. 
- - -```{r lm-single} -lm_spec <- linear_reg() -lm_fit <- fit(lm_spec, ridership ~ ., data = Chicago) -lm_fit -``` - -The best way to retrieve the fitted parameters is to use the `tidy()` method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names: - -```{r lm-tidy} -tidy(lm_fit) -``` - -We'll use this function in subsequent sections. - -### Resampled or tuned models - -The tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the [bootstrap](https://www.tmwr.org/resampling.html#bootstrap) to resample the data. This is a standard resampling approach when evaluating the uncertainty in statistical estimates. - -We'll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates). - -```{r bootstraps} -set.seed(123) -bt <- bootstraps(Chicago, times = 5) -``` - -With resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function [`fit_resamples()`](https://www.tmwr.org/resampling.html#resampling-performance) is the recommended approach for doing so. - -{{% warning %}} The `fit_resamples()` function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to `fit_resamples()` that _can_ save the model object or any other aspect of the fit. {{%/ warning %}} - -This function takes a single argument that represents the fitted [workflow object](https://www.tmwr.org/workflows.html) (even if you don't give `fit_resamples()` a workflow). - -From this, we can extract the model fit. 
There are two "levels" of model objects that are available: - -* The parsnip model object, which wraps the underlying model object. We retrieve this using the `extract_fit_parsnip()` function. - -* The underlying model object (a.k.a. the engine fit) via the `extract_fit_engine()`. - -We'll use the latter option and then tidy this model object as we did in the previous section. Let's add this to the control function so that we can re-use it. - -```{r lm-ctrl} -get_lm_coefs <- function(x) { - x %>% - # get the lm model object - extract_fit_engine() %>% - # transform its format - tidy() -} -tidy_ctrl <- control_grid(extract = get_lm_coefs) -``` - -This argument is then passed to `fit_resamples()`: - -```{r lm-resampled} -lm_res <- - lm_spec %>% - fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl) -lm_res -``` - -Note that there is a `.extracts` column in our resampling results. This object contains the output of our `get_lm_coefs()` function for each resample. The structure of the elements of this column is a little complex. Let's start by looking at the first element (which corresponds to the first resample): - - -```{r lm-extract-ex} -lm_res$.extracts[[1]] -``` - -There is _another_ column in this element called `.extracts` that has the results of the `tidy()` function call: - -```{r lm-extract-again} -lm_res$.extracts[[1]]$.extracts[[1]] -``` - -These nested columns can be flattened via the tidyr `unnest()` function: - -```{r lm-extract-almost} -lm_res %>% - select(id, .extracts) %>% - unnest(.extracts) -``` - -We still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format: - -```{r lm-extract-final} -lm_coefs <- - lm_res %>% - select(id, .extracts) %>% - unnest(.extracts) %>% - unnest(.extracts) - -lm_coefs %>% select(id, term, estimate, p.value) -``` - -That's better! 
Now, let's plot the model coefficients for each resample: - -```{r lm-plot} -lm_coefs %>% - filter(term != "(Intercept)") %>% - ggplot(aes(x = term, y = estimate, group = id, col = id)) + - geom_hline(yintercept = 0, lty = 3) + - geom_line(alpha = 0.3, linewidth = 1.2) + - labs(y = "Coefficient", x = NULL) + - theme(legend.position = "top") -``` - -There seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two. - -Looking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases. - -## More complex: a glmnet model - -The glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set. - -There are two types of penalization that this model uses: - -* Lasso (a.k.a. $L_1$) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model). - -* Weight decay (a.k.a ridge regression or $L_2$) uses a different type of penalty that is most useful for highly correlated predictors. - -The glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification: - -```{r glmnet-spec} -glmnet_spec <- - linear_reg(penalty = 0.1, mixture = 0.95) %>% - set_engine("glmnet") -``` - -has a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high). - -{{% note %}} Models with regularization require that predictors are all on the same scale. 
The ridership at our three stations are very different, but glmnet [automatically centers and scales the data](https://parsnip.tidymodels.org/reference/details_linear_reg_glmnet.html). You can use recipes to [center and scale your data yourself](https://recipes.tidymodels.org/reference/step_normalize.html). {{%/ note %}} - -Let's combine the model specification with a formula in a model `workflow()` and then fit the model to the data: - -```{r glmnet-wflow} -glmnet_wflow <- - workflow() %>% - add_model(glmnet_spec) %>% - add_formula(ridership ~ .) - -glmnet_fit <- fit(glmnet_wflow, Chicago) -glmnet_fit -``` - -In this output, the term `lambda` is used to represent the penalty. - -Note that the output shows many values of the penalty despite our specification of `penalty = 0.1`. It turns out that this model fits a "path" of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object. - -Let's look at two different approaches to obtaining the coefficients. Both will use the `tidy()` method. One will tidy a glmnet object and the other will tidy a tidymodels object. - -### Using glmnet penalty values - -This glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are `r length(extract_fit_engine(glmnet_fit)$lambda)` penalties available. To get the set of penalties produced for this data set, we can extract the engine fit and tidy: - -```{r glmnet-tidy} -glmnet_fit %>% - extract_fit_engine() %>% - tidy() %>% - rename(penalty = lambda) %>% # <- for consistent naming - filter(term != "(Intercept)") -``` - -This works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! 
The underlying package has functions that use interpolation to produce coefficients for this specific value, but the `tidy()` method for glmnet objects does not use it. - -### Using specific penalty values - -If we run the `tidy()` method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified: - -```{r glmnet-tidy-parsnip} -tidy(glmnet_fit) -``` - -For any other (single) penalty, we can use an additional argument: - -```{r glmnet-tidy-parsnip-alt} -tidy(glmnet_fit, penalty = 5.5620) # A value from above -``` - -The reason for having two `tidy()` methods is that, with tidymodels, the focus is on using a specific penalty value. - - -### Tuning a glmnet model - -If we know a priori acceptable values for penalty and mixture, we can use the `fit_resamples()` function as we did before with linear regression. Otherwise, we can tune those parameters with the tidymodels `tune_*()` functions. - -Let's tune our glmnet model over both parameters with this grid: - -```{r glmnet-grid} -pen_vals <- 10^seq(-3, 0, length.out = 10) -grid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0)) -``` - -Here is where more glmnet-related complexity comes in: we know that each resample and each value of `mixture` will probably produce a different set of penalty values contained in the model object. _How can we look at the coefficients at the specific penalty values that we are using to tune?_ - -The approach that we suggest is to use the special `path_values` option for glmnet. Details are described in the [technical documentation about glmnet and tidymodels](https://parsnip.tidymodels.org/reference/glmnet-details.html#arguments) but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture). 
- -We can pass these as an engine argument and then update our previous workflow object: - -```{r glmnet-tune} -glmnet_tune_spec <- - linear_reg(penalty = tune(), mixture = tune()) %>% - set_engine("glmnet", path_values = pen_vals) - -glmnet_wflow <- - glmnet_wflow %>% - update_model(glmnet_tune_spec) -``` - -Now we will use an extraction function similar to when we used ordinary least squares. We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty: - -```{r glmnet-tuning} -get_glmnet_coefs <- function(x) { - x %>% - extract_fit_engine() %>% - tidy(return_zeros = TRUE) %>% - rename(penalty = lambda) -} -parsnip_ctrl <- control_grid(extract = get_glmnet_coefs) - -glmnet_res <- - glmnet_wflow %>% - tune_grid( - resamples = bt, - grid = grid, - control = parsnip_ctrl - ) -glmnet_res -``` - -As noted before, the elements of the main `.extracts` column have an embedded list column with the results of `get_glmnet_coefs()`: - -```{r glmnet-extract-single} -glmnet_res$.extracts[[1]] %>% head() - -glmnet_res$.extracts[[1]]$.extracts[[1]] %>% head() -``` - -As before, we'll have to use a double `unnest()`. Since the penalty value is in both the top-level and lower-level `.extracts`, we'll use `select()` to get rid of the first version (but keep `mixture`): - -```{r glmnet-extract-1, eval = FALSE} -glmnet_res %>% - select(id, .extracts) %>% - unnest(.extracts) %>% - select(id, mixture, .extracts) %>% # <- removes the first penalty column - unnest(.extracts) -``` - -But wait! We know that each glmnet fit contains all of the coefficients. 
This means, for a specific resample and value of `mixture`, the results are the same: - -```{r glmnet-extract-dups} -all.equal( - # First bootstrap, first `mixture`, first `penalty` - glmnet_res$.extracts[[1]]$.extracts[[1]], - # First bootstrap, first `mixture`, second `penalty` - glmnet_res$.extracts[[1]]$.extracts[[2]] -) -``` - -For this reason, we'll add a `slice(1)` when grouping by `id` and `mixture`. This will get rid of the replicated results. - -```{r glmnet-extract-final} -glmnet_coefs <- - glmnet_res %>% - select(id, .extracts) %>% - unnest(.extracts) %>% - select(id, mixture, .extracts) %>% - group_by(id, mixture) %>% # ┐ - slice(1) %>% # │ Remove the redundant results - ungroup() %>% # ┘ - unnest(.extracts) - -glmnet_coefs %>% - select(id, penalty, mixture, term, estimate) %>% - filter(term != "(Intercept)") -``` - -Now we have the coefficients. Let's look at how they behave as more regularization is used: - -```{r glmnet-plot, fig.height=4, fig.width=8.5} -glmnet_coefs %>% - filter(term != "(Intercept)") %>% - mutate(mixture = format(mixture)) %>% - ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + - geom_hline(yintercept = 0, lty = 3) + - geom_line(alpha = 0.5, linewidth = 1.2) + - facet_wrap(~ term) + - scale_x_log10() + - scale_color_brewer(palette = "Accent") + - labs(y = "coefficient") + - theme(legend.position = "top") -``` - -Notice a couple of things: - -* With a pure lasso model (i.e., `mixture = 1`), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases. - -* The Harlem predictor is either quickly selected out of the model or goes from negative to positive. 
- -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/models/coefficients/index.markdown b/content/learn/models/coefficients/index.markdown deleted file mode 100644 index ed1ff147..00000000 --- a/content/learn/models/coefficients/index.markdown +++ /dev/null @@ -1,613 +0,0 @@ ---- -title: "Working with model coefficients" -tags: [parsnip,tune,broom,workflows] -categories: [model fitting] -type: learn-subsection -weight: 5 -description: | - Create models that use coefficients, extract them from fitted models, and visualize them. ---- - - - -## Introduction - -There are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients. - -This article describes how to retrieve the estimated coefficients from models fit using tidymodels. To use the code in this article, you will need to install the following packages: glmnet and tidymodels. - -## Linear regression - -Let's start with a linear regression model: - -`$$\hat{y} = \hat{\beta}_0 + \hat{\beta}_1x_1 + \ldots + \hat{\beta}_px_p$$` - -The `\(\beta\)` values are the coefficients and the `\(x_j\)` are model predictors, or features. - -Let's use the [Chicago train data](https://bookdown.org/max/FES/chicago-intro.html) where we predict the ridership at the Clark and Lake station (column name: `ridership`) with the previous ridership data 14 days prior at three of the stations. 
- -The data are in the modeldata package: - - -```r -library(tidymodels) -tidymodels_prefer() -theme_set(theme_bw()) - -data(Chicago) - -Chicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem) -``` - -### A single model - -Let's start by fitting only a single parsnip model object. We'll create a model specification using `linear_reg()`. - -{{% note %}} The default engine is `"lm"` so no call to `set_engine()` is required. {{%/ note %}} - -The `fit()` function estimates the model coefficients, given a formula and data set. - - - -```r -lm_spec <- linear_reg() -lm_fit <- fit(lm_spec, ridership ~ ., data = Chicago) -lm_fit -#> parsnip model object -#> -#> -#> Call: -#> stats::lm(formula = ridership ~ ., data = data) -#> -#> Coefficients: -#> (Intercept) Clark_Lake Austin Harlem -#> 1.678 0.904 0.612 -0.555 -``` - -The best way to retrieve the fitted parameters is to use the `tidy()` method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names: - - -```r -tidy(lm_fit) -#> # A tibble: 4 × 5 -#> term estimate std.error statistic p.value -#> -#> 1 (Intercept) 1.68 0.156 10.7 1.11e- 26 -#> 2 Clark_Lake 0.904 0.0280 32.3 5.14e-210 -#> 3 Austin 0.612 0.320 1.91 5.59e- 2 -#> 4 Harlem -0.555 0.165 -3.36 7.85e- 4 -``` - -We'll use this function in subsequent sections. - -### Resampled or tuned models - -The tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the [bootstrap](https://www.tmwr.org/resampling.html#bootstrap) to resample the data. This is a standard resampling approach when evaluating the uncertainty in statistical estimates. - -We'll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates). 
- - -```r -set.seed(123) -bt <- bootstraps(Chicago, times = 5) -``` - -With resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function [`fit_resamples()`](https://www.tmwr.org/resampling.html#resampling-performance) is the recommended approach for doing so. - -{{% warning %}} The `fit_resamples()` function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to `fit_resamples()` that _can_ save the model object or any other aspect of the fit. {{%/ warning %}} - -This function takes a single argument that represents the fitted [workflow object](https://www.tmwr.org/workflows.html) (even if you don't give `fit_resamples()` a workflow). - -From this, we can extract the model fit. There are two "levels" of model objects that are available: - -* The parsnip model object, which wraps the underlying model object. We retrieve this using the `extract_fit_parsnip()` function. - -* The underlying model object (a.k.a. the engine fit) via the `extract_fit_engine()`. - -We'll use the latter option and then tidy this model object as we did in the previous section. Let's add this to the control function so that we can re-use it. - - -```r -get_lm_coefs <- function(x) { - x %>% - # get the lm model object - extract_fit_engine() %>% - # transform its format - tidy() -} -tidy_ctrl <- control_grid(extract = get_lm_coefs) -``` - -This argument is then passed to `fit_resamples()`: - - -```r -lm_res <- - lm_spec %>% - fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl) -lm_res -#> # Resampling results -#> # Bootstrap sampling -#> # A tibble: 5 × 5 -#> splits id .metrics .notes .extracts -#> -#> 1 Bootstrap1 -#> 2 Bootstrap2 -#> 3 Bootstrap3 -#> 4 Bootstrap4 -#> 5 Bootstrap5 -``` - -Note that there is a `.extracts` column in our resampling results. 
This object contains the output of our `get_lm_coefs()` function for each resample. The structure of the elements of this column is a little complex. Let's start by looking at the first element (which corresponds to the first resample): - - - -```r -lm_res$.extracts[[1]] -#> # A tibble: 1 × 2 -#> .extracts .config -#> -#> 1 Preprocessor1_Model1 -``` - -There is _another_ column in this element called `.extracts` that has the results of the `tidy()` function call: - - -```r -lm_res$.extracts[[1]]$.extracts[[1]] -#> # A tibble: 4 × 5 -#> term estimate std.error statistic p.value -#> -#> 1 (Intercept) 1.40 0.157 8.90 7.23e- 19 -#> 2 Clark_Lake 0.842 0.0280 30.1 2.39e-184 -#> 3 Austin 1.46 0.320 4.54 5.70e- 6 -#> 4 Harlem -0.637 0.163 -3.92 9.01e- 5 -``` - -These nested columns can be flattened via the tidyr `unnest()` function: - - -```r -lm_res %>% - select(id, .extracts) %>% - unnest(.extracts) -#> # A tibble: 5 × 3 -#> id .extracts .config -#> -#> 1 Bootstrap1 Preprocessor1_Model1 -#> 2 Bootstrap2 Preprocessor1_Model1 -#> 3 Bootstrap3 Preprocessor1_Model1 -#> 4 Bootstrap4 Preprocessor1_Model1 -#> 5 Bootstrap5 Preprocessor1_Model1 -``` - -We still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format: - - -```r -lm_coefs <- - lm_res %>% - select(id, .extracts) %>% - unnest(.extracts) %>% - unnest(.extracts) - -lm_coefs %>% select(id, term, estimate, p.value) -#> # A tibble: 20 × 4 -#> id term estimate p.value -#> -#> 1 Bootstrap1 (Intercept) 1.40 7.23e- 19 -#> 2 Bootstrap1 Clark_Lake 0.842 2.39e-184 -#> 3 Bootstrap1 Austin 1.46 5.70e- 6 -#> 4 Bootstrap1 Harlem -0.637 9.01e- 5 -#> 5 Bootstrap2 (Intercept) 1.69 2.87e- 28 -#> 6 Bootstrap2 Clark_Lake 0.911 1.06e-219 -#> 7 Bootstrap2 Austin 0.595 5.93e- 2 -#> 8 Bootstrap2 Harlem -0.580 3.88e- 4 -#> 9 Bootstrap3 (Intercept) 1.27 3.43e- 16 -#> 10 Bootstrap3 Clark_Lake 0.859 5.03e-194 -#> 11 Bootstrap3 Austin 1.09 6.77e- 4 -#> 12 Bootstrap3 Harlem -0.470 4.34e- 3 -#> 
13 Bootstrap4 (Intercept) 1.95 2.91e- 34 -#> 14 Bootstrap4 Clark_Lake 0.974 1.47e-233 -#> 15 Bootstrap4 Austin -0.116 7.21e- 1 -#> 16 Bootstrap4 Harlem -0.620 2.11e- 4 -#> 17 Bootstrap5 (Intercept) 1.87 1.98e- 33 -#> 18 Bootstrap5 Clark_Lake 0.901 1.16e-210 -#> 19 Bootstrap5 Austin 0.494 1.15e- 1 -#> 20 Bootstrap5 Harlem -0.512 1.73e- 3 -``` - -That's better! Now, let's plot the model coefficients for each resample: - - -```r -lm_coefs %>% - filter(term != "(Intercept)") %>% - ggplot(aes(x = term, y = estimate, group = id, col = id)) + - geom_hline(yintercept = 0, lty = 3) + - geom_line(alpha = 0.3, linewidth = 1.2) + - labs(y = "Coefficient", x = NULL) + - theme(legend.position = "top") -``` - - - -There seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two. - -Looking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases. - -## More complex: a glmnet model - -The glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set. - -There are two types of penalization that this model uses: - -* Lasso (a.k.a. `\(L_1\)`) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model). - -* Weight decay (a.k.a ridge regression or `\(L_2\)`) uses a different type of penalty that is most useful for highly correlated predictors. - -The glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. 
For example, this specification: - - -```r -glmnet_spec <- - linear_reg(penalty = 0.1, mixture = 0.95) %>% - set_engine("glmnet") -``` - -has a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high). - -{{% note %}} Models with regularization require that predictors are all on the same scale. The ridership at our three stations are very different, but glmnet [automatically centers and scales the data](https://parsnip.tidymodels.org/reference/details_linear_reg_glmnet.html). You can use recipes to [center and scale your data yourself](https://recipes.tidymodels.org/reference/step_normalize.html). {{%/ note %}} - -Let's combine the model specification with a formula in a model `workflow()` and then fit the model to the data: - - -```r -glmnet_wflow <- - workflow() %>% - add_model(glmnet_spec) %>% - add_formula(ridership ~ .) - -glmnet_fit <- fit(glmnet_wflow, Chicago) -glmnet_fit -#> ══ Workflow [trained] ════════════════════════════════════════════════ -#> Preprocessor: Formula -#> Model: linear_reg() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> ridership ~ . 
-#> -#> ── Model ───────────────────────────────────────────────────────────── -#> -#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~0.95) -#> -#> Df %Dev Lambda -#> 1 0 0.0 6.10 -#> 2 1 12.8 5.56 -#> 3 1 23.4 5.07 -#> 4 1 32.4 4.62 -#> 5 1 40.0 4.21 -#> 6 1 46.2 3.83 -#> 7 1 51.5 3.49 -#> 8 1 55.9 3.18 -#> 9 1 59.6 2.90 -#> 10 1 62.7 2.64 -#> 11 2 65.3 2.41 -#> 12 2 67.4 2.19 -#> 13 2 69.2 2.00 -#> 14 2 70.7 1.82 -#> 15 2 72.0 1.66 -#> 16 2 73.0 1.51 -#> 17 2 73.9 1.38 -#> 18 2 74.6 1.26 -#> 19 2 75.2 1.14 -#> 20 2 75.7 1.04 -#> 21 2 76.1 0.95 -#> 22 2 76.4 0.86 -#> 23 2 76.7 0.79 -#> 24 2 76.9 0.72 -#> 25 2 77.1 0.66 -#> 26 2 77.3 0.60 -#> 27 2 77.4 0.54 -#> 28 2 77.6 0.50 -#> 29 2 77.6 0.45 -#> 30 2 77.7 0.41 -#> 31 2 77.8 0.38 -#> 32 2 77.8 0.34 -#> 33 2 77.9 0.31 -#> 34 2 77.9 0.28 -#> 35 2 78.0 0.26 -#> 36 2 78.0 0.23 -#> 37 2 78.0 0.21 -#> 38 2 78.0 0.20 -#> 39 2 78.0 0.18 -#> 40 2 78.0 0.16 -#> 41 2 78.0 0.15 -#> 42 2 78.1 0.14 -#> 43 2 78.1 0.12 -#> 44 2 78.1 0.11 -#> 45 2 78.1 0.10 -#> 46 2 78.1 0.09 -#> -#> ... -#> and 9 more lines. -``` - -In this output, the term `lambda` is used to represent the penalty. - -Note that the output shows many values of the penalty despite our specification of `penalty = 0.1`. It turns out that this model fits a "path" of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object. - -Let's look at two different approaches to obtaining the coefficients. Both will use the `tidy()` method. One will tidy a glmnet object and the other will tidy a tidymodels object. - -### Using glmnet penalty values - -This glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are 55 penalties available. 
To get the set of penalties produced for this data set, we can extract the engine fit and tidy: - - -```r -glmnet_fit %>% - extract_fit_engine() %>% - tidy() %>% - rename(penalty = lambda) %>% # <- for consistent naming - filter(term != "(Intercept)") -#> # A tibble: 99 × 5 -#> term step estimate penalty dev.ratio -#> -#> 1 Clark_Lake 2 0.0753 5.56 0.127 -#> 2 Clark_Lake 3 0.145 5.07 0.234 -#> 3 Clark_Lake 4 0.208 4.62 0.324 -#> 4 Clark_Lake 5 0.266 4.21 0.400 -#> 5 Clark_Lake 6 0.319 3.83 0.463 -#> 6 Clark_Lake 7 0.368 3.49 0.515 -#> 7 Clark_Lake 8 0.413 3.18 0.559 -#> 8 Clark_Lake 9 0.454 2.90 0.596 -#> 9 Clark_Lake 10 0.491 2.64 0.627 -#> 10 Clark_Lake 11 0.526 2.41 0.653 -#> # … with 89 more rows -``` - -This works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! The underlying package has functions that use interpolation to produce coefficients for this specific value, but the `tidy()` method for glmnet objects does not use it. - -### Using specific penalty values - -If we run the `tidy()` method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified: - - -```r -tidy(glmnet_fit) -#> # A tibble: 4 × 3 -#> term estimate penalty -#> -#> 1 (Intercept) 1.69 0.1 -#> 2 Clark_Lake 0.846 0.1 -#> 3 Austin 0.271 0.1 -#> 4 Harlem 0 0.1 -``` - -For any another (single) penalty, we can use an additional argument: - - -```r -tidy(glmnet_fit, penalty = 5.5620) # A value from above -#> # A tibble: 4 × 3 -#> term estimate penalty -#> -#> 1 (Intercept) 12.6 5.56 -#> 2 Clark_Lake 0.0753 5.56 -#> 3 Austin 0 5.56 -#> 4 Harlem 0 5.56 -``` - -The reason for having two `tidy()` methods is that, with tidymodels, the focus is on using a specific penalty value. - - -### Tuning a glmnet model - -If we know a priori acceptable values for penalty and mixture, we can use the `fit_resamples()` function as we did before with linear regression. 
Otherwise, we can tune those parameters with the tidymodels `tune_*()` functions. - -Let's tune our glmnet model over both parameters with this grid: - - -```r -pen_vals <- 10^seq(-3, 0, length.out = 10) -grid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0)) -``` - -Here is where more glmnet-related complexity comes in: we know that each resample and each value of `mixture` will probably produce a different set of penalty values contained in the model object. _How can we look at the coefficients at the specific penalty values that we are using to tune?_ - -The approach that we suggest is to use the special `path_values` option for glmnet. Details are described in the [technical documentation about glmnet and tidymodels](https://parsnip.tidymodels.org/reference/glmnet-details.html#arguments) but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture). - -We can pass these as an engine argument and then update our previous workflow object: - - -```r -glmnet_tune_spec <- - linear_reg(penalty = tune(), mixture = tune()) %>% - set_engine("glmnet", path_values = pen_vals) - -glmnet_wflow <- - glmnet_wflow %>% - update_model(glmnet_tune_spec) -``` - -Now we will use an extraction function similar to when we used ordinary least squares. 
We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty: - - -```r -get_glmnet_coefs <- function(x) { - x %>% - extract_fit_engine() %>% - tidy(return_zeros = TRUE) %>% - rename(penalty = lambda) -} -parsnip_ctrl <- control_grid(extract = get_glmnet_coefs) - -glmnet_res <- - glmnet_wflow %>% - tune_grid( - resamples = bt, - grid = grid, - control = parsnip_ctrl - ) -glmnet_res -#> # Tuning results -#> # Bootstrap sampling -#> # A tibble: 5 × 5 -#> splits id .metrics .notes .extracts -#> -#> 1 Bootstrap1 -#> 2 Bootstrap2 -#> 3 Bootstrap3 -#> 4 Bootstrap4 -#> 5 Bootstrap5 -``` - -As noted before, the elements of the main `.extracts` column have an embedded list column with the results of `get_glmnet_coefs()`: - - -```r -glmnet_res$.extracts[[1]] %>% head() -#> # A tibble: 6 × 4 -#> penalty mixture .extracts .config -#> -#> 1 1 0.1 Preprocessor1_Model01 -#> 2 1 0.1 Preprocessor1_Model02 -#> 3 1 0.1 Preprocessor1_Model03 -#> 4 1 0.1 Preprocessor1_Model04 -#> 5 1 0.1 Preprocessor1_Model05 -#> 6 1 0.1 Preprocessor1_Model06 - -glmnet_res$.extracts[[1]]$.extracts[[1]] %>% head() -#> # A tibble: 6 × 5 -#> term step estimate penalty dev.ratio -#> -#> 1 (Intercept) 1 0.568 1 0.769 -#> 2 (Intercept) 2 0.432 0.464 0.775 -#> 3 (Intercept) 3 0.607 0.215 0.779 -#> 4 (Intercept) 4 0.846 0.1 0.781 -#> 5 (Intercept) 5 1.06 0.0464 0.782 -#> 6 (Intercept) 6 1.22 0.0215 0.783 -``` - -As before, we'll have to use a double `unnest()`. Since the penalty value is in both the top-level and lower-level `.extracts`, we'll use `select()` to get rid of the first version (but keep `mixture`): - - -```r -glmnet_res %>% - select(id, .extracts) %>% - unnest(.extracts) %>% - select(id, mixture, .extracts) %>% # <- removes the first penalty column - unnest(.extracts) -``` - -But wait! We know that each glmnet fit contains all of the coefficients. 
This means, for a specific resample and value of `mixture`, the results are the same: - - -```r -all.equal( - # First bootstrap, first `mixture`, first `penalty` - glmnet_res$.extracts[[1]]$.extracts[[1]], - # First bootstrap, first `mixture`, second `penalty` - glmnet_res$.extracts[[1]]$.extracts[[2]] -) -#> [1] TRUE -``` - -For this reason, we'll add a `slice(1)` when grouping by `id` and `mixture`. This will get rid of the replicated results. - - -```r -glmnet_coefs <- - glmnet_res %>% - select(id, .extracts) %>% - unnest(.extracts) %>% - select(id, mixture, .extracts) %>% - group_by(id, mixture) %>% # ┐ - slice(1) %>% # │ Remove the redundant results - ungroup() %>% # ┘ - unnest(.extracts) - -glmnet_coefs %>% - select(id, penalty, mixture, term, estimate) %>% - filter(term != "(Intercept)") -#> # A tibble: 300 × 5 -#> id penalty mixture term estimate -#> -#> 1 Bootstrap1 1 0.1 Clark_Lake 0.391 -#> 2 Bootstrap1 0.464 0.1 Clark_Lake 0.485 -#> 3 Bootstrap1 0.215 0.1 Clark_Lake 0.590 -#> 4 Bootstrap1 0.1 0.1 Clark_Lake 0.680 -#> 5 Bootstrap1 0.0464 0.1 Clark_Lake 0.746 -#> 6 Bootstrap1 0.0215 0.1 Clark_Lake 0.793 -#> 7 Bootstrap1 0.01 0.1 Clark_Lake 0.817 -#> 8 Bootstrap1 0.00464 0.1 Clark_Lake 0.828 -#> 9 Bootstrap1 0.00215 0.1 Clark_Lake 0.834 -#> 10 Bootstrap1 0.001 0.1 Clark_Lake 0.837 -#> # … with 290 more rows -``` - -Now we have the coefficients. Let's look at how they behave as more regularization is used: - - -```r -glmnet_coefs %>% - filter(term != "(Intercept)") %>% - mutate(mixture = format(mixture)) %>% - ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + - geom_hline(yintercept = 0, lty = 3) + - geom_line(alpha = 0.5, linewidth = 1.2) + - facet_wrap(~ term) + - scale_x_log10() + - scale_color_brewer(palette = "Accent") + - labs(y = "coefficient") + - theme(legend.position = "top") -``` - - - -Notice a couple of things: - -* With a pure lasso model (i.e., `mixture = 1`), the Austin station predictor is selected out in each resample. 
With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases. - -* The Harlem predictor is either quickly selected out of the model or goes from negative to positive. - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> glmnet * 4.1-6 2022-11-27 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/models/parsnip-nnet/figs/biv-boundary-1.svg b/content/learn/models/parsnip-nnet/figs/biv-boundary-1.svg deleted file mode 100644 index 55a10249..00000000 --- a/content/learn/models/parsnip-nnet/figs/biv-boundary-1.svg 
+++ /dev/null @@ -1,373 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -100 -200 -300 - - - - - - -2500 -5000 -7500 -A -B - -Class - - - - -One -Two - - diff --git a/content/learn/models/parsnip-nnet/figs/biv-plot-1.svg b/content/learn/models/parsnip-nnet/figs/biv-plot-1.svg deleted file mode 100644 index 8afe09a0..00000000 --- a/content/learn/models/parsnip-nnet/figs/biv-plot-1.svg +++ /dev/null @@ -1,1086 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -100 -200 -300 -400 - - - - - - - -2500 -5000 -7500 -A -B - -Class - - - - -One -Two - - diff --git a/content/learn/models/parsnip-nnet/index.Rmarkdown b/content/learn/models/parsnip-nnet/index.Rmarkdown deleted file mode 100755 index d5143a86..00000000 --- a/content/learn/models/parsnip-nnet/index.Rmarkdown +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: "Classification models using a neural network" -tags: [rsample, parsnip] -categories: [model fitting] -type: learn-subsection -weight: 2 -description: | - Train a classification model and evaluate its performance. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include=FALSE} -library(tidymodels) -pkgs <- c("tidymodels", "keras") - -theme_set(theme_bw() + theme(legend.position = "top")) -set.seed(1234) -parsnip::set_tf_seed(1234) -``` - - -## Introduction - -`r req_pkgs(pkgs)` You will also need the python keras library installed (see `?keras::install_keras()`). - -We can create classification models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict categorical quantities or class labels. Here, let's fit a single classification model using a neural network and evaluate using a validation set. While the [tune](https://tune.tidymodels.org/) package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage. - -## Fitting a neural network - - -Let's fit a model to a small, two predictor classification data set. The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered. - - -```{r biv--split} -data(bivariate) -nrow(bivariate_train) -nrow(bivariate_val) -``` - -A plot of the data shows two right-skewed predictors: - -```{r biv-plot, fig.width = 6, fig.height = 6.1} -ggplot(bivariate_train, aes(x = A, y = B, col = Class)) + - geom_point(alpha = .2) -``` - -Let's use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be more symmetric (via the `step_BoxCox()` function) and on a common scale (using `step_normalize()`). 
We can use [recipes](https://recipes.tidymodels.org/) to do so: - -```{r biv--proc} -biv_rec <- - recipe(Class ~ ., data = bivariate_train) %>% - step_BoxCox(all_predictors())%>% - step_normalize(all_predictors()) %>% - prep(training = bivariate_train, retain = TRUE) - -# We will bake(new_data = NULL) to get the processed training set back - -# For validation: -val_normalized <- bake(biv_rec, new_data = bivariate_val, all_predictors()) -# For testing when we arrive at a final model: -test_normalized <- bake(biv_rec, new_data = bivariate_test, all_predictors()) -``` - -We can use the keras package to fit a model with 5 hidden units and a 10% dropout rate, to regularize the model: - -```{r biv--nnet} -set.seed(57974) -nnet_fit <- - mlp(epochs = 100, hidden_units = 5, dropout = 0.1) %>% - set_mode("classification") %>% - # Also set engine-specific `verbose` argument to prevent logging the results: - set_engine("keras", verbose = 0) %>% - fit(Class ~ ., data = bake(biv_rec, new_data = NULL)) - -nnet_fit -``` - -## Model performance - -In parsnip, the `predict()` function can be used to characterize performance on the validation set. Since parsnip always produces tibble outputs, these can just be column bound to the original data: - -```{r biv--perf} -val_results <- - bivariate_val %>% - bind_cols( - predict(nnet_fit, new_data = val_normalized), - predict(nnet_fit, new_data = val_normalized, type = "prob") - ) -val_results %>% slice(1:5) - -val_results %>% roc_auc(truth = Class, .pred_One) - -val_results %>% accuracy(truth = Class, .pred_class) - -val_results %>% conf_mat(truth = Class, .pred_class) -``` - -Let's also create a grid to get a visual sense of the class boundary for the validation set. 
- -```{r biv-boundary, fig.width = 6, fig.height = 6.1} -a_rng <- range(bivariate_train$A) -b_rng <- range(bivariate_train$B) -x_grid <- - expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100), - B = seq(b_rng[1], b_rng[2], length.out = 100)) -x_grid_trans <- bake(biv_rec, x_grid) - -# Make predictions using the transformed predictors but -# attach them to the predictors in the original units: -x_grid <- - x_grid %>% - bind_cols(predict(nnet_fit, x_grid_trans, type = "prob")) - -ggplot(x_grid, aes(x = A, y = B)) + - geom_contour(aes(z = .pred_One), breaks = .5, col = "black") + - geom_point(data = bivariate_val, aes(col = Class), alpha = 0.3) -``` - - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/models/parsnip-nnet/index.markdown b/content/learn/models/parsnip-nnet/index.markdown deleted file mode 100644 index 015baa16..00000000 --- a/content/learn/models/parsnip-nnet/index.markdown +++ /dev/null @@ -1,209 +0,0 @@ ---- -title: "Classification models using a neural network" -tags: [rsample, parsnip] -categories: [model fitting] -type: learn-subsection -weight: 2 -description: | - Train a classification model and evaluate its performance. ---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: keras and tidymodels. You will also need the python keras library installed (see `?keras::install_keras()`). - -We can create classification models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict categorical quantities or class labels. Here, let's fit a single classification model using a neural network and evaluate using a validation set. While the [tune](https://tune.tidymodels.org/) package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage. 
- -## Fitting a neural network - - -Let's fit a model to a small, two predictor classification data set. The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered. - - - -```r -data(bivariate) -nrow(bivariate_train) -#> [1] 1009 -nrow(bivariate_val) -#> [1] 300 -``` - -A plot of the data shows two right-skewed predictors: - - -```r -ggplot(bivariate_train, aes(x = A, y = B, col = Class)) + - geom_point(alpha = .2) -``` - - - -Let's use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be more symmetric (via the `step_BoxCox()` function) and on a common scale (using `step_normalize()`). We can use [recipes](https://recipes.tidymodels.org/) to do so: - - -```r -biv_rec <- - recipe(Class ~ ., data = bivariate_train) %>% - step_BoxCox(all_predictors())%>% - step_normalize(all_predictors()) %>% - prep(training = bivariate_train, retain = TRUE) - -# We will bake(new_data = NULL) to get the processed training set back - -# For validation: -val_normalized <- bake(biv_rec, new_data = bivariate_val, all_predictors()) -# For testing when we arrive at a final model: -test_normalized <- bake(biv_rec, new_data = bivariate_test, all_predictors()) -``` - -We can use the keras package to fit a model with 5 hidden units and a 10% dropout rate, to regularize the model: - - -```r -set.seed(57974) -nnet_fit <- - mlp(epochs = 100, hidden_units = 5, dropout = 0.1) %>% - set_mode("classification") %>% - # Also set engine-specific `verbose` argument to prevent logging the results: - set_engine("keras", verbose = 0) %>% - fit(Class ~ ., data = bake(biv_rec, new_data = NULL)) - -nnet_fit -#> parsnip model object -#> -#> Model: "sequential" -#> 
________________________________________________________________________________ -#> Layer (type) Output Shape Param # -#> ================================================================================ -#> dense (Dense) (None, 5) 15 -#> dense_1 (Dense) (None, 5) 30 -#> dropout (Dropout) (None, 5) 0 -#> dense_2 (Dense) (None, 2) 12 -#> ================================================================================ -#> Total params: 57 -#> Trainable params: 57 -#> Non-trainable params: 0 -#> ________________________________________________________________________________ -``` - -## Model performance - -In parsnip, the `predict()` function can be used to characterize performance on the validation set. Since parsnip always produces tibble outputs, these can just be column bound to the original data: - - -```r -val_results <- - bivariate_val %>% - bind_cols( - predict(nnet_fit, new_data = val_normalized), - predict(nnet_fit, new_data = val_normalized, type = "prob") - ) -val_results %>% slice(1:5) -#> # A tibble: 5 × 6 -#> A B Class .pred_class .pred_One .pred_Two -#> -#> 1 1061. 74.5 One Two 0.478 0.522 -#> 2 1241. 83.4 One Two 0.499 0.501 -#> 3 939. 71.9 One One 0.828 0.172 -#> 4 813. 77.1 One One 0.986 0.0142 -#> 5 1706. 92.8 Two Two 0.214 0.786 - -val_results %>% roc_auc(truth = Class, .pred_One) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 0.815 - -val_results %>% accuracy(truth = Class, .pred_class) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 accuracy binary 0.74 - -val_results %>% conf_mat(truth = Class, .pred_class) -#> Truth -#> Prediction One Two -#> One 151 27 -#> Two 51 71 -``` - -Let's also create a grid to get a visual sense of the class boundary for the validation set. 
- - -```r -a_rng <- range(bivariate_train$A) -b_rng <- range(bivariate_train$B) -x_grid <- - expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100), - B = seq(b_rng[1], b_rng[2], length.out = 100)) -x_grid_trans <- bake(biv_rec, x_grid) - -# Make predictions using the transformed predictors but -# attach them to the predictors in the original units: -x_grid <- - x_grid %>% - bind_cols(predict(nnet_fit, x_grid_trans, type = "prob")) - -ggplot(x_grid, aes(x = A, y = B)) + - geom_contour(aes(z = .pred_One), breaks = .5, col = "black") + - geom_point(data = bivariate_val, aes(col = Class), alpha = 0.3) -``` - - - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> keras 2.9.0 2022-05-23 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] 
/Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ─ Python configuration ───────────────────────────────────────────── -#> python: /Users/emilhvitfeldthansen/Library/r-miniconda/envs/r-reticulate/bin/python -#> libpython: /Users/emilhvitfeldthansen/Library/r-miniconda/envs/r-reticulate/lib/libpython3.7m.dylib -#> pythonhome: /Users/emilhvitfeldthansen/Library/r-miniconda/envs/r-reticulate:/Users/emilhvitfeldthansen/Library/r-miniconda/envs/r-reticulate -#> version: 3.7.11 (default, Jul 27 2021, 07:03:16) [Clang 10.0.0 ] -#> numpy: /Users/emilhvitfeldthansen/Library/r-miniconda/envs/r-reticulate/lib/python3.7/site-packages/numpy -#> numpy_version: 1.21.6 -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/models/parsnip-ranger-glmnet/index.Rmarkdown b/content/learn/models/parsnip-ranger-glmnet/index.Rmarkdown deleted file mode 100755 index c3261947..00000000 --- a/content/learn/models/parsnip-ranger-glmnet/index.Rmarkdown +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: "Regression models two ways" -tags: [rsample, parsnip] -categories: [model fitting] -type: learn-subsection -weight: 1 -description: | - Create and train different kinds of regression models with different computational engines. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include=FALSE} -library(tidymodels) -library(ranger) -library(randomForest) -library(glmnet) - -preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold") -pred_names <- paste0("`", preds, "`") - -pkgs <- c("tidymodels", "ranger", "randomForest", "glmnet") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction - -`r req_pkgs(pkgs)` - -We can create regression models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict continuous or numeric quantities. 
Here, let's first fit a random forest model, which does _not_ require all numeric input (see discussion [here](https://bookdown.org/max/FES/categorical-trees.html)) and discuss how to use `fit()` and `fit_xy()`, as well as _data descriptors_. - -Second, let's fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip. - -## The Ames housing data - -We'll use the Ames housing data set to demonstrate how to create regression models using parsnip. First, set up the data set and create a simple training/test set split: - -```{r ames-split} -library(tidymodels) - -data(ames) - -set.seed(4595) -data_split <- initial_split(ames, strata = "Sale_Price", prop = 0.75) - -ames_train <- training(data_split) -ames_test <- testing(data_split) -``` - -The use of the test set here is _only for illustration_; normally in a data analysis these data would be saved to the very end after many models have been evaluated. - -## Random forest - -We'll start by fitting a random forest model to a small set of parameters. Let's create a model with the predictors `r knitr::combine_words(pred_names)`. A simple random forest model can be specified via: - -```{r rf-basic} -rf_defaults <- rand_forest(mode = "regression") -rf_defaults -``` - -The model will be fit with the ranger package by default. Since we didn't add any extra arguments to `fit`, _many_ of the arguments will be set to their defaults from the function `ranger::ranger()`. The help pages for the model function describe the default parameters and you can also use the `translate()` function to check out such details. - -The parsnip package provides two different interfaces to fit a model: - -- the formula interface (`fit()`), and -- the non-formula interface (`fit_xy()`). 
- -Let's start with the non-formula interface: - - -```{r rf-basic-xy} -preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold") - -rf_xy_fit <- - rf_defaults %>% - set_engine("ranger") %>% - fit_xy( - x = ames_train[, preds], - y = log10(ames_train$Sale_Price) - ) - -rf_xy_fit -``` - -The non-formula interface doesn't do anything to the predictors before passing them to the underlying model function. This particular model does _not_ require indicator variables (sometimes called "dummy variables") to be created prior to fitting the model. Note that the output shows "Number of independent variables: 5". - -For regression models, we can use the basic `predict()` method, which returns a tibble with a column named `.pred`: - -```{r rf-basic-xy-pred} -test_results <- - ames_test %>% - select(Sale_Price) %>% - mutate(Sale_Price = log10(Sale_Price)) %>% - bind_cols( - predict(rf_xy_fit, new_data = ames_test[, preds]) - ) -test_results %>% slice(1:5) - -# summarize performance -test_results %>% metrics(truth = Sale_Price, estimate = .pred) -``` - -Note that: - - * If the model required indicator variables, we would have to create them manually prior to using `fit()` (perhaps using the recipes package). - * We had to manually log the outcome prior to modeling. - -Now, for illustration, let's use the formula method using some new parameter values: - -```{r rf-basic-form} -rand_forest(mode = "regression", mtry = 3, trees = 1000) %>% - set_engine("ranger") %>% - fit( - log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) -``` - -Suppose that we would like to use the randomForest package instead of ranger. 
To do so, the only part of the syntax that needs to change is the `set_engine()` argument: - - -```{r rf-rf} -rand_forest(mode = "regression", mtry = 3, trees = 1000) %>% - set_engine("randomForest") %>% - fit( - log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) -``` - -Look at the formula code that was printed out; one function uses the argument name `ntree` and the other uses `num.trees`. The parsnip models don't require you to know the specific names of the main arguments. - -Now suppose that we want to modify the value of `mtry` based on the number of predictors in the data. Usually, a good default value is `floor(sqrt(num_predictors))` but a pure bagging model requires an `mtry` value equal to the total number of parameters. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code. - -When the model is being fit by parsnip, [_data descriptors_](https://parsnip.tidymodels.org/reference/descriptors.html) are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using `rand_forest()`), the values of the arguments that you give it are _immediately evaluated_ unless you delay them. To delay the evaluation of any argument, you can use `rlang::expr()` to make an expression. - -Two relevant data descriptors for our example model are: - - * `.preds()`: the number of predictor _variables_ in the data set that are associated with the predictors **prior to dummy variable creation**. - * `.cols()`: the number of predictor _columns_ after dummy variables (or other encodings) are created. - -Since ranger won't create indicator values, `.preds()` would be appropriate for `mtry` for a bagging model. 
- -For example, let's use an expression with the `.preds()` descriptor to fit a bagging model: - -```{r bagged} -rand_forest(mode = "regression", mtry = .preds(), trees = 1000) %>% - set_engine("ranger") %>% - fit( - log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) -``` - - -## Regularized regression - -A linear model might work for this data set as well. We can use the `linear_reg()` parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let's use the former here. The glmnet package only implements a non-formula method, but parsnip will allow either one to be used. - -When regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won't do that automatically so we will need to do this ourselves. We'll use the [recipes](https://recipes.tidymodels.org/) package for these steps. - -```{r glmn-form} -norm_recipe <- - recipe( - Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) %>% - step_other(Neighborhood) %>% - step_dummy(all_nominal()) %>% - step_center(all_predictors()) %>% - step_scale(all_predictors()) %>% - step_log(Sale_Price, base = 10) %>% - # estimate the means and standard deviations - prep(training = ames_train, retain = TRUE) - -# Now let's fit the model using the processed version of the data - -glmn_fit <- - linear_reg(penalty = 0.001, mixture = 0.5) %>% - set_engine("glmnet") %>% - fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL)) -glmn_fit -``` - -If `penalty` were not specified, all of the `lambda` values would be computed. 
- -To get the predictions for this specific value of `lambda` (aka `penalty`): - -```{r glmn-pred} -# First, get the processed version of the test set predictors: -test_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors()) - -test_results <- - test_results %>% - rename(`random forest` = .pred) %>% - bind_cols( - predict(glmn_fit, new_data = test_normalized) %>% - rename(glmnet = .pred) - ) -test_results - -test_results %>% metrics(truth = Sale_Price, estimate = glmnet) - -test_results %>% - gather(model, prediction, -Sale_Price) %>% - ggplot(aes(x = prediction, y = Sale_Price)) + - geom_abline(col = "green", lty = 2) + - geom_point(alpha = .4) + - facet_wrap(~model) + - coord_fixed() -``` - -This final plot compares the performance of the random forest and regularized regression models. - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/models/parsnip-ranger-glmnet/index.markdown b/content/learn/models/parsnip-ranger-glmnet/index.markdown deleted file mode 100644 index 969031f8..00000000 --- a/content/learn/models/parsnip-ranger-glmnet/index.markdown +++ /dev/null @@ -1,428 +0,0 @@ ---- -title: "Regression models two ways" -tags: [rsample, parsnip] -categories: [model fitting] -type: learn-subsection -weight: 1 -description: | - Create and train different kinds of regression models with different computational engines. ---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: glmnet, randomForest, ranger, and tidymodels. - -We can create regression models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict continuous or numeric quantities. Here, let's first fit a random forest model, which does _not_ require all numeric input (see discussion [here](https://bookdown.org/max/FES/categorical-trees.html)) and discuss how to use `fit()` and `fit_xy()`, as well as _data descriptors_. 
- -Second, let's fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip. - -## The Ames housing data - -We'll use the Ames housing data set to demonstrate how to create regression models using parsnip. First, set up the data set and create a simple training/test set split: - - -```r -library(tidymodels) - -data(ames) - -set.seed(4595) -data_split <- initial_split(ames, strata = "Sale_Price", prop = 0.75) - -ames_train <- training(data_split) -ames_test <- testing(data_split) -``` - -The use of the test set here is _only for illustration_; normally in a data analysis these data would be saved to the very end after many models have been evaluated. - -## Random forest - -We'll start by fitting a random forest model to a small set of parameters. Let's create a model with the predictors `Longitude`, `Latitude`, `Lot_Area`, `Neighborhood`, and `Year_Sold`. A simple random forest model can be specified via: - - -```r -rf_defaults <- rand_forest(mode = "regression") -rf_defaults -#> Random Forest Model Specification (regression) -#> -#> Computational engine: ranger -``` - -The model will be fit with the ranger package by default. Since we didn't add any extra arguments to `fit`, _many_ of the arguments will be set to their defaults from the function `ranger::ranger()`. The help pages for the model function describe the default parameters and you can also use the `translate()` function to check out such details. - -The parsnip package provides two different interfaces to fit a model: - -- the formula interface (`fit()`), and -- the non-formula interface (`fit_xy()`). 
- -Let's start with the non-formula interface: - - - -```r -preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold") - -rf_xy_fit <- - rf_defaults %>% - set_engine("ranger") %>% - fit_xy( - x = ames_train[, preds], - y = log10(ames_train$Sale_Price) - ) - -rf_xy_fit -#> parsnip model object -#> -#> Ranger result -#> -#> Call: -#> ranger::ranger(x = maybe_data_frame(x), y = y, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) -#> -#> Type: Regression -#> Number of trees: 500 -#> Sample size: 2197 -#> Number of independent variables: 5 -#> Mtry: 2 -#> Target node size: 5 -#> Variable importance mode: none -#> Splitrule: variance -#> OOB prediction error (MSE): 0.0085 -#> R squared (OOB): 0.724 -``` - -The non-formula interface doesn't do anything to the predictors before passing them to the underlying model function. This particular model does _not_ require indicator variables (sometimes called "dummy variables") to be created prior to fitting the model. Note that the output shows "Number of independent variables: 5". - -For regression models, we can use the basic `predict()` method, which returns a tibble with a column named `.pred`: - - -```r -test_results <- - ames_test %>% - select(Sale_Price) %>% - mutate(Sale_Price = log10(Sale_Price)) %>% - bind_cols( - predict(rf_xy_fit, new_data = ames_test[, preds]) - ) -test_results %>% slice(1:5) -#> # A tibble: 5 × 2 -#> Sale_Price .pred -#> -#> 1 5.39 5.25 -#> 2 5.28 5.29 -#> 3 5.23 5.26 -#> 4 5.21 5.30 -#> 5 5.60 5.51 - -# summarize performance -test_results %>% metrics(truth = Sale_Price, estimate = .pred) -#> # A tibble: 3 × 3 -#> .metric .estimator .estimate -#> -#> 1 rmse standard 0.0945 -#> 2 rsq standard 0.733 -#> 3 mae standard 0.0629 -``` - -Note that: - - * If the model required indicator variables, we would have to create them manually prior to using `fit()` (perhaps using the recipes package). - * We had to manually log the outcome prior to modeling. 
- -Now, for illustration, let's use the formula method using some new parameter values: - - -```r -rand_forest(mode = "regression", mtry = 3, trees = 1000) %>% - set_engine("ranger") %>% - fit( - log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) -#> parsnip model object -#> -#> Ranger result -#> -#> Call: -#> ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~3, x), num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) -#> -#> Type: Regression -#> Number of trees: 1000 -#> Sample size: 2197 -#> Number of independent variables: 5 -#> Mtry: 3 -#> Target node size: 5 -#> Variable importance mode: none -#> Splitrule: variance -#> OOB prediction error (MSE): 0.0084 -#> R squared (OOB): 0.727 -``` - -Suppose that we would like to use the randomForest package instead of ranger. To do so, the only part of the syntax that needs to change is the `set_engine()` argument: - - - -```r -rand_forest(mode = "regression", mtry = 3, trees = 1000) %>% - set_engine("randomForest") %>% - fit( - log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) -#> parsnip model object -#> -#> -#> Call: -#> randomForest(x = maybe_data_frame(x), y = y, ntree = ~1000, mtry = min_cols(~3, x)) -#> Type of random forest: regression -#> Number of trees: 1000 -#> No. of variables tried at each split: 3 -#> -#> Mean of squared residuals: 0.00847 -#> % Var explained: 72.5 -``` - -Look at the formula code that was printed out; one function uses the argument name `ntree` and the other uses `num.trees`. The parsnip models don't require you to know the specific names of the main arguments. - -Now suppose that we want to modify the value of `mtry` based on the number of predictors in the data. Usually, a good default value is `floor(sqrt(num_predictors))` but a pure bagging model requires an `mtry` value equal to the total number of parameters. 
There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code. - -When the model is being fit by parsnip, [_data descriptors_](https://parsnip.tidymodels.org/reference/descriptors.html) are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using `rand_forest()`), the values of the arguments that you give it are _immediately evaluated_ unless you delay them. To delay the evaluation of any argument, you can use `rlang::expr()` to make an expression. - -Two relevant data descriptors for our example model are: - - * `.preds()`: the number of predictor _variables_ in the data set that are associated with the predictors **prior to dummy variable creation**. - * `.cols()`: the number of predictor _columns_ after dummy variables (or other encodings) are created. - -Since ranger won't create indicator values, `.preds()` would be appropriate for `mtry` for a bagging model. 
- -For example, let's use an expression with the `.preds()` descriptor to fit a bagging model: - - -```r -rand_forest(mode = "regression", mtry = .preds(), trees = 1000) %>% - set_engine("ranger") %>% - fit( - log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) -#> parsnip model object -#> -#> Ranger result -#> -#> Call: -#> ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~.preds(), x), num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) -#> -#> Type: Regression -#> Number of trees: 1000 -#> Sample size: 2197 -#> Number of independent variables: 5 -#> Mtry: 5 -#> Target node size: 5 -#> Variable importance mode: none -#> Splitrule: variance -#> OOB prediction error (MSE): 0.00867 -#> R squared (OOB): 0.718 -``` - - -## Regularized regression - -A linear model might work for this data set as well. We can use the `linear_reg()` parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let's use the former here. The glmnet package only implements a non-formula method, but parsnip will allow either one to be used. - -When regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won't do that automatically so we will need to do this ourselves. We'll use the [recipes](https://recipes.tidymodels.org/) package for these steps. 
- - -```r -norm_recipe <- - recipe( - Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, - data = ames_train - ) %>% - step_other(Neighborhood) %>% - step_dummy(all_nominal()) %>% - step_center(all_predictors()) %>% - step_scale(all_predictors()) %>% - step_log(Sale_Price, base = 10) %>% - # estimate the means and standard deviations - prep(training = ames_train, retain = TRUE) - -# Now let's fit the model using the processed version of the data - -glmn_fit <- - linear_reg(penalty = 0.001, mixture = 0.5) %>% - set_engine("glmnet") %>% - fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL)) -glmn_fit -#> parsnip model object -#> -#> -#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian", alpha = ~0.5) -#> -#> Df %Dev Lambda -#> 1 0 0.0 0.1380 -#> 2 1 2.0 0.1260 -#> 3 1 3.7 0.1150 -#> 4 1 5.3 0.1050 -#> 5 2 7.1 0.0953 -#> 6 3 9.6 0.0869 -#> 7 4 12.6 0.0791 -#> 8 5 15.4 0.0721 -#> 9 5 17.9 0.0657 -#> 10 7 20.8 0.0599 -#> 11 7 23.5 0.0545 -#> 12 7 25.8 0.0497 -#> 13 8 28.2 0.0453 -#> 14 8 30.3 0.0413 -#> 15 8 32.1 0.0376 -#> 16 8 33.7 0.0343 -#> 17 8 35.0 0.0312 -#> 18 8 36.1 0.0284 -#> 19 8 37.0 0.0259 -#> 20 9 37.9 0.0236 -#> 21 9 38.6 0.0215 -#> 22 9 39.3 0.0196 -#> 23 9 39.8 0.0179 -#> 24 9 40.3 0.0163 -#> 25 10 40.7 0.0148 -#> 26 11 41.1 0.0135 -#> 27 11 41.4 0.0123 -#> 28 11 41.6 0.0112 -#> 29 11 41.9 0.0102 -#> 30 12 42.1 0.0093 -#> 31 12 42.3 0.0085 -#> 32 12 42.4 0.0077 -#> 33 12 42.6 0.0070 -#> 34 12 42.7 0.0064 -#> 35 12 42.8 0.0059 -#> 36 12 42.8 0.0053 -#> 37 12 42.9 0.0049 -#> 38 12 43.0 0.0044 -#> 39 12 43.0 0.0040 -#> 40 12 43.0 0.0037 -#> 41 12 43.1 0.0034 -#> 42 12 43.1 0.0031 -#> 43 12 43.1 0.0028 -#> 44 12 43.1 0.0025 -#> 45 12 43.1 0.0023 -#> 46 12 43.2 0.0021 -#> 47 12 43.2 0.0019 -#> 48 12 43.2 0.0018 -#> 49 12 43.2 0.0016 -#> 50 12 43.2 0.0014 -#> 51 12 43.2 0.0013 -#> 52 12 43.2 0.0012 -#> 53 12 43.2 0.0011 -#> 54 12 43.2 0.0010 -#> 55 12 43.2 0.0009 -#> 56 12 43.2 0.0008 -#> 57 12 43.2 0.0008 
-#> 58 12 43.2 0.0007 -#> 59 12 43.2 0.0006 -#> 60 12 43.2 0.0006 -#> 61 12 43.2 0.0005 -#> 62 12 43.2 0.0005 -#> 63 12 43.2 0.0004 -#> 64 12 43.2 0.0004 -#> 65 12 43.2 0.0004 -``` - -If `penalty` were not specified, all of the `lambda` values would be computed. - -To get the predictions for this specific value of `lambda` (aka `penalty`): - - -```r -# First, get the processed version of the test set predictors: -test_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors()) - -test_results <- - test_results %>% - rename(`random forest` = .pred) %>% - bind_cols( - predict(glmn_fit, new_data = test_normalized) %>% - rename(glmnet = .pred) - ) -test_results -#> # A tibble: 733 × 3 -#> Sale_Price `random forest` glmnet -#> -#> 1 5.39 5.25 5.16 -#> 2 5.28 5.29 5.27 -#> 3 5.23 5.26 5.24 -#> 4 5.21 5.30 5.24 -#> 5 5.60 5.51 5.24 -#> 6 5.32 5.29 5.26 -#> 7 5.17 5.14 5.18 -#> 8 5.06 5.13 5.17 -#> 9 4.98 5.01 5.18 -#> 10 5.11 5.14 5.19 -#> # … with 723 more rows - -test_results %>% metrics(truth = Sale_Price, estimate = glmnet) -#> # A tibble: 3 × 3 -#> .metric .estimator .estimate -#> -#> 1 rmse standard 0.142 -#> 2 rsq standard 0.391 -#> 3 mae standard 0.0979 - -test_results %>% - gather(model, prediction, -Sale_Price) %>% - ggplot(aes(x = prediction, y = Sale_Price)) + - geom_abline(col = "green", lty = 2) + - geom_point(alpha = .4) + - facet_wrap(~model) + - coord_fixed() -``` - - - -This final plot compares the performance of the random forest and regularized regression models. - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> glmnet * 4.1-6 2022-11-27 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> randomForest * 4.7-1.1 2022-05-23 [1] CRAN (R 4.2.0) -#> ranger * 0.14.1 2022-06-18 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/models/pls/index.Rmarkdown b/content/learn/models/pls/index.Rmarkdown deleted file mode 100755 index 609ea6e7..00000000 --- a/content/learn/models/pls/index.Rmarkdown +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: "Multivariate analysis using partial least squares" -tags: [recipes,rsample] -categories: [pre-processing] -type: learn-subsection -weight: 6 -description: | - Build and fit a predictive model with more than one outcome. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -library(pls) -library(tidymodels) -library(sessioninfo) -pkgs <- c("modeldata", "pls", "tidymodels") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction - -`r req_pkgs(pkgs)` - -"Multivariate analysis" usually refers to multiple _outcomes_ being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns `y1` and `y2` representing two outcomes to be predicted. The `lm()` function would look something like: - -```{r lm, eval = FALSE} -lm(cbind(y1, y2) ~ ., data = dat) -``` - -This `cbind()` call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes. - -The data that we'll use has three outcomes. From `?modeldata::meats`: - -> "These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents. - -> "For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is `-log10` of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry." - -The goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here. 
- -To start, let's take the two data matrices (called `endpoints` and `absorp`) and bind them together in a data frame: - -```{r data} -library(modeldata) -data(meats) -``` - -The three _outcomes_ have fairly high correlations also. - -## Preprocessing the data - -If the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved _latent_ variables that are derived in a manner similar to principal component analysis (PCA). - -PLS, unlike PCA, also incorporates the outcome data when creating the PLS components. Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS _chases_ variation of the predictors and outcomes. - -Since we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables. - -Many base R functions that deal with multivariate outcomes using a formula require the use of `cbind()` on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically "added" together on the left-hand side: - -```{r recipe} -norm_rec <- - recipe(water + fat + protein ~ ., data = meats) %>% - step_normalize(everything()) -``` - -Before we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the _predictors and each of the outcomes_. This allows an informed choice to be made based on the level of evidence that the situation requires. - -Since the data set isn't large, let's use resampling to measure these proportions. 
With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions. - -The folds can be created using the [rsample](https://rsample.tidymodels.org/) package and the recipe can be estimated for each resample using the [`prepper()`](https://rsample.tidymodels.org/reference/prepper.html) function: - -```{r cv} -set.seed(57343) -folds <- vfold_cv(meats, repeats = 10) - -folds <- - folds %>% - mutate(recipes = map(splits, prepper, recipe = norm_rec)) -``` - -## Partial least squares - -The complicated parts for moving forward are: - -1. Formatting the predictors and outcomes into the format that the pls package requires, and -2. Estimating the proportions. - -For the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used `retain = TRUE` when prepping the recipes, we can `bake()` with `new_data = NULL` to get the processed data back out. To save the data as a matrix, the option `composition = "matrix"` will avoid saving the data as tibbles and use the required format. - -The pls package expects a simple formula to specify the model, but each side of the formula should _represent a matrix_. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to "protect" the two matrices using `I()` when adding them to the data frame. - -The calculation for the proportion of variance explained is straightforward for the predictors; the function `pls::explvar()` will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below). 
- -The function `get_var_explained()` shown here will do all these computations and return a data frame with columns `components`, `source` (for the predictors, water, etc), and the `proportion` of variance that is explained by the components. - - -```{r var-explained} -library(pls) - -get_var_explained <- function(recipe, ...) { - - # Extract the predictors and outcomes into their own matrices - y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes()) - x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors()) - - # The pls package prefers the data in a data frame where the outcome - # and predictors are in _matrices_. To make sure this is formatted - # properly, use the `I()` function to inhibit `data.frame()` from making - # all the individual columns. `pls_format` should have two columns. - pls_format <- data.frame( - endpoints = I(y_mat), - measurements = I(x_mat) - ) - # Fit the model - mod <- plsr(endpoints ~ measurements, data = pls_format) - - # Get the proportion of the predictor variance that is explained - # by the model for different number of components. - xve <- explvar(mod)/100 - - # To do the same for the outcome, it is more complex. This code - # was extracted from pls:::summary.mvr. - explained <- - pls::R2(mod, estimate = "train", intercept = FALSE)$val %>% - # subset array to matrix. abind::adrop() prevents turning it into a - # vector if dim()[2] == 1 - abind::adrop(drop = 1) %>% - # transpose so that components are in rows - t() %>% - as_tibble() %>% - # Add the predictor proportions - mutate(predictors = cumsum(xve) %>% as.vector(), - components = seq_along(xve)) %>% - # Put into a tidy format that is tall - pivot_longer( - cols = c(-components), - names_to = "source", - values_to = "proportion" - ) -} -``` - -We compute this data frame for each resample and save the results in the different columns. 
- -```{r get-estimates} -folds <- - folds %>% - mutate(var = map(recipes, get_var_explained), - var = unname(var)) -``` - -To extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let's filter the data and compute the _average_ proportion. - -```{r collapse-and-average} -variance_data <- - bind_rows(folds[["var"]]) %>% - filter(components <= 15) %>% - group_by(components, source) %>% - summarize(proportion = mean(proportion)) -``` - -The plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data. - -```{r plot, fig.width=6, fig.height=4.25, out.width = '100%'} -ggplot(variance_data, aes(x = components, y = proportion, col = source)) + - geom_line(alpha = 0.5, linewidth = 1.2) + - geom_point() -``` - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/models/pls/index.markdown b/content/learn/models/pls/index.markdown deleted file mode 100644 index 7022d158..00000000 --- a/content/learn/models/pls/index.markdown +++ /dev/null @@ -1,218 +0,0 @@ ---- -title: "Multivariate analysis using partial least squares" -tags: [recipes,rsample] -categories: [pre-processing] -type: learn-subsection -weight: 6 -description: | - Build and fit a predictive model with more than one outcome. ---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: modeldata, pls, and tidymodels. - -"Multivariate analysis" usually refers to multiple _outcomes_ being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. 
For example, suppose there was a data set with columns `y1` and `y2` representing two outcomes to be predicted. The `lm()` function would look something like: - - -```r -lm(cbind(y1, y2) ~ ., data = dat) -``` - -This `cbind()` call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes. - -The data that we'll use has three outcomes. From `?modeldata::meats`: - -> "These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents. - -> "For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is `-log10` of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry." - -The goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here. - -To start, let's take the two data matrices (called `endpoints` and `absorp`) and bind them together in a data frame: - - -```r -library(modeldata) -data(meats) -``` - -The three _outcomes_ have fairly high correlations also. - -## Preprocessing the data - -If the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved _latent_ variables that are derived in a manner similar to principal component analysis (PCA). - -PLS, unlike PCA, also incorporates the outcome data when creating the PLS components. 
Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS _chases_ variation of the predictors and outcomes. - -Since we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables. - -Many base R functions that deal with multivariate outcomes using a formula require the use of `cbind()` on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically "added" together on the left-hand side: - - -```r -norm_rec <- - recipe(water + fat + protein ~ ., data = meats) %>% - step_normalize(everything()) -``` - -Before we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the _predictors and each of the outcomes_. This allows an informed choice to be made based on the level of evidence that the situation requires. - -Since the data set isn't large, let's use resampling to measure these proportions. With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions. - -The folds can be created using the [rsample](https://rsample.tidymodels.org/) package and the recipe can be estimated for each resample using the [`prepper()`](https://rsample.tidymodels.org/reference/prepper.html) function: - - -```r -set.seed(57343) -folds <- vfold_cv(meats, repeats = 10) - -folds <- - folds %>% - mutate(recipes = map(splits, prepper, recipe = norm_rec)) -``` - -## Partial least squares - -The complicated parts for moving forward are: - -1. 
Formatting the predictors and outcomes into the format that the pls package requires, and -2. Estimating the proportions. - -For the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used `retain = TRUE` when prepping the recipes, we can `bake()` with `new_data = NULl` to get the processed data back out. To save the data as a matrix, the option `composition = "matrix"` will avoid saving the data as tibbles and use the required format. - -The pls package expects a simple formula to specify the model, but each side of the formula should _represent a matrix_. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to "protect" the two matrices using `I()` when adding them to the data frame. - -The calculation for the proportion of variance explained is straightforward for the predictors; the function `pls::explvar()` will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below). - -The function `get_var_explained()` shown here will do all these computations and return a data frame with columns `components`, `source` (for the predictors, water, etc), and the `proportion` of variance that is explained by the components. - - - -```r -library(pls) - -get_var_explained <- function(recipe, ...) { - - # Extract the predictors and outcomes into their own matrices - y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes()) - x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors()) - - # The pls package prefers the data in a data frame where the outcome - # and predictors are in _matrices_. To make sure this is formatted - # properly, use the `I()` function to inhibit `data.frame()` from making - # all the individual columns. `pls_format` should have two columns. 
- pls_format <- data.frame( - endpoints = I(y_mat), - measurements = I(x_mat) - ) - # Fit the model - mod <- plsr(endpoints ~ measurements, data = pls_format) - - # Get the proportion of the predictor variance that is explained - # by the model for different number of components. - xve <- explvar(mod)/100 - - # To do the same for the outcome, it is more complex. This code - # was extracted from pls:::summary.mvr. - explained <- - pls::R2(mod, estimate = "train", intercept = FALSE)$val %>% - # subset array to matrix. abind::adrop() prevents turning it into a - # vector if dim()[2] == 1 - abind::adrop(drop = 1) %>% - # transpose so that components are in rows - t() %>% - as_tibble() %>% - # Add the predictor proportions - mutate(predictors = cumsum(xve) %>% as.vector(), - components = seq_along(xve)) %>% - # Put into a tidy format that is tall - pivot_longer( - cols = c(-components), - names_to = "source", - values_to = "proportion" - ) -} -``` - -We compute this data frame for each resample and save the results in the different columns. - - -```r -folds <- - folds %>% - mutate(var = map(recipes, get_var_explained), - var = unname(var)) -``` - -To extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let's filter the data and compute the _average_ proportion. - - -```r -variance_data <- - bind_rows(folds[["var"]]) %>% - filter(components <= 15) %>% - group_by(components, source) %>% - summarize(proportion = mean(proportion)) -``` - -The plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data. 
- - -```r -ggplot(variance_data, aes(x = components, y = proportion, col = source)) + - geom_line(alpha = 0.5, linewidth = 1.2) + - geom_point() -``` - - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Monterey 12.6 -#> system aarch64, darwin20 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2023-02-17 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.3 2023-01-25 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.1) -#> dplyr * 1.1.0 2023-01-29 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.1 2023-02-10 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> pls * 2.8-1 2022-07-16 [1] CRAN (R 4.2.0) -#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.0) -#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library -#> [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/models/sub-sampling/figs/merge-metrics-1.svg b/content/learn/models/sub-sampling/figs/merge-metrics-1.svg deleted file mode 100644 index 7800eeb3..00000000 --- 
a/content/learn/models/sub-sampling/figs/merge-metrics-1.svg +++ /dev/null @@ -1,216 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -j_index - - - - - - - - - - -roc_auc - - - - -no_sampling -rose - - -no_sampling -rose -0.85 -0.90 -0.95 -1.00 - - - - -0.00 -0.25 -0.50 -0.75 - - - - -sampling -.estimate - - diff --git a/content/learn/models/sub-sampling/index.Rmarkdown b/content/learn/models/sub-sampling/index.Rmarkdown deleted file mode 100755 index 0c8c14ad..00000000 --- a/content/learn/models/sub-sampling/index.Rmarkdown +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: "Subsampling for class imbalances" -tags: [recipes, themis, discrim, parsnip] -categories: [model fitting, pre-processing] -type: learn-subsection -weight: 3 -description: | - Improve model performance in imbalanced data sets through undersampling or oversampling. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(readr) -library(klaR) -library(tidymodels) -library(discrim) -library(themis) -library(ROSE) - -pkgs <- c("tidymodels", "klaR", "themis", "discrim", "readr", "ROSE") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction - -`r req_pkgs(pkgs)` - -Subsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. 
In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance. - -This article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/measuring-performance.html) for more information on these metrics. - -## Simulated data - -Consider a two-class problem where the first class has a very low rate of occurrence. The data were simulated and can be imported into R using the code below: - -```{r load-data, message = FALSE} -imbal_data <- - readr::read_csv("https://bit.ly/imbal_data") %>% - mutate(Class = factor(Class)) -dim(imbal_data) -table(imbal_data$Class) -``` - -If "Class1" is the event of interest, it is very likely that a classification model would be able to achieve very good _specificity_ since almost all of the data are of the second class. _Sensitivity_, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class. - -One result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance. - -## Subsampling the data - -One way to alleviate this issue is to _subsample_ the data. There are a number of ways to do this but the most simple one is to _sample down_ (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. 
In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are _better calibrated_, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise. - -Let's explore subsampling using `themis::step_rose()` in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from [Menardi, G. and Torelli, N. (2014)](https://scholar.google.com/scholar?hl=en&q=%22training+and+assessing+classification+rules+with+imbalanced+data%22). This is an example of an oversampling strategy, rather than undersampling. - -In terms of workflow: - - * It is extremely important that subsampling occurs _inside of resampling_. Otherwise, the resampling process can produce [poor estimates of model performance](https://topepo.github.io/caret/subsampling-for-class-imbalances.html#resampling). - * The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen "in the wild" and, for this reason, the `skip` argument to `step_downsample()` and other subsampling recipes steps has a default of `TRUE`. - -Here is a simple recipe implementing oversampling: - -```{r rec} -library(tidymodels) -library(themis) -imbal_rec <- - recipe(Class ~ ., data = imbal_data) %>% - step_rose(Class, seed = 1234) -``` - -For a model, let's use a [quadratic discriminant analysis](https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis) (QDA) model. 
From the discrim package, this model can be specified using: - -```{r qda} -library(discrim) -qda_mod <- - discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% - set_engine("klaR") -``` - -To keep these objects bound together, they can be combined in a [workflow](https://workflows.tidymodels.org/): - -```{r wflw} -qda_rose_wflw <- - workflow() %>% - add_model(qda_mod) %>% - add_recipe(imbal_rec) -qda_rose_wflw -``` - -## Model performance - -Stratified, repeated 10-fold cross-validation is used to resample the model: - -```{r cv} -set.seed(5732) -cv_folds <- vfold_cv(imbal_data, strata = "Class", repeats = 5) -``` - -To measure model performance, let's use two metrics: - - * The area under the [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) is an overall assessment of performance across _all_ cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor. - * The _J_ index (a.k.a. [Youden's _J_](https://en.wikipedia.org/wiki/Youden%27s_J_statistic) statistic) is `sensitivity + specificity - 1`. Values near one are once again best. - -If a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the _J_ index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics. - -```{r metrics} -cls_metrics <- metric_set(roc_auc, j_index) -``` - -Now, we train the models and generate the results using `tune::fit_resamples()`: - -```{r resample-rose, message=FALSE} -set.seed(2180) -qda_rose_res <- fit_resamples( - qda_rose_wflw, - resamples = cv_folds, - metrics = cls_metrics -) - -collect_metrics(qda_rose_res) -``` - -What do the results look like without using ROSE? We can create another workflow and fit the QDA model along the same resamples: - -```{r qda-only} -qda_wflw <- - workflow() %>% - add_model(qda_mod) %>% - add_formula(Class ~ .) 
- -set.seed(2180) -qda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics) -collect_metrics(qda_only_res) -``` - -It looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity. - -Let's plot the metrics for each resample to see how the individual results changed. - -```{r merge-metrics} -no_sampling <- - qda_only_res %>% - collect_metrics(summarize = FALSE) %>% - dplyr::select(-.estimator) %>% - mutate(sampling = "no_sampling") - -with_sampling <- - qda_rose_res %>% - collect_metrics(summarize = FALSE) %>% - dplyr::select(-.estimator) %>% - mutate(sampling = "rose") - -bind_rows(no_sampling, with_sampling) %>% - mutate(label = paste(id2, id)) %>% - ggplot(aes(x = sampling, y = .estimate, group = label)) + - geom_line(alpha = .4) + - facet_wrap(~ .metric, scales = "free_y") -``` - -This visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions. - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/models/sub-sampling/index.markdown b/content/learn/models/sub-sampling/index.markdown deleted file mode 100644 index f5f983cb..00000000 --- a/content/learn/models/sub-sampling/index.markdown +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: "Subsampling for class imbalances" -tags: [recipes, themis, discrim, parsnip] -categories: [model fitting, pre-processing] -type: learn-subsection -weight: 3 -description: | - Improve model performance in imbalanced data sets through undersampling or oversampling. ---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: discrim, klaR, readr, ROSE, themis, and tidymodels. 
- -Subsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance. - -This article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/measuring-performance.html) for more information on these metrics. - -## Simulated data - -Consider a two-class problem where the first class has a very low rate of occurrence. The data were simulated and can be imported into R using the code below: - - -```r -imbal_data <- - readr::read_csv("https://bit.ly/imbal_data") %>% - mutate(Class = factor(Class)) -dim(imbal_data) -#> [1] 1200 16 -table(imbal_data$Class) -#> -#> Class1 Class2 -#> 60 1140 -``` - -If "Class1" is the event of interest, it is very likely that a classification model would be able to achieve very good _specificity_ since almost all of the data are of the second class. _Sensitivity_, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class. - -One result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance. - -## Subsampling the data - -One way to alleviate this issue is to _subsample_ the data. 
There are a number of ways to do this but the most simple one is to _sample down_ (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are _better calibrated_, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise. - -Let's explore subsampling using `themis::step_rose()` in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from [Menardi, G. and Torelli, N. (2014)](https://scholar.google.com/scholar?hl=en&q=%22training+and+assessing+classification+rules+with+imbalanced+data%22). This is an example of an oversampling strategy, rather than undersampling. - -In terms of workflow: - - * It is extremely important that subsampling occurs _inside of resampling_. Otherwise, the resampling process can produce [poor estimates of model performance](https://topepo.github.io/caret/subsampling-for-class-imbalances.html#resampling). - * The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen "in the wild" and, for this reason, the `skip` argument to `step_downsample()` and other subsampling recipes steps has a default of `TRUE`. 
- -Here is a simple recipe implementing oversampling: - - -```r -library(tidymodels) -library(themis) -imbal_rec <- - recipe(Class ~ ., data = imbal_data) %>% - step_rose(Class, seed = 1234) -``` - -For a model, let's use a [quadratic discriminant analysis](https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis) (QDA) model. From the discrim package, this model can be specified using: - - -```r -library(discrim) -qda_mod <- - discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% - set_engine("klaR") -``` - -To keep these objects bound together, they can be combined in a [workflow](https://workflows.tidymodels.org/): - - -```r -qda_rose_wflw <- - workflow() %>% - add_model(qda_mod) %>% - add_recipe(imbal_rec) -qda_rose_wflw -#> ══ Workflow ══════════════════════════════════════════════════════════ -#> Preprocessor: Recipe -#> Model: discrim_regularized() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> 1 Recipe Step -#> -#> • step_rose() -#> -#> ── Model ───────────────────────────────────────────────────────────── -#> Regularized Discriminant Model Specification (classification) -#> -#> Main Arguments: -#> frac_common_cov = 0 -#> frac_identity = 0 -#> -#> Computational engine: klaR -``` - -## Model performance - -Stratified, repeated 10-fold cross-validation is used to resample the model: - - -```r -set.seed(5732) -cv_folds <- vfold_cv(imbal_data, strata = "Class", repeats = 5) -``` - -To measure model performance, let's use two metrics: - - * The area under the [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) is an overall assessment of performance across _all_ cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor. - * The _J_ index (a.k.a. [Youden's _J_](https://en.wikipedia.org/wiki/Youden%27s_J_statistic) statistic) is `sensitivity + specificity - 1`. Values near one are once again best. 
- -If a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the _J_ index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics. - - -```r -cls_metrics <- metric_set(roc_auc, j_index) -``` - -Now, we train the models and generate the results using `tune::fit_resamples()`: - - -```r -set.seed(2180) -qda_rose_res <- fit_resamples( - qda_rose_wflw, - resamples = cv_folds, - metrics = cls_metrics -) - -collect_metrics(qda_rose_res) -#> # A tibble: 2 × 6 -#> .metric .estimator mean n std_err .config -#> -#> 1 j_index binary 0.768 50 0.0214 Preprocessor1_Model1 -#> 2 roc_auc binary 0.951 50 0.00509 Preprocessor1_Model1 -``` - -What do the results look like without using ROSE? We can create another workflow and fit the QDA model along the same resamples: - - -```r -qda_wflw <- - workflow() %>% - add_model(qda_mod) %>% - add_formula(Class ~ .) - -set.seed(2180) -qda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics) -collect_metrics(qda_only_res) -#> # A tibble: 2 × 6 -#> .metric .estimator mean n std_err .config -#> -#> 1 j_index binary 0.250 50 0.0288 Preprocessor1_Model1 -#> 2 roc_auc binary 0.953 50 0.00479 Preprocessor1_Model1 -``` - -It looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity. - -Let's plot the metrics for each resample to see how the individual results changed. 
- - -```r -no_sampling <- - qda_only_res %>% - collect_metrics(summarize = FALSE) %>% - dplyr::select(-.estimator) %>% - mutate(sampling = "no_sampling") - -with_sampling <- - qda_rose_res %>% - collect_metrics(summarize = FALSE) %>% - dplyr::select(-.estimator) %>% - mutate(sampling = "rose") - -bind_rows(no_sampling, with_sampling) %>% - mutate(label = paste(id2, id)) %>% - ggplot(aes(x = sampling, y = .estimate, group = label)) + - geom_line(alpha = .4) + - facet_wrap(~ .metric, scales = "free_y") -``` - - - -This visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions. - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> discrim * 1.0.0 2022-06-23 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> klaR * 1.7-1 2022-06-27 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> readr * 2.1.3 2022-10-01 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> ROSE * 0.0-4 2021-06-14 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> themis * 1.0.0 2022-07-02 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 
2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/models/time-series/index.Rmarkdown b/content/learn/models/time-series/index.Rmarkdown deleted file mode 100755 index 2fcb1688..00000000 --- a/content/learn/models/time-series/index.Rmarkdown +++ /dev/null @@ -1,201 +0,0 @@ ---- -title: "Modeling time series with tidy resampling" -tags: [rsample] -categories: [model fitting, resampling] -type: learn-subsection -weight: 4 -description: | - Calculate performance estimates for time series forecasts using resampling. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(timetk) -library(forecast) -library(tidymodels) -library(sweep) -library(zoo) -pkgs <- c("tidymodels", "timetk", "forecast", "sweep", "zoo") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction - -`r req_pkgs(pkgs)` - -"[Demo Week: Tidy Forecasting with sweep](https://www.business-science.io/code-tools/2017/10/25/demo_week_sweep.html)" is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using [rolling forecast origin resampling](https://robjhyndman.com/hyndsight/crossvalidation/). - -## Example data - -The data for this article are sales of alcoholic beverages originally from [the Federal Reserve Bank of St. Louis website](https://fred.stlouisfed.org/series/S4248SM144NCEN). 
- -```{r read-data} -library(tidymodels) -library(modeldata) -data("drinks") -glimpse(drinks) -``` - -Each row represents one month of sales (in millions of US dollars). - -## Time series resampling - -Suppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. To set up this resampling scheme: - -```{r rof} -roll_rs <- rolling_origin( - drinks, - initial = 12 * 20, - assess = 12, - cumulative = FALSE - ) - -nrow(roll_rs) - -roll_rs -``` - -Each `split` element contains the information about that resample: - -```{r split} -roll_rs$splits[[1]] -``` - -For plotting, let's index each split by the first day of the assessment set: - -```{r labels} -get_date <- function(x) { - min(assessment(x)$date) -} - -start_date <- map(roll_rs$splits, get_date) -roll_rs$start_date <- do.call("c", start_date) -head(roll_rs$start_date) -``` - -This resampling scheme has `r nrow(roll_rs)` splits of the data so that there will be `r nrow(roll_rs)` ARIMA models that are fit. To create the models, we use the `auto.arima()` function from the forecast package. The rsample functions `analysis()` and `assessment()` return a data frame, so another step converts the data to a `ts` object called `mod_dat` using a function in the timetk package. - -```{r model-fun} -library(forecast) # for `auto.arima` -library(timetk) # for `tk_ts` -library(zoo) # for `as.yearmon` - -fit_model <- function(x, ...) { - # suggested by Matt Dancho: - x %>% - analysis() %>% - # Since the first day changes over resamples, adjust it - # based on the first date value in the data frame - tk_ts(start = .$date[[1]] %>% as.yearmon(), - frequency = 12, - silent = TRUE) %>% - auto.arima(...) 
-} -``` - -Save each model in a new column: - -```{r model-fit, warning = FALSE, message = FALSE} -roll_rs$arima <- map(roll_rs$splits, fit_model) - -# For example: -roll_rs$arima[[1]] -``` - -(There are some warnings produced by these regarding extra columns in the data that can be ignored.) - -## Model performance - -Using the model fits, let's measure performance in two ways: - - * _Interpolation_ error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used. - * _Extrapolation_ or _forecast_ error evaluates the performance of the model on the data from the following year (that were not used in the model fit). - -In each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the `Arima` object. To make things easy, let's use the sweep package's `sw_glance()` function: - -```{r interp} -library(sweep) - -roll_rs$interpolation <- map_dbl( - roll_rs$arima, - function(x) - sw_glance(x)[["MAPE"]] - ) - -summary(roll_rs$interpolation) -``` - -For the extrapolation error, the model and split objects are required. Using these: - -```{r extrap} -get_extrap <- function(split, mod) { - n <- nrow(assessment(split)) - # Get assessment data - pred_dat <- assessment(split) %>% - mutate( - pred = as.vector(forecast(mod, h = n)$mean), - pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100 - ) - mean(abs(pred_dat$pct_error)) -} - -roll_rs$extrapolation <- - map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap) - -summary(roll_rs$extrapolation) -``` - -What do these error estimates look like over time? 
- -```{r plot} -roll_rs %>% - select(interpolation, extrapolation, start_date) %>% - pivot_longer(cols = matches("ation"), names_to = "error", values_to = "MAPE") %>% - ggplot(aes(x = start_date, y = MAPE, col = error)) + - geom_point() + - geom_line() -``` - -It is likely that the interpolation error is an underestimate to some degree, as mentioned above. - -It is also worth noting that `rolling_origin()` can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days. - -The example below demonstrates this idea by splitting `drinks` into a nested set of 26 years, and rolling over years rather than months. Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month. - -```{r rof-annual} -# The idea is to nest by the period to roll over, -# which in this case is the year. -roll_rs_annual <- drinks %>% - mutate(year = as.POSIXlt(date)$year + 1900) %>% - nest(data = c(date, S4248SM144NCEN)) %>% - rolling_origin( - initial = 20, - assess = 1, - cumulative = FALSE - ) - -analysis(roll_rs_annual$splits[[1]]) -``` - -The workflow to access these calendar slices is to use `bind_rows()` to join -each analysis set together. 
- -```{r} -mutate( - roll_rs_annual, - extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data)) -) -``` - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/models/time-series/index.markdown b/content/learn/models/time-series/index.markdown deleted file mode 100644 index 31f0d814..00000000 --- a/content/learn/models/time-series/index.markdown +++ /dev/null @@ -1,312 +0,0 @@ ---- -title: "Modeling time series with tidy resampling" -tags: [rsample] -categories: [model fitting, resampling] -type: learn-subsection -weight: 4 -description: | - Calculate performance estimates for time series forecasts using resampling. ---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: forecast, sweep, tidymodels, timetk, and zoo. - -"[Demo Week: Tidy Forecasting with sweep](https://www.business-science.io/code-tools/2017/10/25/demo_week_sweep.html)" is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using [rolling forecast origin resampling](https://robjhyndman.com/hyndsight/crossvalidation/). - -## Example data - -The data for this article are sales of alcoholic beverages originally from [the Federal Reserve Bank of St. Louis website](https://fred.stlouisfed.org/series/S4248SM144NCEN). - - -```r -library(tidymodels) -library(modeldata) -data("drinks") -glimpse(drinks) -#> Rows: 309 -#> Columns: 2 -#> $ date 1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-0… -#> $ S4248SM144NCEN 3459, 3458, 4002, 4564, 4221, 4529, 4466, 4137, 4126, 4… -``` - -Each row represents one month of sales (in millions of US dollars). - -## Time series resampling - -Suppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. 
To set up this resampling scheme: - - -```r -roll_rs <- rolling_origin( - drinks, - initial = 12 * 20, - assess = 12, - cumulative = FALSE - ) - -nrow(roll_rs) -#> [1] 58 - -roll_rs -#> # Rolling origin forecast resampling -#> # A tibble: 58 × 2 -#> splits id -#> -#> 1 Slice01 -#> 2 Slice02 -#> 3 Slice03 -#> 4 Slice04 -#> 5 Slice05 -#> 6 Slice06 -#> 7 Slice07 -#> 8 Slice08 -#> 9 Slice09 -#> 10 Slice10 -#> # … with 48 more rows -``` - -Each `split` element contains the information about that resample: - - -```r -roll_rs$splits[[1]] -#> -#> <240/12/309> -``` - -For plotting, let's index each split by the first day of the assessment set: - - -```r -get_date <- function(x) { - min(assessment(x)$date) -} - -start_date <- map(roll_rs$splits, get_date) -roll_rs$start_date <- do.call("c", start_date) -head(roll_rs$start_date) -#> [1] "2012-01-01" "2012-02-01" "2012-03-01" "2012-04-01" "2012-05-01" -#> [6] "2012-06-01" -``` - -This resampling scheme has 58 splits of the data so that there will be 58 ARIMA models that are fit. To create the models, we use the `auto.arima()` function from the forecast package. The rsample functions `analysis()` and `assessment()` return a data frame, so another step converts the data to a `ts` object called `mod_dat` using a function in the timetk package. - - -```r -library(forecast) # for `auto.arima` -library(timetk) # for `tk_ts` -library(zoo) # for `as.yearmon` - -fit_model <- function(x, ...) { - # suggested by Matt Dancho: - x %>% - analysis() %>% - # Since the first day changes over resamples, adjust it - # based on the first date value in the data frame - tk_ts(start = .$date[[1]] %>% as.yearmon(), - frequency = 12, - silent = TRUE) %>% - auto.arima(...) -} -``` - -Save each model in a new column: - - -```r -roll_rs$arima <- map(roll_rs$splits, fit_model) - -# For example: -roll_rs$arima[[1]] -#> Series: . 
-#> ARIMA(4,1,1)(0,1,2)[12] -#> -#> Coefficients: -#> ar1 ar2 ar3 ar4 ma1 sma1 sma2 -#> -0.185 -0.024 0.358 -0.152 -0.831 -0.193 -0.324 -#> s.e. 0.147 0.166 0.144 0.081 0.138 0.067 0.064 -#> -#> sigma^2 = 72198: log likelihood = -1591 -#> AIC=3198 AICc=3199 BIC=3226 -``` - -(There are some warnings produced by these regarding extra columns in the data that can be ignored.) - -## Model performance - -Using the model fits, let's measure performance in two ways: - - * _Interpolation_ error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used. - * _Extrapolation_ or _forecast_ error evaluates the performance of the model on the data from the following year (that were not used in the model fit). - -In each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the `Arima` object. To make things easy, let's use the sweep package's `sw_glance()` function: - - -```r -library(sweep) - -roll_rs$interpolation <- map_dbl( - roll_rs$arima, - function(x) - sw_glance(x)[["MAPE"]] - ) - -summary(roll_rs$interpolation) -#> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 2.84 2.92 2.95 2.95 2.97 3.13 -``` - -For the extrapolation error, the model and split objects are required. Using these: - - -```r -get_extrap <- function(split, mod) { - n <- nrow(assessment(split)) - # Get assessment data - pred_dat <- assessment(split) %>% - mutate( - pred = as.vector(forecast(mod, h = n)$mean), - pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100 - ) - mean(abs(pred_dat$pct_error)) -} - -roll_rs$extrapolation <- - map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap) - -summary(roll_rs$extrapolation) -#> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 2.37 3.23 3.63 3.65 4.11 5.45 -``` - -What do these error estimates look like over time? 
- - -```r -roll_rs %>% - select(interpolation, extrapolation, start_date) %>% - pivot_longer(cols = matches("ation"), names_to = "error", values_to = "MAPE") %>% - ggplot(aes(x = start_date, y = MAPE, col = error)) + - geom_point() + - geom_line() -``` - - - -It is likely that the interpolation error is an underestimate to some degree, as mentioned above. - -It is also worth noting that `rolling_origin()` can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days. - -The example below demonstrates this idea by splitting `drinks` into a nested set of 26 years, and rolling over years rather than months. Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month. - - -```r -# The idea is to nest by the period to roll over, -# which in this case is the year. -roll_rs_annual <- drinks %>% - mutate(year = as.POSIXlt(date)$year + 1900) %>% - nest(data = c(date, S4248SM144NCEN)) %>% - rolling_origin( - initial = 20, - assess = 1, - cumulative = FALSE - ) - -analysis(roll_rs_annual$splits[[1]]) -#> # A tibble: 20 × 2 -#> year data -#> -#> 1 1992 -#> 2 1993 -#> 3 1994 -#> 4 1995 -#> 5 1996 -#> 6 1997 -#> 7 1998 -#> 8 1999 -#> 9 2000 -#> 10 2001 -#> 11 2002 -#> 12 2003 -#> 13 2004 -#> 14 2005 -#> 15 2006 -#> 16 2007 -#> 17 2008 -#> 18 2009 -#> 19 2010 -#> 20 2011 -``` - -The workflow to access these calendar slices is to use `bind_rows()` to join -each analysis set together. 
- - -```r -mutate( - roll_rs_annual, - extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data)) -) -#> # Rolling origin forecast resampling -#> # A tibble: 6 × 3 -#> splits id extracted_slice -#> -#> 1 Slice1 -#> 2 Slice2 -#> 3 Slice3 -#> 4 Slice4 -#> 5 Slice5 -#> 6 Slice6 -``` - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> forecast * 8.19 2022-11-20 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> sweep * 0.2.3 2020-07-10 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> timetk * 2.8.2 2022-11-17 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> zoo * 1.8-11 2022-09-17 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git 
a/content/learn/statistics/bootstrap/index.Rmarkdown b/content/learn/statistics/bootstrap/index.Rmarkdown deleted file mode 100755 index 47c5ea92..00000000 --- a/content/learn/statistics/bootstrap/index.Rmarkdown +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: "Bootstrap resampling and tidy regression models" -tags: [rsample, broom] -categories: [statistical analysis, resampling] -type: learn-subsection -weight: 3 -description: | - Apply bootstrap resampling to estimate uncertainty in model parameters. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -library(tidymodels) -pkgs <- c("tidymodels") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction - -This article only requires the tidymodels package. - -Combining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. These approaches have been explored before, for instance by [Andrew MacDonald here](https://rstudio-pubs-static.s3.amazonaws.com/19698_a4c472606e3c43e4b94720506e49bb7b.html), and [Hadley has explored efficient support for bootstrapping](https://github.com/hadley/dplyr/issues/269) as a potential enhancement to dplyr. The tidymodels package [broom](https://broom.tidyverse.org/) fits naturally with [dplyr](https://dplyr.tidyverse.org/) in performing these analyses. - -Bootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate. - -Let's say we want to fit a nonlinear model to the weight/mileage relationship in the `mtcars` data set. - -```{r} -library(tidymodels) - -ggplot(mtcars, aes(mpg, wt)) + - geom_point() -``` - -We might use the method of nonlinear least squares (via the `nls()` function) to fit a model. 
- -```{r} -nlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0)) -summary(nlsfit) - -ggplot(mtcars, aes(wt, mpg)) + - geom_point() + - geom_line(aes(y = predict(nlsfit))) -``` - -While this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data. - -## Bootstrapping models - -We can use the `bootstraps()` function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an `rset`, which is a data frame with a column of `rsplit` objects. - -An `rsplit` object has two main components: an analysis data set and an assessment data set, accessible via `analysis(rsplit)` and `assessment(rsplit)` respectively. For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples. - -```{r} -set.seed(27) -boots <- bootstraps(mtcars, times = 2000, apparent = TRUE) -boots -``` - -Let's create a helper function to fit an `nls()` model on each bootstrap sample, and then use `purrr::map()` to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting. 
- -```{r} -fit_nls_on_bootstrap <- function(split) { - nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0)) -} - -boot_models <- - boots %>% - mutate(model = map(splits, fit_nls_on_bootstrap), - coef_info = map(model, tidy)) - -boot_coefs <- - boot_models %>% - unnest(coef_info) -``` - -The unnested coefficient information contains a summary of each replication combined in a single data frame: - -```{r} -boot_coefs -``` - -## Confidence intervals - -We can then calculate confidence intervals (using what is called the [percentile method](https://www.uvm.edu/~dhowell/StatPages/Randomization%20Tests/ResamplingWithR/BootstMeans/bootstrapping_means.html)): - -```{r percentiles} -percentile_intervals <- int_pctl(boot_models, coef_info) -percentile_intervals -``` - -Or we can use histograms to get a more detailed idea of the uncertainty in each estimate: - -```{r} -ggplot(boot_coefs, aes(estimate)) + - geom_histogram(bins = 30) + - facet_wrap( ~ term, scales = "free") + - geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = "blue") + - geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = "blue") -``` - -The rsample package also has functions for [other types of confidence intervals](https://rsample.tidymodels.org/reference/int_pctl.html). - -## Possible model fits - -We can use `augment()` to visualize the uncertainty in the fitted curve. 
Since there are so many bootstrap samples, we'll only show a sample of the model fits in our visualization: - -```{r} -boot_aug <- - boot_models %>% - sample_n(200) %>% - mutate(augmented = map(model, augment)) %>% - unnest(augmented) - -boot_aug -``` - -```{r} -ggplot(boot_aug, aes(wt, mpg)) + - geom_line(aes(y = .fitted, group = id), alpha = .2, col = "blue") + - geom_point() -``` - -With only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the `tidy()` and `augment()` functions works for many statistical outputs. As another example, we could use `smooth.spline()`, which fits a cubic smoothing spline to data: - -```{r} -fit_spline_on_bootstrap <- function(split) { - data <- analysis(split) - smooth.spline(data$wt, data$mpg, df = 4) -} - -boot_splines <- - boots %>% - sample_n(200) %>% - mutate(spline = map(splits, fit_spline_on_bootstrap), - aug_train = map(spline, augment)) - -splines_aug <- - boot_splines %>% - unnest(aug_train) - -ggplot(splines_aug, aes(x, y)) + - geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = "blue") + - geom_point() -``` - - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - - diff --git a/content/learn/statistics/bootstrap/index.markdown b/content/learn/statistics/bootstrap/index.markdown deleted file mode 100644 index 70f894e9..00000000 --- a/content/learn/statistics/bootstrap/index.markdown +++ /dev/null @@ -1,269 +0,0 @@ ---- -title: "Bootstrap resampling and tidy regression models" -tags: [rsample, broom] -categories: [statistical analysis, resampling] -type: learn-subsection -weight: 3 -description: | - Apply bootstrap resampling to estimate uncertainty in model parameters. ---- - - - - - - -## Introduction - -This article only requires the tidymodels package. - -Combining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. 
These approaches have been explored before, for instance by [Andrew MacDonald here](https://rstudio-pubs-static.s3.amazonaws.com/19698_a4c472606e3c43e4b94720506e49bb7b.html), and [Hadley has explored efficient support for bootstrapping](https://github.com/hadley/dplyr/issues/269) as a potential enhancement to dplyr. The tidymodels package [broom](https://broom.tidyverse.org/) fits naturally with [dplyr](https://dplyr.tidyverse.org/) in performing these analyses. - -Bootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate. - -Let's say we want to fit a nonlinear model to the weight/mileage relationship in the `mtcars` data set. - - -```r -library(tidymodels) - -ggplot(mtcars, aes(mpg, wt)) + - geom_point() -``` - - - -We might use the method of nonlinear least squares (via the `nls()` function) to fit a model. - - -```r -nlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0)) -summary(nlsfit) -#> -#> Formula: mpg ~ k/wt + b -#> -#> Parameters: -#> Estimate Std. Error t value Pr(>|t|) -#> k 45.83 4.25 10.79 7.6e-12 *** -#> b 4.39 1.54 2.85 0.0077 ** -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -#> -#> Residual standard error: 2.77 on 30 degrees of freedom -#> -#> Number of iterations to convergence: 1 -#> Achieved convergence tolerance: 2.88e-08 - -ggplot(mtcars, aes(wt, mpg)) + - geom_point() + - geom_line(aes(y = predict(nlsfit))) -``` - - - -While this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data. 
- -## Bootstrapping models - -We can use the `bootstraps()` function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an `rset`, which is a data frame with a column of `rsplit` objects. - -An `rsplit` object has two main components: an analysis data set and an assessment data set, accessible via `analysis(rsplit)` and `assessment(rsplit)` respectively. For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples. - - -```r -set.seed(27) -boots <- bootstraps(mtcars, times = 2000, apparent = TRUE) -boots -#> # Bootstrap sampling with apparent sample -#> # A tibble: 2,001 × 2 -#> splits id -#> -#> 1 Bootstrap0001 -#> 2 Bootstrap0002 -#> 3 Bootstrap0003 -#> 4 Bootstrap0004 -#> 5 Bootstrap0005 -#> 6 Bootstrap0006 -#> 7 Bootstrap0007 -#> 8 Bootstrap0008 -#> 9 Bootstrap0009 -#> 10 Bootstrap0010 -#> # … with 1,991 more rows -``` - -Let's create a helper function to fit an `nls()` model on each bootstrap sample, and then use `purrr::map()` to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting. 
- - -```r -fit_nls_on_bootstrap <- function(split) { - nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0)) -} - -boot_models <- - boots %>% - mutate(model = map(splits, fit_nls_on_bootstrap), - coef_info = map(model, tidy)) - -boot_coefs <- - boot_models %>% - unnest(coef_info) -``` - -The unnested coefficient information contains a summary of each replication combined in a single data frame: - - -```r -boot_coefs -#> # A tibble: 4,002 × 8 -#> splits id model term estimate std.error statistic p.value -#> -#> 1 Bootstrap0… k 42.1 4.05 10.4 1.91e-11 -#> 2 Bootstrap0… b 5.39 1.43 3.78 6.93e- 4 -#> 3 Bootstrap0… k 49.9 5.66 8.82 7.82e-10 -#> 4 Bootstrap0… b 3.73 1.92 1.94 6.13e- 2 -#> 5 Bootstrap0… k 37.8 2.68 14.1 9.01e-15 -#> 6 Bootstrap0… b 6.73 1.17 5.75 2.78e- 6 -#> 7 Bootstrap0… k 45.6 4.45 10.2 2.70e-11 -#> 8 Bootstrap0… b 4.75 1.62 2.93 6.38e- 3 -#> 9 Bootstrap0… k 43.6 4.63 9.41 1.85e-10 -#> 10 Bootstrap0… b 5.89 1.68 3.51 1.44e- 3 -#> # … with 3,992 more rows -``` - -## Confidence intervals - -We can then calculate confidence intervals (using what is called the [percentile method](https://www.uvm.edu/~dhowell/StatPages/Randomization%20Tests/ResamplingWithR/BootstMeans/bootstrapping_means.html)): - - -```r -percentile_intervals <- int_pctl(boot_models, coef_info) -percentile_intervals -#> # A tibble: 2 × 6 -#> term .lower .estimate .upper .alpha .method -#> -#> 1 b 0.0475 4.12 7.31 0.05 percentile -#> 2 k 37.6 46.7 59.8 0.05 percentile -``` - -Or we can use histograms to get a more detailed idea of the uncertainty in each estimate: - - -```r -ggplot(boot_coefs, aes(estimate)) + - geom_histogram(bins = 30) + - facet_wrap( ~ term, scales = "free") + - geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = "blue") + - geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = "blue") -``` - - - -The rsample package also has functions for [other types of confidence 
intervals](https://rsample.tidymodels.org/reference/int_pctl.html). - -## Possible model fits - -We can use `augment()` to visualize the uncertainty in the fitted curve. Since there are so many bootstrap samples, we'll only show a sample of the model fits in our visualization: - - -```r -boot_aug <- - boot_models %>% - sample_n(200) %>% - mutate(augmented = map(model, augment)) %>% - unnest(augmented) - -boot_aug -#> # A tibble: 6,400 × 8 -#> splits id model coef_info mpg wt .fitted .resid -#> -#> 1 Bootstrap1644 16.4 4.07 15.6 0.829 -#> 2 Bootstrap1644 19.7 2.77 21.9 -2.21 -#> 3 Bootstrap1644 19.2 3.84 16.4 2.84 -#> 4 Bootstrap1644 21.4 2.78 21.8 -0.437 -#> 5 Bootstrap1644 26 2.14 27.8 -1.75 -#> 6 Bootstrap1644 33.9 1.84 32.0 1.88 -#> 7 Bootstrap1644 32.4 2.2 27.0 5.35 -#> 8 Bootstrap1644 30.4 1.62 36.1 -5.70 -#> 9 Bootstrap1644 21.5 2.46 24.4 -2.86 -#> 10 Bootstrap1644 26 2.14 27.8 -1.75 -#> # … with 6,390 more rows -``` - - -```r -ggplot(boot_aug, aes(wt, mpg)) + - geom_line(aes(y = .fitted, group = id), alpha = .2, col = "blue") + - geom_point() -``` - - - -With only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the `tidy()` and `augment()` functions works for many statistical outputs. 
As another example, we could use `smooth.spline()`, which fits a cubic smoothing spline to data: - - -```r -fit_spline_on_bootstrap <- function(split) { - data <- analysis(split) - smooth.spline(data$wt, data$mpg, df = 4) -} - -boot_splines <- - boots %>% - sample_n(200) %>% - mutate(spline = map(splits, fit_spline_on_bootstrap), - aug_train = map(spline, augment)) - -splines_aug <- - boot_splines %>% - unnest(aug_train) - -ggplot(splines_aug, aes(x, y)) + - geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = "blue") + - geom_point() -``` - - - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> 
──────────────────────────────────────────────────────────────────── -``` - - diff --git a/content/learn/statistics/infer/figs/visualize-1.svg b/content/learn/statistics/infer/figs/visualize-1.svg deleted file mode 100644 index 0c22f6b4..00000000 --- a/content/learn/statistics/infer/figs/visualize-1.svg +++ /dev/null @@ -1,95 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -250 -500 -750 -1000 - - - - - - - - - - -38 -39 -40 -41 -42 -stat -count -Simulation-Based Null Distribution - - diff --git a/content/learn/statistics/infer/figs/visualize2-1.svg b/content/learn/statistics/infer/figs/visualize2-1.svg deleted file mode 100644 index 747c9dc5..00000000 --- a/content/learn/statistics/infer/figs/visualize2-1.svg +++ /dev/null @@ -1,5099 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -250 -500 -750 -1000 - - - - - - - - - - -38 -39 -40 -41 -42 -stat -count -Simulation-Based Null Distribution - - diff --git a/content/learn/statistics/infer/figs/viz_null_fits-1.svg b/content/learn/statistics/infer/figs/viz_null_fits-1.svg deleted file mode 100644 index c7b353ac..00000000 --- a/content/learn/statistics/infer/figs/viz_null_fits-1.svg +++ /dev/null @@ -1,3251 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -50 -100 -150 -200 - - - - - - - - - --0.2 --0.1 -0.0 -0.1 -age -count - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -50 -100 -150 -200 - - - - - - - - - --2.5 -0.0 -2.5 -5.0 -collegedegree -count - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -50 -100 -150 - - - - - - - -35 -40 -45 -intercept -count -Simulation-Based Null Distributions - - diff --git 
a/content/learn/statistics/infer/figs/viz_t_ci-1.svg b/content/learn/statistics/infer/figs/viz_t_ci-1.svg deleted file mode 100644 index 98e9eea8..00000000 --- a/content/learn/statistics/infer/figs/viz_t_ci-1.svg +++ /dev/null @@ -1,80 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.0 -0.1 -0.2 -0.3 -0.4 - - - - - - - - - -40 -41 -42 -43 -stat -density -Rescaled Theoretical Distribution - - diff --git a/content/learn/statistics/infer/figs/viz_t_dist-1.svg b/content/learn/statistics/infer/figs/viz_t_dist-1.svg deleted file mode 100644 index a54c0030..00000000 --- a/content/learn/statistics/infer/figs/viz_t_dist-1.svg +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.0 -0.1 -0.2 -0.3 -0.4 - - - - - - - - --2 -0 -2 -t stat -density -Theoretical t Null Distribution - - diff --git a/content/learn/statistics/infer/index.Rmarkdown b/content/learn/statistics/infer/index.Rmarkdown deleted file mode 100644 index c62ecda7..00000000 --- a/content/learn/statistics/infer/index.Rmarkdown +++ /dev/null @@ -1,348 +0,0 @@ ---- -title: "Hypothesis testing using resampling and tidy data" -tags: [infer] -categories: [statistical analysis] -type: learn-subsection -weight: 4 -description: | - Perform common hypothesis tests for statistical inference using flexible functions. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -library(tidymodels) -library(sessioninfo) -pkgs <- c("tidymodels") -theme_set(theme_bw() + theme(legend.position = "top")) -set.seed(1234) -``` - -## Introduction - -This article only requires the tidymodels package. - -The tidymodels package [infer](https://infer.tidymodels.org/) implements an expressive grammar to perform statistical inference that coheres with the `tidyverse` design framework. 
Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs. - -Regardless of which hypothesis test we're using, we're still asking the same kind of question: - ->Is the effect or difference in our observed data real, or due to chance? - -To answer this question, we start by assuming that the observed data came from some world where "nothing is going on" (i.e. the observed effect was simply due to random chance), and call this assumption our **null hypothesis**. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the **alternate hypothesis**, which supposes that the effect present in the observed data is actually due to the fact that "something is going on.") We then calculate a **test statistic** from our data that describes the observed effect. We can use this test statistic to calculate a **p-value**, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined **significance level** `alpha`, then we can reject our null hypothesis. - -If you are new to hypothesis testing, take a look at - -* [Section 9.2 of _Statistical Inference via Data Science_](https://moderndive.com/9-hypothesis-testing.html#understanding-ht) -* The American Statistical Association's recent [statement on p-values](https://doi.org/10.1080/00031305.2016.1154108) - -The workflow of this package is designed around these ideas. 
Starting from some data set, - -+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in, -+ `hypothesize()` allows you to declare the null hypothesis, -+ `generate()` allows you to generate data reflecting the null hypothesis, and -+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution. - -Throughout this vignette, we make use of `gss`, a data set available in infer containing a sample of 500 observations of 11 variables from the *General Social Survey*. - -```{r load-gss, warning = FALSE, message = FALSE} -library(tidymodels) # Includes the infer package - -# load in the data set -data(gss) - -# take a look at its structure -dplyr::glimpse(gss) -``` - -Each row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this data set is a representative sample of a population we want to learn about: American adults. - -## Specify variables - -The `specify()` function can be used to specify which of the variables in the data set you're interested in. If you're only interested in, say, the `age` of the respondents, you might write: - -```{r specify-example, warning = FALSE, message = FALSE} -gss %>% - specify(response = age) -``` - -On the front end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. What do we see if we check the class of this object, though? 
- -```{r specify-one, warning = FALSE, message = FALSE} -gss %>% - specify(response = age) %>% - class() -``` - -We can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata. - -If you're interested in two variables (`age` and `partyid`, for example) you can `specify()` their relationship in one of two (equivalent) ways: - -```{r specify-two, warning = FALSE, message = FALSE} -# as a formula -gss %>% - specify(age ~ partyid) - -# with the named arguments -gss %>% - specify(response = age, explanatory = partyid) -``` - -If you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code: - -```{r specify-success, warning = FALSE, message = FALSE} -# specifying for inference on proportions -gss %>% - specify(response = college, success = "degree") -``` - -## Declare the hypothesis - -The next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of "independence" or "point" to the `null` argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`: - -```{r hypothesize-independence, warning = FALSE, message = FALSE} -gss %>% - specify(college ~ partyid, success = "degree") %>% - hypothesize(null = "independence") -``` - -If you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). 
For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write: - -```{r hypothesize-40-hr-week, warning = FALSE, message = FALSE} -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) -``` - -Again, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now "knows" your null hypothesis. - -## Generate the distribution - -Once we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the `type` argument: - -* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. -* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample. -* `simulate`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. (This option is currently only applicable for testing point estimates.) - -Continuing on with our example above, about the average number of hours worked a week, we might write: - -```{r generate-point, warning = FALSE, message = FALSE} -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 5000, type = "bootstrap") -``` - -In the above example, we take 5000 bootstrap samples to form our null distribution. - -To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. 
For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age: - -```{r generate-permute, warning = FALSE, message = FALSE} -gss %>% - specify(partyid ~ age) %>% - hypothesize(null = "independence") %>% - generate(reps = 5000, type = "permute") -``` - -## Calculate statistics - -Depending on whether you're carrying out computation-based inference or theory-based inference, you will either supply `calculate()` with the output of `generate()` or `hypothesize()`, respectively. The function, for one, takes in a `stat` argument, which is currently one of `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"count"`, `"diff in means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"`, `"F"`, `"t"`, `"z"`, `"slope"`, or `"correlation"`. For example, continuing our example above to calculate the null distribution of mean hours worked per week: - -```{r calculate-point, warning = FALSE, message = FALSE} -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 5000, type = "bootstrap") %>% - calculate(stat = "mean") -``` - -The output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or `"t"` and `"z"` statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. 
For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write: - -```{r specify-diff-in-means, warning = FALSE, message = FALSE} -gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 5000, type = "permute") %>% - calculate("diff in means", order = c("degree", "no degree")) -``` - -## Other utilities - -The infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`). - -To illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours. - -```{r utilities-examples} -# find the point estimate -point_estimate <- gss %>% - specify(response = hours) %>% - calculate(stat = "mean") - -# generate a null distribution -null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 5000, type = "bootstrap") %>% - calculate(stat = "mean") -``` - -(Notice the warning: `Removed 1244 rows containing missing values.` This would be worth noting if you were actually carrying out this hypothesis test.) - -Our point estimate `r point_estimate` seems *pretty* close to 40, but a little bit different. We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40. - -We could initially just visualize the null distribution. - -```{r visualize, warning = FALSE, message = FALSE} -null_dist %>% - visualize() -``` - -Where does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this. 
- -```{r visualize2, warning = FALSE, message = FALSE} -null_dist %>% - visualize() + - shade_p_value(obs_stat = point_estimate, direction = "two_sided") -``` - -Notice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize()` outputs a plot object from ggplot2 instead of a dataframe, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of `r point_estimate` hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though? - -```{r get_p_value, warning = FALSE, message = FALSE} -# get a two-tailed p-value -p_value <- null_dist %>% - get_p_value(obs_stat = point_estimate, direction = "two_sided") - -p_value -``` - -It looks like the p-value is `r p_value`, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (`r abs(point_estimate-40)` hours) from 40 would be `r p_value`. This may or may not be statistically significantly different, depending on the significance level `alpha` you decided on *before* you ran this analysis. If you had set `alpha = .05`, then this difference would be statistically significant, but if you had set `alpha = .01`, then it would not be. 
- -To get a confidence interval around our estimate, we can write: - -```{r get_conf, message = FALSE, warning = FALSE} -# start with the null distribution -null_dist %>% - # calculate the confidence interval around the point estimate - get_confidence_interval(point_estimate = point_estimate, - # at the 95% confidence level - level = .95, - # using the standard error - type = "se") -``` - -As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level `alpha = .05`. - -## Theoretical methods - -The infer package also provides functionality to use theoretical methods for `"Chisq"`, `"F"`, `"t"` and `"z"` distributions. - -Generally, to find a null distribution using theory-based methods, use the same code that you would use to find the observed statistic elsewhere, replacing calls to `calculate()` with `assume()`. For example, to calculate the observed `"t"` statistic (a standardized mean): - -```{r obs_t, message = FALSE, warning = FALSE} -# calculate an observed t statistic -obs_t <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - calculate(stat = "t") -``` - -Then, to define a theoretical `"t"` distribution, we could write: - -```{r t_dist, message = FALSE, warning = FALSE} -# switch out `calculate()` with `assume()` to define a distribution -t_dist <- gss %>% - specify(response = hours) %>% - assume(distribution = "t") -``` - -From here, the theoretical distribution interfaces in the same way that simulation-based null distributions do. 
For example, to interface with p-values: - -```{r viz_t_dist, message = FALSE, warning = FALSE} -# visualize the theoretical null distribution -visualize(t_dist) + - shade_p_value(obs_stat = obs_t, direction = "greater") - -# more exactly, calculate the p-value -get_p_value(t_dist, obs_t, "greater") -``` - -Confidence intervals lie on the scale of the data rather than on the standardized scale of the theoretical distribution, so be sure to use the unstandardized observed statistic when working with confidence intervals. - -```{r theor_obs_mean, message = FALSE, warning = FALSE} -# calculate the point estimate -obs_mean <- gss %>% - specify(response = hours) %>% - calculate(stat = "mean") - -# find the theory-based confidence interval -theor_ci <- - get_confidence_interval( - x = t_dist, - level = .95, - point_estimate = obs_mean - ) - -theor_ci -``` - -When visualized, the `"t"` distribution will be recentered and rescaled to align with the scale of the observed data. - -```{r viz_t_ci} -# visualize the theoretical sampling distribution -visualize(t_dist) + - shade_confidence_interval(theor_ci) -``` - -## Multiple regression - -To accommodate randomization-based inference with multiple explanatory variables, the package implements an alternative workflow based on model fitting. Rather than `calculate()`ing statistics from resampled data, this side of the package allows you to `fit()` linear models on data resampled according to the null hypothesis, supplying model coefficients for each explanatory variable. For the most part, you can just switch out `calculate()` for `fit()` in your `calculate()`-based workflows. - -As an example, suppose that we want to fit `hours` worked per week using the respondent `age` and `college` completion status. We could first begin by fitting a linear model to the observed data. 
- -```{r mult_fit} -observed_fit <- gss %>% - specify(hours ~ age + college) %>% - fit() -``` - -Now, to generate null distributions for each of these terms, we can fit 1000 models to resamples of the `gss` dataset, where the response `hours` is permuted in each. Note that this code is the same as the above except for the addition of the `hypothesize()` and `generate()` step. - -```{r null_fits} -null_fits <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - fit() - -null_fits -``` - -To permute variables other than the response variable, the `variables` argument to `generate()` allows you to choose columns from the data to permute. Note that any derived effects that depend on these columns (e.g., interaction effects) will also be affected. - -Beyond this point, observed fits and distributions from null fits interface exactly like analogous outputs from `calculate()`. For instance, we can use the following code to calculate a 95% confidence interval from these objects. - -```{r null_cis} -get_confidence_interval( - null_fits, - point_estimate = observed_fit, - level = .95 -) -``` - -Or, we can shade p-values for each of these observed regression coefficients from the observed data. - -```{r viz_null_fits} -visualize(null_fits) + - shade_p_value(observed_fit, direction = "both") -``` - -That's it! This vignette covers most all of the key functionality of infer. See `help(package = "infer")` for a full list of functions and vignettes. 
- - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/statistics/infer/index.markdown b/content/learn/statistics/infer/index.markdown deleted file mode 100644 index 236b66a5..00000000 --- a/content/learn/statistics/infer/index.markdown +++ /dev/null @@ -1,626 +0,0 @@ ---- -title: "Hypothesis testing using resampling and tidy data" -tags: [infer] -categories: [statistical analysis] -type: learn-subsection -weight: 4 -description: | - Perform common hypothesis tests for statistical inference using flexible functions. ---- - - - - - -## Introduction - -This article only requires the tidymodels package. - -The tidymodels package [infer](https://infer.tidymodels.org/) implements an expressive grammar to perform statistical inference that coheres with the `tidyverse` design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs. - -Regardless of which hypothesis test we're using, we're still asking the same kind of question: - ->Is the effect or difference in our observed data real, or due to chance? - -To answer this question, we start by assuming that the observed data came from some world where "nothing is going on" (i.e. the observed effect was simply due to random chance), and call this assumption our **null hypothesis**. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the **alternate hypothesis**, which supposes that the effect present in the observed data is actually due to the fact that "something is going on.") We then calculate a **test statistic** from our data that describes the observed effect. 
We can use this test statistic to calculate a **p-value**, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined **significance level** `alpha`, then we can reject our null hypothesis. - -If you are new to hypothesis testing, take a look at - -* [Section 9.2 of _Statistical Inference via Data Science_](https://moderndive.com/9-hypothesis-testing.html#understanding-ht) -* The American Statistical Association's recent [statement on p-values](https://doi.org/10.1080/00031305.2016.1154108) - -The workflow of this package is designed around these ideas. Starting from some data set, - -+ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in, -+ `hypothesize()` allows you to declare the null hypothesis, -+ `generate()` allows you to generate data reflecting the null hypothesis, and -+ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution. - -Throughout this vignette, we make use of `gss`, a data set available in infer containing a sample of 500 observations of 11 variables from the *General Social Survey*. 
- - -```r -library(tidymodels) # Includes the infer package - -# load in the data set -data(gss) - -# take a look at its structure -dplyr::glimpse(gss) -#> Rows: 500 -#> Columns: 11 -#> $ year 2014, 1994, 1998, 1996, 1994, 1996, 1990, 2016, 2000, 1998, 20… -#> $ age 36, 34, 24, 42, 31, 32, 48, 36, 30, 33, 21, 30, 38, 49, 25, 56… -#> $ sex male, female, male, male, male, female, female, female, female… -#> $ college degree, no degree, degree, no degree, degree, no degree, no de… -#> $ partyid ind, rep, ind, ind, rep, rep, dem, ind, rep, dem, dem, ind, de… -#> $ hompop 3, 4, 1, 4, 2, 4, 2, 1, 5, 2, 4, 3, 4, 4, 2, 2, 3, 2, 1, 2, 5,… -#> $ hours 50, 31, 40, 40, 40, 53, 32, 20, 40, 40, 23, 52, 38, 72, 48, 40… -#> $ income $25000 or more, $20000 - 24999, $25000 or more, $25000 or more… -#> $ class middle class, working class, working class, working class, mid… -#> $ finrela below average, below average, below average, above average, ab… -#> $ weight 0.896, 1.083, 0.550, 1.086, 1.083, 1.086, 1.063, 0.478, 1.099,… -``` - -Each row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this data set is a representative sample of a population we want to learn about: American adults. - -## Specify variables - -The `specify()` function can be used to specify which of the variables in the data set you're interested in. 
If you're only interested in, say, the `age` of the respondents, you might write: - - -```r -gss %>% - specify(response = age) -#> Response: age (numeric) -#> # A tibble: 500 × 1 -#> age -#> -#> 1 36 -#> 2 34 -#> 3 24 -#> 4 42 -#> 5 31 -#> 6 32 -#> 7 48 -#> 8 36 -#> 9 30 -#> 10 33 -#> # … with 490 more rows -``` - -On the front end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. What do we see if we check the class of this object, though? - - -```r -gss %>% - specify(response = age) %>% - class() -#> [1] "infer" "tbl_df" "tbl" "data.frame" -``` - -We can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata. - -If you're interested in two variables (`age` and `partyid`, for example) you can `specify()` their relationship in one of two (equivalent) ways: - - -```r -# as a formula -gss %>% - specify(age ~ partyid) -#> Response: age (numeric) -#> Explanatory: partyid (factor) -#> # A tibble: 500 × 2 -#> age partyid -#> -#> 1 36 ind -#> 2 34 rep -#> 3 24 ind -#> 4 42 ind -#> 5 31 rep -#> 6 32 rep -#> 7 48 dem -#> 8 36 ind -#> 9 30 rep -#> 10 33 dem -#> # … with 490 more rows - -# with the named arguments -gss %>% - specify(response = age, explanatory = partyid) -#> Response: age (numeric) -#> Explanatory: partyid (factor) -#> # A tibble: 500 × 2 -#> age partyid -#> -#> 1 36 ind -#> 2 34 rep -#> 3 24 ind -#> 4 42 ind -#> 5 31 rep -#> 6 32 rep -#> 7 48 dem -#> 8 36 ind -#> 9 30 rep -#> 10 33 dem -#> # … with 490 more rows -``` - -If you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. 
For instance, if you're interested in the proportion of the population with a college degree, you might use the following code: - - -```r -# specifying for inference on proportions -gss %>% - specify(response = college, success = "degree") -#> Response: college (factor) -#> # A tibble: 500 × 1 -#> college -#> -#> 1 degree -#> 2 no degree -#> 3 degree -#> 4 no degree -#> 5 degree -#> 6 no degree -#> 7 no degree -#> 8 degree -#> 9 degree -#> 10 no degree -#> # … with 490 more rows -``` - -## Declare the hypothesis - -The next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of "independence" or "point" to the `null` argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`: - - -```r -gss %>% - specify(college ~ partyid, success = "degree") %>% - hypothesize(null = "independence") -#> Response: college (factor) -#> Explanatory: partyid (factor) -#> Null Hypothesis: independence -#> # A tibble: 500 × 2 -#> college partyid -#> -#> 1 degree ind -#> 2 no degree rep -#> 3 degree ind -#> 4 no degree ind -#> 5 degree rep -#> 6 no degree rep -#> 7 no degree dem -#> 8 degree ind -#> 9 degree rep -#> 10 no degree dem -#> # … with 490 more rows -``` - -If you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). 
For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write: - - -```r -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) -#> Response: hours (numeric) -#> Null Hypothesis: point -#> # A tibble: 500 × 1 -#> hours -#> -#> 1 50 -#> 2 31 -#> 3 40 -#> 4 40 -#> 5 40 -#> 6 53 -#> 7 32 -#> 8 20 -#> 9 40 -#> 10 40 -#> # … with 490 more rows -``` - -Again, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now "knows" your null hypothesis. - -## Generate the distribution - -Once we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the `type` argument: - -* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. -* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample. -* `simulate`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. (This option is currently only applicable for testing point estimates.) 
- -Continuing on with our example above, about the average number of hours worked a week, we might write: - - -```r -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 5000, type = "bootstrap") -#> Response: hours (numeric) -#> Null Hypothesis: point -#> # A tibble: 2,500,000 × 2 -#> # Groups: replicate [5,000] -#> replicate hours -#> -#> 1 1 58.6 -#> 2 1 35.6 -#> 3 1 28.6 -#> 4 1 38.6 -#> 5 1 28.6 -#> 6 1 38.6 -#> 7 1 38.6 -#> 8 1 57.6 -#> 9 1 58.6 -#> 10 1 38.6 -#> # … with 2,499,990 more rows -``` - -In the above example, we take 5000 bootstrap samples to form our null distribution. - -To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age: - - -```r -gss %>% - specify(partyid ~ age) %>% - hypothesize(null = "independence") %>% - generate(reps = 5000, type = "permute") -#> Response: partyid (factor) -#> Explanatory: age (numeric) -#> Null Hypothesis: independence -#> # A tibble: 2,500,000 × 3 -#> # Groups: replicate [5,000] -#> partyid age replicate -#> -#> 1 ind 36 1 -#> 2 ind 34 1 -#> 3 ind 24 1 -#> 4 rep 42 1 -#> 5 dem 31 1 -#> 6 dem 32 1 -#> 7 dem 48 1 -#> 8 rep 36 1 -#> 9 ind 30 1 -#> 10 dem 33 1 -#> # … with 2,499,990 more rows -``` - -## Calculate statistics - -Depending on whether you're carrying out computation-based inference or theory-based inference, you will either supply `calculate()` with the output of `generate()` or `hypothesize()`, respectively. The function, for one, takes in a `stat` argument, which is currently one of `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"count"`, `"diff in means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"`, `"F"`, `"t"`, `"z"`, `"slope"`, or `"correlation"`. 
For example, continuing our example above to calculate the null distribution of mean hours worked per week: - - -```r -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 5000, type = "bootstrap") %>% - calculate(stat = "mean") -#> Response: hours (numeric) -#> Null Hypothesis: point -#> # A tibble: 5,000 × 2 -#> replicate stat -#> -#> 1 1 39.8 -#> 2 2 39.6 -#> 3 3 39.8 -#> 4 4 39.2 -#> 5 5 39.0 -#> 6 6 39.8 -#> 7 7 40.6 -#> 8 8 40.6 -#> 9 9 40.4 -#> 10 10 39.0 -#> # … with 4,990 more rows -``` - -The output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or `"t"` and `"z"` statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write: - - -```r -gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 5000, type = "permute") %>% - calculate("diff in means", order = c("degree", "no degree")) -#> Response: age (numeric) -#> Explanatory: college (factor) -#> Null Hypothesis: independence -#> # A tibble: 5,000 × 2 -#> replicate stat -#> -#> 1 1 -0.0378 -#> 2 2 1.55 -#> 3 3 0.465 -#> 4 4 1.39 -#> 5 5 -0.161 -#> 6 6 -0.179 -#> 7 7 0.0151 -#> 8 8 0.914 -#> 9 9 -1.32 -#> 10 10 -0.426 -#> # … with 4,990 more rows -``` - -## Other utilities - -The infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`). 
- -To illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours. - - -```r -# find the point estimate -point_estimate <- gss %>% - specify(response = hours) %>% - calculate(stat = "mean") - -# generate a null distribution -null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 5000, type = "bootstrap") %>% - calculate(stat = "mean") -``` - -(Notice the warning: `Removed 1244 rows containing missing values.` This would be worth noting if you were actually carrying out this hypothesis test.) - -Our point estimate 41.382 seems *pretty* close to 40, but a little bit different. We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40. - -We could initially just visualize the null distribution. - - -```r -null_dist %>% - visualize() -``` - - - -Where does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this. - - -```r -null_dist %>% - visualize() + - shade_p_value(obs_stat = point_estimate, direction = "two_sided") -``` - - - -Notice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize()` outputs a plot object from ggplot2 instead of a dataframe, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of 41.382 hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though? 
- - -```r -# get a two-tailed p-value -p_value <- null_dist %>% - get_p_value(obs_stat = point_estimate, direction = "two_sided") - -p_value -#> # A tibble: 1 × 1 -#> p_value -#> -#> 1 0.046 -``` - -It looks like the p-value is 0.046, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (1.382 hours) from 40 would be 0.046. This may or may not be statistically significantly different, depending on the significance level `alpha` you decided on *before* you ran this analysis. If you had set `alpha = .05`, then this difference would be statistically significant, but if you had set `alpha = .01`, then it would not be. - -To get a confidence interval around our estimate, we can write: - - -```r -# start with the null distribution -null_dist %>% - # calculate the confidence interval around the point estimate - get_confidence_interval(point_estimate = point_estimate, - # at the 95% confidence level - level = .95, - # using the standard error - type = "se") -#> # A tibble: 1 × 2 -#> lower_ci upper_ci -#> -#> 1 40.1 42.7 -``` - -As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level `alpha = .05`. - -## Theoretical methods - -The infer package also provides functionality to use theoretical methods for `"Chisq"`, `"F"`, `"t"` and `"z"` distributions. - -Generally, to find a null distribution using theory-based methods, use the same code that you would use to find the observed statistic elsewhere, replacing calls to `calculate()` with `assume()`. 
For example, to calculate the observed `"t"` statistic (a standardized mean): - - -```r -# calculate an observed t statistic -obs_t <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - calculate(stat = "t") -``` - -Then, to define a theoretical `"t"` distribution, we could write: - - -```r -# switch out `calculate()` with `assume()` to define a distribution -t_dist <- gss %>% - specify(response = hours) %>% - assume(distribution = "t") -``` - -From here, the theoretical distribution interfaces in the same way that simulation-based null distributions do. For example, to interface with p-values: - - -```r -# visualize the theoretical null distribution -visualize(t_dist) + - shade_p_value(obs_stat = obs_t, direction = "greater") -``` - - - -```r - -# more exactly, calculate the p-value -get_p_value(t_dist, obs_t, "greater") -#> # A tibble: 1 × 1 -#> p_value -#> -#> 1 0.0188 -``` - -Confidence intervals lie on the scale of the data rather than on the standardized scale of the theoretical distribution, so be sure to use the unstandardized observed statistic when working with confidence intervals. - - -```r -# calculate the point estimate -obs_mean <- gss %>% - specify(response = hours) %>% - calculate(stat = "mean") - -# find the theory-based confidence interval -theor_ci <- - get_confidence_interval( - x = t_dist, - level = .95, - point_estimate = obs_mean - ) - -theor_ci -#> # A tibble: 1 × 2 -#> lower_ci upper_ci -#> -#> 1 40.1 42.7 -``` - -When visualized, the `"t"` distribution will be recentered and rescaled to align with the scale of the observed data. - - -```r -# visualize the theoretical sampling distribution -visualize(t_dist) + - shade_confidence_interval(theor_ci) -``` - - - -## Multiple regression - -To accommodate randomization-based inference with multiple explanatory variables, the package implements an alternative workflow based on model fitting. 
Rather than `calculate()`ing statistics from resampled data, this side of the package allows you to `fit()` linear models on data resampled according to the null hypothesis, supplying model coefficients for each explanatory variable. For the most part, you can just switch out `calculate()` for `fit()` in your `calculate()`-based workflows. - -As an example, suppose that we want to fit `hours` worked per week using the respondent `age` and `college` completion status. We could first begin by fitting a linear model to the observed data. - - -```r -observed_fit <- gss %>% - specify(hours ~ age + college) %>% - fit() -``` - -Now, to generate null distributions for each of these terms, we can fit 1000 models to resamples of the `gss` dataset, where the response `hours` is permuted in each. Note that this code is the same as the above except for the addition of the `hypothesize()` and `generate()` step. - - -```r -null_fits <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% - fit() - -null_fits -#> # A tibble: 3,000 × 3 -#> # Groups: replicate [1,000] -#> replicate term estimate -#> -#> 1 1 intercept 43.5 -#> 2 1 age -0.0453 -#> 3 1 collegedegree -0.940 -#> 4 2 intercept 41.7 -#> 5 2 age -0.000105 -#> 6 2 collegedegree -1.03 -#> 7 3 intercept 39.4 -#> 8 3 age 0.0534 -#> 9 3 collegedegree -0.354 -#> 10 4 intercept 40.4 -#> # … with 2,990 more rows -``` - -To permute variables other than the response variable, the `variables` argument to `generate()` allows you to choose columns from the data to permute. Note that any derived effects that depend on these columns (e.g., interaction effects) will also be affected. - -Beyond this point, observed fits and distributions from null fits interface exactly like analogous outputs from `calculate()`. For instance, we can use the following code to calculate a 95% confidence interval from these objects. 
- - -```r -get_confidence_interval( - null_fits, - point_estimate = observed_fit, - level = .95 -) -#> # A tibble: 3 × 3 -#> term lower_ci upper_ci -#> -#> 1 age -0.0971 0.0872 -#> 2 collegedegree -2.79 2.61 -#> 3 intercept 37.8 45.5 -``` - -Or, we can shade p-values for each of these observed regression coefficients from the observed data. - - -```r -visualize(null_fits) + - shade_p_value(observed_fit, direction = "both") -``` - - - -That's it! This vignette covers most all of the key functionality of infer. See `help(package = "infer")` for a full list of functions and vignettes. - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> 
──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/statistics/k-means/figs/unnamed-chunk-8-1.svg b/content/learn/statistics/k-means/figs/unnamed-chunk-8-1.svg deleted file mode 100644 index e071d098..00000000 --- a/content/learn/statistics/k-means/figs/unnamed-chunk-8-1.svg +++ /dev/null @@ -1,3108 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -7 - - - - - - - - - - -8 - - - - - - - - - - -9 - - - - - - - - - - -4 - - - - - - - - - - -5 - - - - - - - - - - -6 - - - - - - - - - - -1 - - - - - - - - - - -2 - - - - - - - - - - -3 - - - - - --5 -0 -5 - - - --5 -0 -5 - - - --5 -0 -5 --2.5 -0.0 -2.5 - - - --2.5 -0.0 -2.5 - - - --2.5 -0.0 -2.5 - - - -x1 -x2 - -.cluster - - - - - - - - - - - - - - - - - - -1 -2 -3 -4 -5 -6 -7 -8 -9 - - diff --git a/content/learn/statistics/k-means/index.Rmarkdown b/content/learn/statistics/k-means/index.Rmarkdown deleted file mode 100755 index 63ac2f0a..00000000 --- a/content/learn/statistics/k-means/index.Rmarkdown +++ /dev/null @@ -1,176 +0,0 @@ ---- -title: "K-means clustering with tidy data principles" -tags: [broom] -categories: [statistical analysis] -type: learn-subsection -weight: 2 -description: | - Summarize clustering characteristics and estimate the best number of clusters for a data set. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -library(tidymodels) -pkgs <- c("tidymodels") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -This article only requires the tidymodels package. - -K-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions: - -- `tidy()` -- `augment()` -- `glance()` - -Let's start by generating some random two-dimensional data with three clusters. 
Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster: - -```{r} -library(tidymodels) - -set.seed(27) - -centers <- tibble( - cluster = factor(1:3), - num_points = c(100, 150, 50), # number points in each cluster - x1 = c(5, 0, -3), # x1 coordinate of cluster center - x2 = c(-1, 1, -2) # x2 coordinate of cluster center -) - -labelled_points <- - centers %>% - mutate( - x1 = map2(num_points, x1, rnorm), - x2 = map2(num_points, x2, rnorm) - ) %>% - select(-num_points) %>% - unnest(cols = c(x1, x2)) - -ggplot(labelled_points, aes(x1, x2, color = cluster)) + - geom_point(alpha = 0.3) -``` - -This is an ideal case for k-means clustering. - -## How does K-means work? - -Rather than using equations, this short animation using the [artwork](https://github.com/allisonhorst/stats-illustrations) of Allison Horst explains the clustering process: - -```{r illustrations, echo = FALSE, results = 'asis', fig.align="center"} -knitr::include_graphics("kmeans.gif") -``` - -## Clustering in R - -We'll use the built-in `kmeans()` function, which accepts a data frame with all numeric columns as it's primary argument. - -```{r} -points <- - labelled_points %>% - select(-cluster) - -kclust <- kmeans(points, centers = 3) -kclust -summary(kclust) -``` - -The output is a list of vectors, where each component has a different length. There's one of length `r nrow(points)`, the same as our original data set. There are two elements of length 3 (`withinss` and `tot.withinss`) and `centers` is a matrix with 3 rows. And then there are the elements of length 1: `totss`, `tot.withinss`, `betweenss`, and `iter`. (The value `ifault` indicates possible algorithm problems.) - -These differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a *different kind* of information. 
- -- `cluster` (`r nrow(points)` values) contains information about each *point* -- `centers`, `withinss`, and `size` (3 values) contain information about each *cluster* -- `totss`, `tot.withinss`, `betweenss`, and `iter` (1 value) contain information about the *full clustering* - -Which of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there's no straightforward way to combine them), they are extracted by separate functions. `augment` adds the point classifications to the original data set: - -```{r} -augment(kclust, points) -``` - -The `tidy()` function summarizes on a per-cluster level: - -```{r} -tidy(kclust) -``` - -And as it always does, the `glance()` function extracts a single-row summary: - -```{r} -glance(kclust) -``` - -## Exploratory clustering - -While these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like [dplyr](https://dplyr.tidyverse.org/). - -Let's say we want to explore the effect of different choices of `k`, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of `k`, then create columns containing the tidied, glanced and augmented data: - -```{r} -kclusts <- - tibble(k = 1:9) %>% - mutate( - kclust = map(k, ~kmeans(points, .x)), - tidied = map(kclust, tidy), - glanced = map(kclust, glance), - augmented = map(kclust, augment, points) - ) - -kclusts -``` - -We can turn these into three separate data sets each representing a different type of data: using `tidy()`, using `augment()`, and using `glance()`. Each of these goes into a separate data set as they represent different types of data. 
- -```{r} -clusters <- - kclusts %>% - unnest(cols = c(tidied)) - -assignments <- - kclusts %>% - unnest(cols = c(augmented)) - -clusterings <- - kclusts %>% - unnest(cols = c(glanced)) -``` - -Now we can plot the original points using the data from `augment()`, with each point colored according to the predicted cluster. - -```{r, fig.width = 7, fig.height = 7} -p1 <- - ggplot(assignments, aes(x = x1, y = x2)) + - geom_point(aes(color = .cluster), alpha = 0.8) + - facet_wrap(~ k) -p1 -``` - -Already we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when `k` is too high or too low. We can then add the centers of the cluster using the data from `tidy()`: - -```{r} -p2 <- p1 + geom_point(data = clusters, size = 10, shape = "x") -p2 -``` - -The data from `glance()` fills a different but equally important purpose; it lets us view trends of some summary statistics across values of `k`. Of particular interest is the total within sum of squares, saved in the `tot.withinss` column. - -```{r} -ggplot(clusterings, aes(k, tot.withinss)) + - geom_line() + - geom_point() -``` - -This represents the variance within the clusters. It decreases as `k` increases, but notice a bend (or "elbow") around `k = 3`. This bend indicates that additional clusters beyond the third have little value. (See [here](https://web.stanford.edu/~hastie/Papers/gap.pdf) for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output. 
- -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/statistics/k-means/index.markdown b/content/learn/statistics/k-means/index.markdown deleted file mode 100644 index 8cb6c52d..00000000 --- a/content/learn/statistics/k-means/index.markdown +++ /dev/null @@ -1,292 +0,0 @@ ---- -title: "K-means clustering with tidy data principles" -tags: [broom] -categories: [statistical analysis] -type: learn-subsection -weight: 2 -description: | - Summarize clustering characteristics and estimate the best number of clusters for a data set. ---- - - - - - -## Introduction - -This article only requires the tidymodels package. - -K-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions: - -- `tidy()` -- `augment()` -- `glance()` - -Let's start by generating some random two-dimensional data with three clusters. Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster: - - -```r -library(tidymodels) - -set.seed(27) - -centers <- tibble( - cluster = factor(1:3), - num_points = c(100, 150, 50), # number points in each cluster - x1 = c(5, 0, -3), # x1 coordinate of cluster center - x2 = c(-1, 1, -2) # x2 coordinate of cluster center -) - -labelled_points <- - centers %>% - mutate( - x1 = map2(num_points, x1, rnorm), - x2 = map2(num_points, x2, rnorm) - ) %>% - select(-num_points) %>% - unnest(cols = c(x1, x2)) - -ggplot(labelled_points, aes(x1, x2, color = cluster)) + - geom_point(alpha = 0.3) -``` - - - -This is an ideal case for k-means clustering. - -## How does K-means work? 
- -Rather than using equations, this short animation using the [artwork](https://github.com/allisonhorst/stats-illustrations) of Allison Horst explains the clustering process: - - - -## Clustering in R - -We'll use the built-in `kmeans()` function, which accepts a data frame with all numeric columns as it's primary argument. - - -```r -points <- - labelled_points %>% - select(-cluster) - -kclust <- kmeans(points, centers = 3) -kclust -#> K-means clustering with 3 clusters of sizes 148, 51, 101 -#> -#> Cluster means: -#> x1 x2 -#> 1 0.0885 1.05 -#> 2 -3.1429 -2.00 -#> 3 5.0040 -1.05 -#> -#> Clustering vector: -#> [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 -#> [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 -#> [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 -#> [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -#> [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -#> [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -#> [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 -#> [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 -#> [297] 2 2 2 2 -#> -#> Within cluster sum of squares by cluster: -#> [1] 299 109 243 -#> (between_SS / total_SS = 82.5 %) -#> -#> Available components: -#> -#> [1] "cluster" "centers" "totss" "withinss" "tot.withinss" -#> [6] "betweenss" "size" "iter" "ifault" -summary(kclust) -#> Length Class Mode -#> cluster 300 -none- numeric -#> centers 6 -none- numeric -#> totss 1 -none- numeric -#> withinss 3 -none- numeric -#> tot.withinss 1 -none- numeric -#> betweenss 1 -none- numeric -#> size 3 -none- numeric -#> iter 1 -none- numeric -#> ifault 1 -none- numeric -``` - -The output is a list of vectors, where each component has a different length. There's one of length 300, the same as our original data set. 
There are two elements of length 3 (`withinss` and `tot.withinss`) and `centers` is a matrix with 3 rows. And then there are the elements of length 1: `totss`, `tot.withinss`, `betweenss`, and `iter`. (The value `ifault` indicates possible algorithm problems.) - -These differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a *different kind* of information. - -- `cluster` (300 values) contains information about each *point* -- `centers`, `withinss`, and `size` (3 values) contain information about each *cluster* -- `totss`, `tot.withinss`, `betweenss`, and `iter` (1 value) contain information about the *full clustering* - -Which of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there's no straightforward way to combine them), they are extracted by separate functions. `augment` adds the point classifications to the original data set: - - -```r -augment(kclust, points) -#> # A tibble: 300 × 3 -#> x1 x2 .cluster -#> -#> 1 6.91 -2.74 3 -#> 2 6.14 -2.45 3 -#> 3 4.24 -0.946 3 -#> 4 3.54 0.287 3 -#> 5 3.91 0.408 3 -#> 6 5.30 -1.58 3 -#> 7 5.01 -1.77 3 -#> 8 6.16 -1.68 3 -#> 9 7.13 -2.17 3 -#> 10 5.24 -2.42 3 -#> # … with 290 more rows -``` - -The `tidy()` function summarizes on a per-cluster level: - - -```r -tidy(kclust) -#> # A tibble: 3 × 5 -#> x1 x2 size withinss cluster -#> -#> 1 0.0885 1.05 148 299. 1 -#> 2 -3.14 -2.00 51 109. 2 -#> 3 5.00 -1.05 101 243. 3 -``` - -And as it always does, the `glance()` function extracts a single-row summary: - - -```r -glance(kclust) -#> # A tibble: 1 × 4 -#> totss tot.withinss betweenss iter -#> -#> 1 3724. 651. 3073. 2 -``` - -## Exploratory clustering - -While these summaries are useful, they would not have been too difficult to extract out from the data set yourself. 
The real power comes from combining these analyses with other tools like [dplyr](https://dplyr.tidyverse.org/). - -Let's say we want to explore the effect of different choices of `k`, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of `k`, then create columns containing the tidied, glanced and augmented data: - - -```r -kclusts <- - tibble(k = 1:9) %>% - mutate( - kclust = map(k, ~kmeans(points, .x)), - tidied = map(kclust, tidy), - glanced = map(kclust, glance), - augmented = map(kclust, augment, points) - ) - -kclusts -#> # A tibble: 9 × 5 -#> k kclust tidied glanced augmented -#> -#> 1 1 -#> 2 2 -#> 3 3 -#> 4 4 -#> 5 5 -#> 6 6 -#> 7 7 -#> 8 8 -#> 9 9 -``` - -We can turn these into three separate data sets each representing a different type of data: using `tidy()`, using `augment()`, and using `glance()`. Each of these goes into a separate data set as they represent different types of data. - - -```r -clusters <- - kclusts %>% - unnest(cols = c(tidied)) - -assignments <- - kclusts %>% - unnest(cols = c(augmented)) - -clusterings <- - kclusts %>% - unnest(cols = c(glanced)) -``` - -Now we can plot the original points using the data from `augment()`, with each point colored according to the predicted cluster. - - -```r -p1 <- - ggplot(assignments, aes(x = x1, y = x2)) + - geom_point(aes(color = .cluster), alpha = 0.8) + - facet_wrap(~ k) -p1 -``` - - - -Already we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when `k` is too high or too low. We can then add the centers of the cluster using the data from `tidy()`: - - -```r -p2 <- p1 + geom_point(data = clusters, size = 10, shape = "x") -p2 -``` - - - -The data from `glance()` fills a different but equally important purpose; it lets us view trends of some summary statistics across values of `k`. Of particular interest is the total within sum of squares, saved in the `tot.withinss` column. 
- - -```r -ggplot(clusterings, aes(k, tot.withinss)) + - geom_line() + - geom_point() -``` - - - -This represents the variance within the clusters. It decreases as `k` increases, but notice a bend (or "elbow") around `k = 3`. This bend indicates that additional clusters beyond the third have little value. (See [here](https://web.stanford.edu/~hastie/Papers/gap.pdf) for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output. - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> 
──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/statistics/tidy-analysis/index.Rmarkdown b/content/learn/statistics/tidy-analysis/index.Rmarkdown deleted file mode 100755 index 96287bbd..00000000 --- a/content/learn/statistics/tidy-analysis/index.Rmarkdown +++ /dev/null @@ -1,197 +0,0 @@ ---- -title: "Correlation and regression fundamentals with tidy data principles" -tags: [broom] -categories: [statistical analysis] -type: learn-subsection -weight: 1 -description: | - Analyze the results of correlation tests and simple regression models for many data sets at once. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -library(tidymodels) -pkgs <- c("tidymodels") -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -This article only requires the tidymodels package. - -While the tidymodels package [broom](https://broom.tidyverse.org/) is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the `nest()/unnest()` functions from [tidyr](https://tidyr.tidyverse.org/) and the `map()` function in [purrr](https://purrr.tidyverse.org/). - -## Correlation analysis - -Let's demonstrate this with a simple data set, the built-in `Orange`. We start by coercing `Orange` to a `tibble`. This gives a nicer print method that will be especially useful later on when we start working with list-columns. - -```{r} -library(tidymodels) - -data(Orange) - -Orange <- as_tibble(Orange) -Orange -``` - -This contains 35 observations of three variables: `Tree`, `age`, and `circumference`. 
`Tree` is a factor with five levels describing five trees. As might be expected, age and circumference are correlated: - -```{r} -cor(Orange$age, Orange$circumference) - -library(ggplot2) - -ggplot(Orange, aes(age, circumference, color = Tree)) + - geom_line() -``` - -Suppose you want to test for correlations individually *within* each tree. You can do this with dplyr's `group_by`: - -```{r} -Orange %>% - group_by(Tree) %>% - summarize(correlation = cor(age, circumference)) -``` - -(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees). - -Suppose that instead of simply estimating a correlation, we want to perform a hypothesis test with `cor.test()`: - -```{r} -ct <- cor.test(Orange$age, Orange$circumference) -ct -``` - -This test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the `tidy()` function: - -```{r} -tidy(ct) -``` - -Often, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a `nest-map-unnest` workflow. For example, suppose we want to perform correlation tests for each different tree. We start by `nest`ing our data based on the group of interest: - -```{r} -nested <- - Orange %>% - nest(data = c(age, circumference)) -``` - -Then we perform a correlation test for each nested tibble using `purrr::map()`: - -```{r} -nested %>% - mutate(test = map(data, ~ cor.test(.x$age, .x$circumference))) -``` - -This results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with `map()`. 
- -```{r} -nested %>% - mutate( - test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col - tidied = map(test, tidy) - ) -``` - -Finally, we want to unnest the tidied data frames so we can see the results in a flat tibble. All together, this looks like: - -```{r} -Orange %>% - nest(data = c(age, circumference)) %>% - mutate( - test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col - tidied = map(test, tidy) - ) %>% - unnest(cols = tidied) %>% - select(-data, -test) -``` - -## Regression models - -This type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like: - -```{r} -lm_fit <- lm(age ~ circumference, data = Orange) -summary(lm_fit) -``` - -When we tidy these results, we get multiple rows of output for each model: - -```{r} -tidy(lm_fit) -``` - -Now we can handle multiple regressions at once using exactly the same workflow as before: - -```{r} -Orange %>% - nest(data = c(-Tree)) %>% - mutate( - fit = map(data, ~ lm(age ~ circumference, data = .x)), - tidied = map(fit, tidy) - ) %>% - unnest(tidied) %>% - select(-data, -fit) -``` - -You can just as easily use multiple predictors in the regressions, as shown here on the `mtcars` dataset. We nest the data into automatic vs. manual cars (the `am` column), then perform the regression within each nested tibble. - -```{r} -data(mtcars) -mtcars <- as_tibble(mtcars) # to play nicely with list-cols -mtcars - -mtcars %>% - nest(data = c(-am)) %>% - mutate( - fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col - tidied = map(fit, tidy) - ) %>% - unnest(tidied) %>% - select(-data, -fit) -``` - -What if you want not just the `tidy()` output, but the `augment()` and `glance()` outputs as well, while still performing each regression only once? Since we're using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs. 
- -```{r} -regressions <- - mtcars %>% - nest(data = c(-am)) %>% - mutate( - fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), - tidied = map(fit, tidy), - glanced = map(fit, glance), - augmented = map(fit, augment) - ) - -regressions %>% - select(tidied) %>% - unnest(tidied) - -regressions %>% - select(glanced) %>% - unnest(glanced) - -regressions %>% - select(augmented) %>% - unnest(augmented) -``` - -By combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. This includes: - -- sorting by p-value or estimate to find the most significant terms across all tests, -- p-value histograms, and -- volcano plots comparing p-values to effect size estimates. - -In each of these cases, we can easily filter, facet, or distinguish based on the `term` column. In short, this makes the tools of tidy data analysis available for the *results* of data analysis and models, not just the inputs. - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/statistics/tidy-analysis/index.markdown b/content/learn/statistics/tidy-analysis/index.markdown deleted file mode 100644 index 45fdc7f9..00000000 --- a/content/learn/statistics/tidy-analysis/index.markdown +++ /dev/null @@ -1,396 +0,0 @@ ---- -title: "Correlation and regression fundamentals with tidy data principles" -tags: [broom] -categories: [statistical analysis] -type: learn-subsection -weight: 1 -description: | - Analyze the results of correlation tests and simple regression models for many data sets at once. ---- - - - - - -## Introduction - -This article only requires the tidymodels package. 
- -While the tidymodels package [broom](https://broom.tidyverse.org/) is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the `nest()/unnest()` functions from [tidyr](https://tidyr.tidyverse.org/) and the `map()` function in [purrr](https://purrr.tidyverse.org/). - -## Correlation analysis - -Let's demonstrate this with a simple data set, the built-in `Orange`. We start by coercing `Orange` to a `tibble`. This gives a nicer print method that will be especially useful later on when we start working with list-columns. - - -```r -library(tidymodels) - -data(Orange) - -Orange <- as_tibble(Orange) -Orange -#> # A tibble: 35 × 3 -#> Tree age circumference -#> -#> 1 1 118 30 -#> 2 1 484 58 -#> 3 1 664 87 -#> 4 1 1004 115 -#> 5 1 1231 120 -#> 6 1 1372 142 -#> 7 1 1582 145 -#> 8 2 118 33 -#> 9 2 484 69 -#> 10 2 664 111 -#> # … with 25 more rows -``` - -This contains 35 observations of three variables: `Tree`, `age`, and `circumference`. `Tree` is a factor with five levels describing five trees. As might be expected, age and circumference are correlated: - - -```r -cor(Orange$age, Orange$circumference) -#> [1] 0.914 - -library(ggplot2) - -ggplot(Orange, aes(age, circumference, color = Tree)) + - geom_line() -``` - - - -Suppose you want to test for correlations individually *within* each tree. You can do this with dplyr's `group_by`: - - -```r -Orange %>% - group_by(Tree) %>% - summarize(correlation = cor(age, circumference)) -#> # A tibble: 5 × 2 -#> Tree correlation -#> -#> 1 3 0.988 -#> 2 1 0.985 -#> 3 5 0.988 -#> 4 2 0.987 -#> 5 4 0.984 -``` - -(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees). 
- -Suppose that instead of simply estimating a correlation, we want to perform a hypothesis test with `cor.test()`: - - -```r -ct <- cor.test(Orange$age, Orange$circumference) -ct -#> -#> Pearson's product-moment correlation -#> -#> data: Orange$age and Orange$circumference -#> t = 13, df = 33, p-value = 2e-14 -#> alternative hypothesis: true correlation is not equal to 0 -#> 95 percent confidence interval: -#> 0.834 0.956 -#> sample estimates: -#> cor -#> 0.914 -``` - -This test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the `tidy()` function: - - -```r -tidy(ct) -#> # A tibble: 1 × 8 -#> estimate statistic p.value parameter conf.low conf.high method alternative -#> -#> 1 0.914 12.9 1.93e-14 33 0.834 0.956 Pearson'… two.sided -``` - -Often, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a `nest-map-unnest` workflow. For example, suppose we want to perform correlation tests for each different tree. We start by `nest`ing our data based on the group of interest: - - -```r -nested <- - Orange %>% - nest(data = c(age, circumference)) -``` - -Then we perform a correlation test for each nested tibble using `purrr::map()`: - - -```r -nested %>% - mutate(test = map(data, ~ cor.test(.x$age, .x$circumference))) -#> # A tibble: 5 × 3 -#> Tree data test -#> -#> 1 1 -#> 2 2 -#> 3 3 -#> 4 4 -#> 5 5 -``` - -This results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with `map()`. 
- - -```r -nested %>% - mutate( - test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col - tidied = map(test, tidy) - ) -#> # A tibble: 5 × 4 -#> Tree data test tidied -#> -#> 1 1 -#> 2 2 -#> 3 3 -#> 4 4 -#> 5 5 -``` - -Finally, we want to unnest the tidied data frames so we can see the results in a flat tibble. All together, this looks like: - - -```r -Orange %>% - nest(data = c(age, circumference)) %>% - mutate( - test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col - tidied = map(test, tidy) - ) %>% - unnest(cols = tidied) %>% - select(-data, -test) -#> # A tibble: 5 × 9 -#> Tree estimate statistic p.value parameter conf.low conf.high method -#> -#> 1 1 0.985 13.0 0.0000485 5 0.901 0.998 Pearson's pro… -#> 2 2 0.987 13.9 0.0000343 5 0.914 0.998 Pearson's pro… -#> 3 3 0.988 14.4 0.0000290 5 0.919 0.998 Pearson's pro… -#> 4 4 0.984 12.5 0.0000573 5 0.895 0.998 Pearson's pro… -#> 5 5 0.988 14.1 0.0000318 5 0.916 0.998 Pearson's pro… -#> # … with 1 more variable: alternative -``` - -## Regression models - -This type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like: - - -```r -lm_fit <- lm(age ~ circumference, data = Orange) -summary(lm_fit) -#> -#> Call: -#> lm(formula = age ~ circumference, data = Orange) -#> -#> Residuals: -#> Min 1Q Median 3Q Max -#> -317.9 -140.9 -17.2 96.5 471.2 -#> -#> Coefficients: -#> Estimate Std. Error t value Pr(>|t|) -#> (Intercept) 16.604 78.141 0.21 0.83 -#> circumference 7.816 0.606 12.90 1.9e-14 *** -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1 -#> -#> Residual standard error: 203 on 33 degrees of freedom -#> Multiple R-squared: 0.835, Adjusted R-squared: 0.83 -#> F-statistic: 166 on 1 and 33 DF, p-value: 1.93e-14 -``` - -When we tidy these results, we get multiple rows of output for each model: - - -```r -tidy(lm_fit) -#> # A tibble: 2 × 5 -#> term estimate std.error statistic p.value -#> -#> 1 (Intercept) 16.6 78.1 0.212 8.33e- 1 -#> 2 circumference 7.82 0.606 12.9 1.93e-14 -``` - -Now we can handle multiple regressions at once using exactly the same workflow as before: - - -```r -Orange %>% - nest(data = c(-Tree)) %>% - mutate( - fit = map(data, ~ lm(age ~ circumference, data = .x)), - tidied = map(fit, tidy) - ) %>% - unnest(tidied) %>% - select(-data, -fit) -#> # A tibble: 10 × 6 -#> Tree term estimate std.error statistic p.value -#> -#> 1 1 (Intercept) -265. 98.6 -2.68 0.0436 -#> 2 1 circumference 11.9 0.919 13.0 0.0000485 -#> 3 2 (Intercept) -132. 83.1 -1.59 0.172 -#> 4 2 circumference 7.80 0.560 13.9 0.0000343 -#> 5 3 (Intercept) -210. 85.3 -2.46 0.0574 -#> 6 3 circumference 12.0 0.835 14.4 0.0000290 -#> 7 4 (Intercept) -76.5 88.3 -0.867 0.426 -#> 8 4 circumference 7.17 0.572 12.5 0.0000573 -#> 9 5 (Intercept) -54.5 76.9 -0.709 0.510 -#> 10 5 circumference 8.79 0.621 14.1 0.0000318 -``` - -You can just as easily use multiple predictors in the regressions, as shown here on the `mtcars` dataset. We nest the data into automatic vs. manual cars (the `am` column), then perform the regression within each nested tibble. 
- - -```r -data(mtcars) -mtcars <- as_tibble(mtcars) # to play nicely with list-cols -mtcars -#> # A tibble: 32 × 11 -#> mpg cyl disp hp drat wt qsec vs am gear carb -#> -#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 -#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 -#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 -#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 -#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 -#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 -#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 -#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 -#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 -#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 -#> # … with 22 more rows - -mtcars %>% - nest(data = c(-am)) %>% - mutate( - fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col - tidied = map(fit, tidy) - ) %>% - unnest(tidied) %>% - select(-data, -fit) -#> # A tibble: 8 × 6 -#> am term estimate std.error statistic p.value -#> -#> 1 1 (Intercept) 4.28 3.46 1.24 0.247 -#> 2 1 mpg -0.101 0.0294 -3.43 0.00750 -#> 3 1 qsec 0.0398 0.151 0.264 0.798 -#> 4 1 gear -0.0229 0.349 -0.0656 0.949 -#> 5 0 (Intercept) 4.92 1.40 3.52 0.00309 -#> 6 0 mpg -0.192 0.0443 -4.33 0.000591 -#> 7 0 qsec 0.0919 0.0983 0.935 0.365 -#> 8 0 gear 0.147 0.368 0.398 0.696 -``` - -What if you want not just the `tidy()` output, but the `augment()` and `glance()` outputs as well, while still performing each regression only once? Since we're using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs. 
- - -```r -regressions <- - mtcars %>% - nest(data = c(-am)) %>% - mutate( - fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), - tidied = map(fit, tidy), - glanced = map(fit, glance), - augmented = map(fit, augment) - ) - -regressions %>% - select(tidied) %>% - unnest(tidied) -#> # A tibble: 8 × 5 -#> term estimate std.error statistic p.value -#> -#> 1 (Intercept) 4.28 3.46 1.24 0.247 -#> 2 mpg -0.101 0.0294 -3.43 0.00750 -#> 3 qsec 0.0398 0.151 0.264 0.798 -#> 4 gear -0.0229 0.349 -0.0656 0.949 -#> 5 (Intercept) 4.92 1.40 3.52 0.00309 -#> 6 mpg -0.192 0.0443 -4.33 0.000591 -#> 7 qsec 0.0919 0.0983 0.935 0.365 -#> 8 gear 0.147 0.368 0.398 0.696 - -regressions %>% - select(glanced) %>% - unnest(glanced) -#> # A tibble: 2 × 12 -#> r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC -#> -#> 1 0.833 0.778 0.291 15.0 0.000759 3 -0.00580 10.0 12.8 -#> 2 0.625 0.550 0.522 8.32 0.00170 3 -12.4 34.7 39.4 -#> # … with 3 more variables: deviance , df.residual , nobs - -regressions %>% - select(augmented) %>% - unnest(augmented) -#> # A tibble: 32 × 10 -#> wt mpg qsec gear .fitted .resid .hat .sigma .cooksd .std.resid -#> -#> 1 2.62 21 16.5 4 2.73 -0.107 0.517 0.304 0.0744 -0.527 -#> 2 2.88 21 17.0 4 2.75 0.126 0.273 0.304 0.0243 0.509 -#> 3 2.32 22.8 18.6 4 2.63 -0.310 0.312 0.279 0.188 -1.29 -#> 4 2.2 32.4 19.5 4 1.70 0.505 0.223 0.233 0.278 1.97 -#> 5 1.62 30.4 18.5 4 1.86 -0.244 0.269 0.292 0.0889 -0.982 -#> 6 1.84 33.9 19.9 4 1.56 0.274 0.286 0.286 0.125 1.12 -#> 7 1.94 27.3 18.9 4 2.19 -0.253 0.151 0.293 0.0394 -0.942 -#> 8 2.14 26 16.7 5 2.21 -0.0683 0.277 0.307 0.00732 -0.276 -#> 9 1.51 30.4 16.9 5 1.77 -0.259 0.430 0.284 0.263 -1.18 -#> 10 3.17 15.8 14.5 5 3.15 0.0193 0.292 0.308 0.000644 0.0789 -#> # … with 22 more rows -``` - -By combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. 
This includes: - -- sorting by p-value or estimate to find the most significant terms across all tests, -- p-value histograms, and -- volcano plots comparing p-values to effect size estimates. - -In each of these cases, we can easily filter, facet, or distinguish based on the `term` column. In short, this makes the tools of tidy data analysis available for the *results* of data analysis and models, not just the inputs. - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/statistics/xtabs/figs/visualize-indep-1.svg 
b/content/learn/statistics/xtabs/figs/visualize-indep-1.svg deleted file mode 100644 index 251cba6c..00000000 --- a/content/learn/statistics/xtabs/figs/visualize-indep-1.svg +++ /dev/null @@ -1,5092 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -400 -800 -1200 - - - - - - - - - -0 -5 -10 -15 -20 -stat -count -Simulation-Based Null Distribution - - diff --git a/content/learn/statistics/xtabs/figs/visualize-indep-both-1.svg b/content/learn/statistics/xtabs/figs/visualize-indep-both-1.svg deleted file mode 100644 index 3bcd083d..00000000 --- a/content/learn/statistics/xtabs/figs/visualize-indep-both-1.svg +++ /dev/null @@ -1,5093 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.05 -0.10 -0.15 - - - - - - - - - -0 -5 -10 -15 -20 -Chi-Square stat -density -Simulation-Based and Theoretical Chi-Square Null Distributions - - diff --git a/content/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg b/content/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg deleted file mode 100644 index 64483773..00000000 --- a/content/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg +++ /dev/null @@ -1,5087 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -500 -1000 -1500 - - - - - - - - -0 -10 -20 -30 -stat -count -Simulation-Based Null Distribution - - diff --git a/content/learn/statistics/xtabs/index.Rmarkdown b/content/learn/statistics/xtabs/index.Rmarkdown deleted file mode 100644 index 9d9928cd..00000000 --- a/content/learn/statistics/xtabs/index.Rmarkdown +++ /dev/null @@ -1,221 +0,0 @@ ---- -title: "Statistical analysis of contingency tables" -tags: [infer] -categories: [statistical analysis] -type: learn-subsection -weight: 5 -description: | - Use tests of independence and goodness of fit to analyze tables of counts. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -library(tidymodels) -library(sessioninfo) -pkgs <- c("tidymodels") -theme_set(theme_bw() + theme(legend.position = "top")) -set.seed(1234) -``` - - -## Introduction - -This article only requires that you have the tidymodels package installed. - -In this vignette, we'll walk through conducting a $\chi^2$ (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution. - -Throughout this vignette, we'll make use of the `ad_data` data set (available in the modeldata package, which is part of tidymodels). This data set is related to cognitive impairment in 333 patients from [Craig-Schapiro _et al_ (2011)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079734/). See `?ad_data` for more information on the variables included and their source. One of the main research questions in these data were how a person's genetics related to the Apolipoprotein E gene affect their cognitive skills. The data shows: - -```{r glimpse-ad_data-actual, warning = FALSE, message = FALSE} -library(tidymodels) # Includes the infer package - -data(ad_data, package = "modeldata") -ad_data %>% - select(Genotype, Class) -``` - -The three main genetic variants are called E2, E3, and E4. The values in `Genotype` represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of "E2E4" means E2 from one parent and E4 from the other). 
- -## Test of independence - -To carry out a chi-squared test of independence, we'll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data: - -```{r plot-indep, echo = FALSE} -ad_data %>% - ggplot() + - aes(y = Genotype, fill = Class) + - geom_bar(position = "fill") + - scale_fill_brewer(type = "qual") + - labs(y = "Genotype: Apolipoprotein E Genetics", - x = "Proportion") -``` - -If there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise? - -First, to calculate the observed statistic, we can use `specify()` and `calculate()`. - -```{r calc-obs-stat-indep, warning = FALSE, message = FALSE} -# calculate the observed statistic -observed_indep_statistic <- ad_data %>% - specify(Genotype ~ Class) %>% - calculate(stat = "Chisq") -``` - -The observed $\chi^2$ statistic is `r observed_indep_statistic`. Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics. - -We can `generate()` the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person's genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two. 
- -```{r generate-null-indep, warning = FALSE, message = FALSE} -# generate the null distribution using randomization -null_distribution_simulated <- ad_data %>% - specify(Genotype ~ Class) %>% - hypothesize(null = "independence") %>% - generate(reps = 5000, type = "permute") %>% - calculate(stat = "Chisq") -``` - -Note that, in the line `specify(Genotype ~ Class)` above, we could use the equivalent syntax `specify(response = Genotype, explanatory = Class)`. The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization. - -```{r generate-null-indep-t, warning = FALSE, message = FALSE} -# generate the null distribution by theoretical approximation -null_distribution_theoretical <- ad_data %>% - specify(Genotype ~ Class) %>% - hypothesize(null = "independence") %>% - # note that we skip the generation step here! - calculate(stat = "Chisq") -``` - -To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: - -```{r visualize-indep, warning = FALSE, message = FALSE} -# visualize the null distribution and test statistic! -null_distribution_simulated %>% - visualize() + - shade_p_value(observed_indep_statistic, - direction = "greater") -``` - -We could also visualize the observed statistic against the theoretical null distribution. Note that we skip the `generate()` and `calculate()` steps when using the theoretical approach, and that we now need to provide `method = "theoretical"` to `visualize()`. - -```{r visualize-indep-theor, warning = FALSE, message = FALSE} -# visualize the theoretical null distribution and test statistic! 
-ad_data %>% - specify(Genotype ~ Class) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical") + - shade_p_value(observed_indep_statistic, - direction = "greater") -``` - -To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = "both"`. - -```{r visualize-indep-both, warning = FALSE, message = FALSE} -# visualize both null distributions and the test statistic! -null_distribution_simulated %>% - visualize(method = "both") + - shade_p_value(observed_indep_statistic, - direction = "greater") -``` - -Either way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value: - -```{r p-value-indep, warning = FALSE, message = FALSE} -# calculate the p value from the observed statistic and null distribution -p_value_independence <- null_distribution_simulated %>% - get_p_value(obs_stat = observed_indep_statistic, - direction = "greater") - -p_value_independence -``` - -Thus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than `r observed_indep_statistic` is approximately `r p_value_independence`. - -Note that, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this: - -```{r chisq-indep-wrapper, message = FALSE, warning = FALSE} -chisq_test(ad_data, Genotype ~ Class) -``` - - -## Goodness of fit - -Now, moving on to a chi-squared goodness of fit test, we'll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. 
For example, [Song _et al_ (2004)](https://annals.org/aim/article-abstract/717641/meta-analysis-apolipoprotein-e-genotypes-risk-coronary-heart-disease) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known). - -The rates of the meta-analysis and our observed data are: - -```{r rates} -# Song, Y., Stampfer, M. J., & Liu, S. (2004). Meta-Analysis: Apolipoprotein E -# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, -# 141(2), 137. -meta_rates <- c("E2E2" = 0.71, "E2E3" = 11.4, "E2E4" = 2.32, - "E3E3" = 61.0, "E3E4" = 22.6, "E4E4" = 2.22) -meta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100% - -obs_rates <- table(ad_data$Genotype)/nrow(ad_data) -round(cbind(obs_rates, meta_rates) * 100, 2) -``` - -Suppose our null hypothesis is that `Genotype` follows the same frequency distribution as the meta-analysis. Lets now test whether this difference in distributions is statistically significant. - -First, to carry out this hypothesis test, we would calculate our observed statistic. - -```{r observed-gof-statistic, warning = FALSE, message = FALSE} -# calculating the null distribution -observed_gof_statistic <- ad_data %>% - specify(response = Genotype) %>% - hypothesize(null = "point", p = meta_rates) %>% - calculate(stat = "Chisq") -``` - -The observed statistic is `r observed_gof_statistic`. 
Now, generating a null distribution, by just dropping in a call to `generate()`: - - -```{r null-distribution-gof, warning = FALSE, message = FALSE} -# generating a null distribution -null_distribution_gof <- ad_data %>% - specify(response = Genotype) %>% - hypothesize(null = "point", p = meta_rates) %>% - generate(reps = 5000, type = "draw") %>% - calculate(stat = "Chisq") -``` - -Again, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: - -```{r visualize-indep-gof, warning = FALSE, message = FALSE} -# visualize the null distribution and test statistic! -null_distribution_gof %>% - visualize() + - shade_p_value(observed_gof_statistic, - direction = "greater") -``` - -This statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? Calculating the p-value: - -```{r get-p-value-gof, warning = FALSE, message = FALSE} -# calculate the p-value -p_value_gof <- null_distribution_gof %>% - get_p_value(observed_gof_statistic, - direction = "greater") - -p_value_gof -``` - -Thus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately `r p_value_gof`. - -Again, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out chi-squared goodness of fit tests on tidy data. 
The syntax goes like this: - -```{r chisq-gof-wrapper, message = FALSE, warning = FALSE} -chisq_test(ad_data, response = Genotype, p = meta_rates) -``` - - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/statistics/xtabs/index.markdown b/content/learn/statistics/xtabs/index.markdown deleted file mode 100644 index 37ad4930..00000000 --- a/content/learn/statistics/xtabs/index.markdown +++ /dev/null @@ -1,299 +0,0 @@ ---- -title: "Statistical analysis of contingency tables" -tags: [infer] -categories: [statistical analysis] -type: learn-subsection -weight: 5 -description: | - Use tests of independence and goodness of fit to analyze tables of counts. ---- - - - - - - -## Introduction - -This article only requires that you have the tidymodels package installed. - -In this vignette, we'll walk through conducting a `\(\chi^2\)` (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution. - -Throughout this vignette, we'll make use of the `ad_data` data set (available in the modeldata package, which is part of tidymodels). This data set is related to cognitive impairment in 333 patients from [Craig-Schapiro _et al_ (2011)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079734/). See `?ad_data` for more information on the variables included and their source. One of the main research questions in these data were how a person's genetics related to the Apolipoprotein E gene affect their cognitive skills. 
The data shows: - - -```r -library(tidymodels) # Includes the infer package - -data(ad_data, package = "modeldata") -ad_data %>% - select(Genotype, Class) -#> # A tibble: 333 × 2 -#> Genotype Class -#> -#> 1 E3E3 Control -#> 2 E3E4 Control -#> 3 E3E4 Control -#> 4 E3E4 Control -#> 5 E3E3 Control -#> 6 E4E4 Impaired -#> 7 E2E3 Control -#> 8 E2E3 Control -#> 9 E3E3 Control -#> 10 E2E3 Impaired -#> # … with 323 more rows -``` - -The three main genetic variants are called E2, E3, and E4. The values in `Genotype` represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of "E2E4" means E2 from one parent and E4 from the other). - -## Test of independence - -To carry out a chi-squared test of independence, we'll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data: - - - -If there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise? - -First, to calculate the observed statistic, we can use `specify()` and `calculate()`. - - -```r -# calculate the observed statistic -observed_indep_statistic <- ad_data %>% - specify(Genotype ~ Class) %>% - calculate(stat = "Chisq") -``` - -The observed `\(\chi^2\)` statistic is 21.577. Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics. - -We can `generate()` the null distribution in one of two ways: using randomization or theory-based methods. 
The randomization approach permutes the response and explanatory variables, so that each person's genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two. - - -```r -# generate the null distribution using randomization -null_distribution_simulated <- ad_data %>% - specify(Genotype ~ Class) %>% - hypothesize(null = "independence") %>% - generate(reps = 5000, type = "permute") %>% - calculate(stat = "Chisq") -``` - -Note that, in the line `specify(Genotype ~ Class)` above, we could use the equivalent syntax `specify(response = Genotype, explanatory = Class)`. The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization. - - -```r -# generate the null distribution by theoretical approximation -null_distribution_theoretical <- ad_data %>% - specify(Genotype ~ Class) %>% - hypothesize(null = "independence") %>% - # note that we skip the generation step here! - calculate(stat = "Chisq") -``` - -To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: - - -```r -# visualize the null distribution and test statistic! -null_distribution_simulated %>% - visualize() + - shade_p_value(observed_indep_statistic, - direction = "greater") -``` - - - -We could also visualize the observed statistic against the theoretical null distribution. Note that we skip the `generate()` and `calculate()` steps when using the theoretical approach, and that we now need to provide `method = "theoretical"` to `visualize()`. - - -```r -# visualize the theoretical null distribution and test statistic! 
-ad_data %>% - specify(Genotype ~ Class) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical") + - shade_p_value(observed_indep_statistic, - direction = "greater") -``` - - - -To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = "both"`. - - -```r -# visualize both null distributions and the test statistic! -null_distribution_simulated %>% - visualize(method = "both") + - shade_p_value(observed_indep_statistic, - direction = "greater") -``` - - - -Either way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value: - - -```r -# calculate the p value from the observed statistic and null distribution -p_value_independence <- null_distribution_simulated %>% - get_p_value(obs_stat = observed_indep_statistic, - direction = "greater") - -p_value_independence -#> # A tibble: 1 × 1 -#> p_value -#> -#> 1 0.0006 -``` - -Thus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than 21.577 is approximately 6\times 10^{-4}. - -Note that, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this: - - -```r -chisq_test(ad_data, Genotype ~ Class) -#> # A tibble: 1 × 3 -#> statistic chisq_df p_value -#> -#> 1 21.6 5 0.000630 -``` - - -## Goodness of fit - -Now, moving on to a chi-squared goodness of fit test, we'll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. 
For example, [Song _et al_ (2004)](https://annals.org/aim/article-abstract/717641/meta-analysis-apolipoprotein-e-genotypes-risk-coronary-heart-disease) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known). - -The rates of the meta-analysis and our observed data are: - - -```r -# Song, Y., Stampfer, M. J., & Liu, S. (2004). Meta-Analysis: Apolipoprotein E -# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, -# 141(2), 137. -meta_rates <- c("E2E2" = 0.71, "E2E3" = 11.4, "E2E4" = 2.32, - "E3E3" = 61.0, "E3E4" = 22.6, "E4E4" = 2.22) -meta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100% - -obs_rates <- table(ad_data$Genotype)/nrow(ad_data) -round(cbind(obs_rates, meta_rates) * 100, 2) -#> obs_rates meta_rates -#> E2E2 0.6 0.71 -#> E2E3 11.1 11.37 -#> E2E4 2.4 2.31 -#> E3E3 50.1 60.85 -#> E3E4 31.8 22.54 -#> E4E4 3.9 2.21 -``` - -Suppose our null hypothesis is that `Genotype` follows the same frequency distribution as the meta-analysis. Lets now test whether this difference in distributions is statistically significant. - -First, to carry out this hypothesis test, we would calculate our observed statistic. - - -```r -# calculating the null distribution -observed_gof_statistic <- ad_data %>% - specify(response = Genotype) %>% - hypothesize(null = "point", p = meta_rates) %>% - calculate(stat = "Chisq") -``` - -The observed statistic is 23.384. 
Now, generating a null distribution, by just dropping in a call to `generate()`: - - - -```r -# generating a null distribution -null_distribution_gof <- ad_data %>% - specify(response = Genotype) %>% - hypothesize(null = "point", p = meta_rates) %>% - generate(reps = 5000, type = "draw") %>% - calculate(stat = "Chisq") -``` - -Again, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: - - -```r -# visualize the null distribution and test statistic! -null_distribution_gof %>% - visualize() + - shade_p_value(observed_gof_statistic, - direction = "greater") -``` - - - -This statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? Calculating the p-value: - - -```r -# calculate the p-value -p_value_gof <- null_distribution_gof %>% - get_p_value(observed_gof_statistic, - direction = "greater") - -p_value_gof -#> # A tibble: 1 × 1 -#> p_value -#> -#> 1 0.001 -``` - -Thus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately 0.001. - -Again, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out chi-squared goodness of fit tests on tidy data. The syntax goes like this: - - -```r -chisq_test(ad_data, response = Genotype, p = meta_rates) -#> # A tibble: 1 × 3 -#> statistic chisq_df p_value -#> -#> 1 23.4 5 0.000285 -``` - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/work/bayes-opt/figs/bo-param-plot-1.svg b/content/learn/work/bayes-opt/figs/bo-param-plot-1.svg deleted file mode 100644 index bbd5a756..00000000 --- a/content/learn/work/bayes-opt/figs/bo-param-plot-1.svg +++ /dev/null @@ -1,380 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# Components - - - - - - - - - - -Cost (log-2) - - - - - - - - - - -Radial Basis Function sigma (log-10) - - - - - - - - -0 -10 -20 -30 -40 -50 - - - - - - -0 -10 -20 -30 -40 -50 - - - - - - -0 -10 -20 -30 -40 -50 --10.0 --7.5 --5.0 --2.5 -0.0 - - - - - --10 --5 -0 -5 - - - - -0 -5 -10 -15 -20 - - - - - -Iterations - - diff --git a/content/learn/work/bayes-opt/figs/bo-plot-1.svg b/content/learn/work/bayes-opt/figs/bo-plot-1.svg deleted file mode 100644 index 7b81ae14..00000000 --- a/content/learn/work/bayes-opt/figs/bo-plot-1.svg +++ /dev/null @@ -1,280 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.25 -0.50 -0.75 - - - - - - - - - -0 -10 -20 -30 -40 -50 -Iteration -roc_auc - - diff --git a/content/learn/work/bayes-opt/index.Rmarkdown b/content/learn/work/bayes-opt/index.Rmarkdown deleted file mode 100755 index 157cecd2..00000000 --- a/content/learn/work/bayes-opt/index.Rmarkdown +++ /dev/null @@ -1,191 +0,0 @@ ---- -title: "Iterative Bayesian optimization of a classification model" -tags: [tune, dials, parsnip, recipes, workflows] -categories: [model tuning] -type: learn-subsection -weight: 3 -description: | - Identify the best hyperparameters for a model using Bayesian optimization of iterative search. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include=FALSE} -library(tidymodels) -library(tune) -library(kernlab) -library(rlang) -library(doMC) -library(themis) -registerDoMC(cores = parallel::detectCores()) - -pkgs <- c("modeldata", "kernlab", "tidymodels", "themis") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -Many of the examples for model tuning focus on [grid search](/learn/work/tune-svm/). For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, _iterative search_ can be used to analyze the existing tuning parameter results and then _predict_ which tuning parameters to try next. - -There are a variety of methods for iterative search and the focus in this article is on _Bayesian optimization_. For more information on this method, these resources might be helpful: - -* [_Practical bayesian optimization of machine learning algorithms_](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=Practical+Bayesian+Optimization+of+Machine+Learning+Algorithms&btnG=) (2012). J Snoek, H Larochelle, and RP Adams. Advances in neural information. - -* [_A Tutorial on Bayesian Optimization for Machine Learning_](https://www.cs.toronto.edu/~rgrosse/courses/csc411_f18/tutorials/tut8_adams_slides.pdf) (2018). R Adams. - - * [_Gaussian Processes for Machine Learning_](http://www.gaussianprocess.org/gpml/) (2006). C E Rasmussen and C Williams. 
- -* [Other articles!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q="Bayesian+Optimization"&btnG=) - - -## Cell segmenting revisited - -To demonstrate this approach to tuning models, let's return to the cell segmentation data from the [Getting Started](/start/resampling/) article on resampling: - -```{r import-data} -library(tidymodels) -library(modeldata) - -# Load data -data(cells) - -set.seed(2369) -tr_te_split <- initial_split(cells %>% select(-case), prop = 3/4) -cell_train <- training(tr_te_split) -cell_test <- testing(tr_te_split) - -set.seed(1697) -folds <- vfold_cv(cell_train, v = 10) -``` - -## The tuning scheme - -Since the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about `r floor(mean(cells$class == "PS") * 100)`% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be _tuned_ so that we have enough (but not too many) representations of the data. - -```{r recipe} -library(themis) - -cell_pre_proc <- - recipe(class ~ ., data = cell_train) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% - step_pca(all_predictors(), num_comp = tune()) %>% - step_downsample(class) -``` - -In this analysis, we will use a support vector machine to model the data. Let's use a radial basis function (RBF) kernel and tune its main parameter ($\sigma$). Additionally, the main SVM parameter, the cost value, also needs optimization. 
- -```{r model} -svm_mod <- - svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% - set_engine("kernlab") -``` - -These two objects (the recipe and model) will be combined into a single object via the `workflow()` function from the [workflows](https://workflows.tidymodels.org/) package; this object will be used in the optimization process. - -```{r workflow} -svm_wflow <- - workflow() %>% - add_model(svm_mod) %>% - add_recipe(cell_pre_proc) -``` - -From this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by: - -```{r pset} -svm_set <- extract_parameter_set_dials(svm_wflow) -svm_set -``` - -The default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the `update()` function. Let's constrain the search to one to twenty components by updating the `num_comp` parameter. Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all): - -```{r update} -svm_set <- - svm_set %>% - update(num_comp = num_comp(c(0L, 20L))) -``` - -## Sequential tuning - -Bayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an _acquisition function_. - -For example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as $\kappa$) is a value that can be used to make trade-offs between **exploration** and **exploitation**. - - * **Exploration** means that the search will consider candidates in untested space. 
- - * **Exploitation** focuses in areas where the previous best results occurred. - -The variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity. - -We'll use another acquisition function, _expected improvement_, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the [package vignette for acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html). - -```{r search, cache = TRUE} -set.seed(12) -search_res <- - svm_wflow %>% - tune_bayes( - resamples = folds, - # To use non-default parameter ranges - param_info = svm_set, - # Generate five at semi-random to start - initial = 5, - iter = 50, - # How to measure performance? - metrics = metric_set(roc_auc), - control = control_bayes(no_improve = 30, verbose = TRUE) - ) -``` - -The resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number: - -```{r show-iters} -search_res -``` - -As with grid search, we can summarize the results over resamples: - -```{r summarize-iters} -estimates <- - collect_metrics(search_res) %>% - arrange(.iter) - -estimates -``` - - -The best performance of the initial set of candidate values was `AUC = `r max(estimates$mean[estimates$.iter == 0])` `. The best results were achieved at iteration `r estimates$.iter[which.max(estimates$mean)]` with a corresponding AUC value of `r max(estimates$mean)`. 
The five best results are: - -```{r best} -show_best(search_res, metric = "roc_auc") -``` - -A plot of the search iterations can be created via: - -```{r bo-plot} -autoplot(search_res, type = "performance") -``` - -There are many parameter combinations have roughly equivalent results. - -How did the parameters change over iterations? - - -```{r bo-param-plot, fig.width=9} -autoplot(search_res, type = "parameters") + - labs(x = "Iterations", y = NULL) -``` - - - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` - diff --git a/content/learn/work/bayes-opt/index.markdown b/content/learn/work/bayes-opt/index.markdown deleted file mode 100644 index 30d884e0..00000000 --- a/content/learn/work/bayes-opt/index.markdown +++ /dev/null @@ -1,879 +0,0 @@ ---- -title: "Iterative Bayesian optimization of a classification model" -tags: [tune, dials, parsnip, recipes, workflows] -categories: [model tuning] -type: learn-subsection -weight: 3 -description: | - Identify the best hyperparameters for a model using Bayesian optimization of iterative search. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: kernlab, modeldata, themis, and tidymodels. - -Many of the examples for model tuning focus on [grid search](/learn/work/tune-svm/). For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, _iterative search_ can be used to analyze the existing tuning parameter results and then _predict_ which tuning parameters to try next. - -There are a variety of methods for iterative search and the focus in this article is on _Bayesian optimization_. For more information on this method, these resources might be helpful: - -* [_Practical bayesian optimization of machine learning algorithms_](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=Practical+Bayesian+Optimization+of+Machine+Learning+Algorithms&btnG=) (2012). 
J Snoek, H Larochelle, and RP Adams. Advances in neural information. - -* [_A Tutorial on Bayesian Optimization for Machine Learning_](https://www.cs.toronto.edu/~rgrosse/courses/csc411_f18/tutorials/tut8_adams_slides.pdf) (2018). R Adams. - - * [_Gaussian Processes for Machine Learning_](http://www.gaussianprocess.org/gpml/) (2006). C E Rasmussen and C Williams. - -* [Other articles!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q="Bayesian+Optimization"&btnG=) - - -## Cell segmenting revisited - -To demonstrate this approach to tuning models, let's return to the cell segmentation data from the [Getting Started](/start/resampling/) article on resampling: - - -```r -library(tidymodels) -library(modeldata) - -# Load data -data(cells) - -set.seed(2369) -tr_te_split <- initial_split(cells %>% select(-case), prop = 3/4) -cell_train <- training(tr_te_split) -cell_test <- testing(tr_te_split) - -set.seed(1697) -folds <- vfold_cv(cell_train, v = 10) -``` - -## The tuning scheme - -Since the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about 64% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be _tuned_ so that we have enough (but not too many) representations of the data. - - -```r -library(themis) - -cell_pre_proc <- - recipe(class ~ ., data = cell_train) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% - step_pca(all_predictors(), num_comp = tune()) %>% - step_downsample(class) -``` - -In this analysis, we will use a support vector machine to model the data. Let's use a radial basis function (RBF) kernel and tune its main parameter ($\sigma$). 
Additionally, the main SVM parameter, the cost value, also needs optimization. - - -```r -svm_mod <- - svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% - set_engine("kernlab") -``` - -These two objects (the recipe and model) will be combined into a single object via the `workflow()` function from the [workflows](https://workflows.tidymodels.org/) package; this object will be used in the optimization process. - - -```r -svm_wflow <- - workflow() %>% - add_model(svm_mod) %>% - add_recipe(cell_pre_proc) -``` - -From this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by: - - -```r -svm_set <- extract_parameter_set_dials(svm_wflow) -svm_set -#> Collection of 3 parameters for tuning -#> -#> identifier type object -#> cost cost nparam[+] -#> rbf_sigma rbf_sigma nparam[+] -#> num_comp num_comp nparam[+] -``` - -The default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the `update()` function. Let's constrain the search to one to twenty components by updating the `num_comp` parameter. Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all): - - -```r -svm_set <- - svm_set %>% - update(num_comp = num_comp(c(0L, 20L))) -``` - -## Sequential tuning - -Bayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an _acquisition function_. - -For example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. 
The multiplier on the standard error (denoted as `\(\kappa\)`) is a value that can be used to make trade-offs between **exploration** and **exploitation**. - - * **Exploration** means that the search will consider candidates in untested space. - - * **Exploitation** focuses on areas where the previous best results occurred. - -The variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity. - -We'll use another acquisition function, _expected improvement_, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the [package vignette for acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html). - - -```r -set.seed(12) -search_res <- - svm_wflow %>% - tune_bayes( - resamples = folds, - # To use non-default parameter ranges - param_info = svm_set, - # Generate five at semi-random to start - initial = 5, - iter = 50, - # How to measure performance? 
- metrics = metric_set(roc_auc), - control = control_bayes(no_improve = 30, verbose = TRUE) - ) -#> -#> ❯ Generating a set of 5 initial parameter results -#> ✓ Initialization complete -#> -#> Optimizing roc_auc using the expected improvement -#> -#> ── Iteration 1 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8761 (@iter 0) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00386, rbf_sigma=0.00513, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.879 (+/-0.0109) -#> -#> ── Iteration 2 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.879 (@iter 1) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.0338, rbf_sigma=0.00332, num_comp=13 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8736 (+/-0.0127) -#> -#> ── Iteration 3 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.879 (@iter 1) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.275, rbf_sigma=0.00304, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8792 (+/-0.0118) -#> -#> ── Iteration 4 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8792 (@iter 3) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=9.56, rbf_sigma=0.00426, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8869 (+/-0.00942) -#> -#> ── Iteration 5 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8869 (@iter 4) -#> i Gaussian 
process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=26, rbf_sigma=0.00617, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8894 (+/-0.00914) -#> -#> ── Iteration 6 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8894 (@iter 5) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=28.5, rbf_sigma=0.0054, num_comp=2 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7961 (+/-0.00994) -#> -#> ── Iteration 7 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8894 (@iter 5) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=19.7, rbf_sigma=0.802, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7641 (+/-0.00967) -#> -#> ── Iteration 8 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8894 (@iter 5) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=9.84, rbf_sigma=0.00434, num_comp=17 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8863 (+/-0.00992) -#> -#> ── Iteration 9 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8894 (@iter 5) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=12.8, rbf_sigma=0.0138, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8821 (+/-0.00847) -#> -#> ── Iteration 10 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8894 (@iter 5) -#> i 
Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=20.2, rbf_sigma=0.00842, num_comp=18 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8889 (+/-0.00942) -#> -#> ── Iteration 11 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8894 (@iter 5) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=19.5, rbf_sigma=0.0082, num_comp=15 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8963 (+/-0.00915) -#> -#> ── Iteration 12 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8963 (@iter 11) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=17.1, rbf_sigma=0.0096, num_comp=14 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8986 (+/-0.00935) -#> -#> ── Iteration 13 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=1.91, rbf_sigma=1.21e-10, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.3458 (+/-0.115) -#> -#> ── Iteration 14 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=23.2, rbf_sigma=0.0127, num_comp=14 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8932 (+/-0.00991) -#> -#> ── Iteration 15 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 
12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=23.1, rbf_sigma=0.00885, num_comp=12 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8958 (+/-0.00939) -#> -#> ── Iteration 16 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=19.9, rbf_sigma=0.00783, num_comp=13 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8955 (+/-0.00977) -#> -#> ── Iteration 17 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=24.4, rbf_sigma=0.0241, num_comp=14 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8758 (+/-0.0127) -#> -#> ── Iteration 18 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=4.83, rbf_sigma=0.00892, num_comp=14 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8922 (+/-0.00973) -#> -#> ── Iteration 19 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=5.53, rbf_sigma=0.921, num_comp=0 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7764 (+/-0.0178) -#> -#> ── Iteration 20 ────────────────────────────────────────────────────── -#> -#> i Current best: 
roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=22.9, rbf_sigma=0.00957, num_comp=15 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8939 (+/-0.00948) -#> -#> ── Iteration 21 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00119, rbf_sigma=0.843, num_comp=9 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.3579 (+/-0.112) -#> -#> ── Iteration 22 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00135, rbf_sigma=0.00161, num_comp=18 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.3477 (+/-0.114) -#> -#> ── Iteration 23 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00883, rbf_sigma=0.0108, num_comp=16 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8773 (+/-0.0118) -#> -#> ── Iteration 24 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.0186, rbf_sigma=0.00653, num_comp=14 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8746 (+/-0.0124) -#> -#> ── Iteration 25 ────────────────────────────────────────────────────── -#> -#> 
i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00127, rbf_sigma=0.0133, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8813 (+/-0.0105) -#> -#> ── Iteration 26 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8986 (@iter 12) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=4.41, rbf_sigma=0.0208, num_comp=9 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.9 (+/-0.00937) -#> -#> ── Iteration 27 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=5.55, rbf_sigma=0.153, num_comp=4 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8824 (+/-0.0107) -#> -#> ── Iteration 28 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=6.12, rbf_sigma=0.0497, num_comp=0 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8723 (+/-0.0106) -#> -#> ── Iteration 29 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=2.69, rbf_sigma=0.115, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8717 (+/-0.0096) -#> -#> ── Iteration 30 ────────────────────────────────────────────────────── -#> -#> i 
Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.0037, rbf_sigma=0.00707, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8812 (+/-0.0111) -#> -#> ── Iteration 31 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00169, rbf_sigma=0.01, num_comp=5 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8758 (+/-0.0115) -#> -#> ── Iteration 32 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=27.3, rbf_sigma=0.185, num_comp=12 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.831 (+/-0.0112) -#> -#> ── Iteration 33 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=2.23, rbf_sigma=0.286, num_comp=13 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8524 (+/-0.00999) -#> -#> ── Iteration 34 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9 (@iter 26) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=7.56, rbf_sigma=0.0162, num_comp=9 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.9001 (+/-0.0097) -#> -#> ── Iteration 35 ────────────────────────────────────────────────────── -#> -#> i Current 
best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=4.49, rbf_sigma=0.0377, num_comp=12 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8905 (+/-0.0102) -#> -#> ── Iteration 36 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=30.9, rbf_sigma=0.00156, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8836 (+/-0.0098) -#> -#> ── Iteration 37 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=13.5, rbf_sigma=0.000277, num_comp=17 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8743 (+/-0.012) -#> -#> ── Iteration 38 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=5.38, rbf_sigma=0.000549, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8794 (+/-0.0114) -#> -#> ── Iteration 39 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=27.9, rbf_sigma=3.41e-05, num_comp=13 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8727 (+/-0.013) -#> -#> ── Iteration 40 ────────────────────────────────────────────────────── -#> -#> 
i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=4.23, rbf_sigma=5.66e-05, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8768 (+/-0.0111) -#> -#> ── Iteration 41 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=7.35, rbf_sigma=6.16e-05, num_comp=2 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7963 (+/-0.00995) -#> -#> ── Iteration 42 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=29.6, rbf_sigma=7.84e-05, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.878 (+/-0.0118) -#> -#> ── Iteration 43 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=9.14, rbf_sigma=1.29e-05, num_comp=20 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8774 (+/-0.0115) -#> -#> ── Iteration 44 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=17.3, rbf_sigma=3.31e-05, num_comp=19 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8766 (+/-0.0119) -#> -#> ── Iteration 45 
────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=27.7, rbf_sigma=4.99e-06, num_comp=0 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8708 (+/-0.0125) -#> -#> ── Iteration 46 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=3.32, rbf_sigma=8.21e-06, num_comp=3 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8663 (+/-0.0131) -#> -#> ── Iteration 47 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=25.6, rbf_sigma=1.09e-05, num_comp=5 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8742 (+/-0.0114) -#> -#> ── Iteration 48 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.804, rbf_sigma=2.45e-06, num_comp=18 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.3479 (+/-0.114) -#> -#> ── Iteration 49 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=5.26, rbf_sigma=2.08e-05, num_comp=11 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8736 (+/-0.0124) -#> -#> ── 
Iteration 50 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.9001 (@iter 34) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i cost=0.00125, rbf_sigma=0.0301, num_comp=1 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7725 (+/-0.0106) -``` - -The resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number: - - -```r -search_res -#> # Tuning results -#> # 10-fold cross-validation -#> # A tibble: 510 × 5 -#> splits id .metrics .notes .iter -#> -#> 1 Fold01 0 -#> 2 Fold02 0 -#> 3 Fold03 0 -#> 4 Fold04 0 -#> 5 Fold05 0 -#> 6 Fold06 0 -#> 7 Fold07 0 -#> 8 Fold08 0 -#> 9 Fold09 0 -#> 10 Fold10 0 -#> # … with 500 more rows -``` - -As with grid search, we can summarize the results over resamples: - - -```r -estimates <- - collect_metrics(search_res) %>% - arrange(.iter) - -estimates -#> # A tibble: 55 × 10 -#> cost rbf_sigma num_comp .metric .estimator mean n std_err .config -#> -#> 1 0.00207 1.56e-5 10 roc_auc binary 0.344 10 0.114 Prepro… -#> 2 0.348 4.43e-2 1 roc_auc binary 0.773 10 0.0106 Prepro… -#> 3 15.5 1.28e-7 20 roc_auc binary 0.346 10 0.115 Prepro… -#> 4 1.45 2.04e-3 15 roc_auc binary 0.876 10 0.0122 Prepro… -#> 5 0.0304 6.41e-9 5 roc_auc binary 0.344 10 0.114 Prepro… -#> 6 0.00386 5.13e-3 19 roc_auc binary 0.879 10 0.0109 Iter1 -#> 7 0.0338 3.32e-3 13 roc_auc binary 0.874 10 0.0127 Iter2 -#> 8 0.275 3.04e-3 20 roc_auc binary 0.879 10 0.0118 Iter3 -#> 9 9.56 4.26e-3 20 roc_auc binary 0.887 10 0.00942 Iter4 -#> 10 26.0 6.17e-3 19 roc_auc binary 0.889 10 0.00914 Iter5 -#> # … with 45 more rows, and 1 more variable: .iter -``` - - -The best performance of the initial set of candidate values was `AUC = 0.876 `. The best results were achieved at iteration 34 with a corresponding AUC value of 0.9. 
The five best results are: - - -```r -show_best(search_res, metric = "roc_auc") -#> # A tibble: 5 × 10 -#> cost rbf_sigma num_comp .metric .estimator mean n std_err .config .iter -#> -#> 1 7.56 0.0162 9 roc_auc binary 0.900 10 0.00970 Iter34 34 -#> 2 4.41 0.0208 9 roc_auc binary 0.900 10 0.00937 Iter26 26 -#> 3 17.1 0.00960 14 roc_auc binary 0.899 10 0.00935 Iter12 12 -#> 4 19.5 0.00820 15 roc_auc binary 0.896 10 0.00915 Iter11 11 -#> 5 23.1 0.00885 12 roc_auc binary 0.896 10 0.00939 Iter15 15 -``` - -A plot of the search iterations can be created via: - - -```r -autoplot(search_res, type = "performance") -``` - - - -There are many parameter combinations that have roughly equivalent results. - -How did the parameters change over iterations? - - - -```r -autoplot(search_res, type = "parameters") + - labs(x = "Iterations", y = NULL) -``` - - - - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> kernlab * 0.9-31 2022-06-09 [1] CRAN (R 4.2.0) -#> modeldata * 1.0.1 2022-09-06 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang * 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> themis * 1.0.0 2022-07-02 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` - diff --git a/content/learn/work/case-weights/index.Rmarkdown b/content/learn/work/case-weights/index.Rmarkdown deleted file mode 100644 index 4e25e506..00000000 --- a/content/learn/work/case-weights/index.Rmarkdown +++ /dev/null @@ -1,165 +0,0 @@ ---- -title: "Creating case weights based on time" -tags: [parsnip,tune,recipes,workflows] -categories: [model fitting] -type: learn-subsection -weight: 5 -description: | - Create models that use coefficients, extract them from fitted models, and visualize them. 
--- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -pkgs <- c("tidymodels") -``` - -```{r load, include=FALSE} -library(tidymodels) - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -This article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others. - -## Example Data - -To demonstrate we will use the Chicago data from the modeldata package. - -```{r} -library(tidymodels) -data(Chicago) - -Chicago <- Chicago %>% - select(ridership, date, one_of(stations)) -``` - -From `?Chicago` - -> These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station. - -> The date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules. - -For simplicity, we have limited our view to the date and station variables. - -## Creating weights - -This data set contains daily information from `r min(Chicago$date)` to `r max(Chicago$date)`. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations occurred a long time ago and may not be as representative of the future as more recent observations. - -We could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. 
While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past. - -We need to decide on a way to calculate the case weights. The main constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when `x = 0` and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function - -$$ weight = base ^ x $$ - -where `base` is some constant and `x` is the number of days. To make sure that we select a reasonable `base`, we need to do some manual testing, starting with looking at how old the oldest observation is. - -```{r} -difftime("2016-01-01", min(Chicago$date)) -``` - -Using this information we can visualize the weight curve, to see if we like the value of `base`. - -```{r} -tibble_days <- tibble(days = 0:5457) - -tibble_days %>% - ggplot(aes(days)) + - geom_function(fun = ~ 0.99 ^ .x) -``` - -Setting `base` to 0.99 appears to down-weight past observations too much. Any observation more than a year old would have no influence. - -Let us try a few more values to find a good balance. - -```{r} -map_dfr( - c(0.99, 0.999, 0.9999), - ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days) -) %>% - ggplot(aes(days, value, group = base, color = base)) + - geom_line() -``` - -From this, we could pick something around 0.999 since it gives a better balance. Let's create a small function to help us encode this weight. - -```{r} -weights_from_dates <- function(x, ref) { - if_else( - condition = x >= ref, - true = 1, # <- Notice that I'm setting any future weight to 1. 
- false = 0.999 ^ as.numeric(difftime(ref, x, units = "days")) - ) -} -``` - -We then modify `Chicago` to add a weight column, explicitly making it an importance weight with `importance_weights()`. - -```{r} -Chicago <- Chicago %>% - mutate(weight = weights_from_dates(date, "2016-01-01"), - weight = importance_weights(weight)) -``` - -This approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don't need to use days as the unit. Seconds, weeks, or years could be used as well. - -## Modeling - -We start by splitting up our data into a training and testing set based on the day `"2016-01-01"`. We added weights to the data set before splitting it so each set has weights. - -```{r} -Chicago_train <- Chicago %>% filter(date < "2016-01-01") -Chicago_test <- Chicago %>% filter(date >= "2016-01-01") -``` - -Next, we are going to create a recipe. The weights won't have any influence on the preprocessing since none of these operations are supervised and we are using importance weights. - -```{r} -base_recipe <- - recipe(ridership ~ ., data = Chicago_train) %>% - # Create date features - step_date(date) %>% - step_holiday(date, keep_original_cols = FALSE) %>% - # Remove any columns with a single unique value - step_zv(all_predictors()) %>% - # Normalize all the numerical features - step_normalize(all_numeric_predictors()) %>% - # Perform PCA to reduce the correlation between the stations - step_pca(all_numeric_predictors(), threshold = 0.95) -``` - -Next we need to build the rest of the workflow. We use a linear regression specification - -```{r} -lm_spec <- - linear_reg() %>% - set_engine("lm") -``` - -and we add these together in the workflow. To activate the case weights, we use the `add_case_weights()` function to specify the name of the case weights being used. 
- -```{r} -lm_wflow <- - workflow() %>% - add_case_weights(weight) %>% - add_recipe(base_recipe) %>% - add_model(lm_spec) - -lm_wflow -``` - -With all that done we can fit the workflow with the usual syntax: - -```{r} -lm_fit <- fit(lm_wflow, data = Chicago_train) -lm_fit -``` - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/work/case-weights/index.markdown b/content/learn/work/case-weights/index.markdown deleted file mode 100644 index b5d10b47..00000000 --- a/content/learn/work/case-weights/index.markdown +++ /dev/null @@ -1,260 +0,0 @@ ---- -title: "Creating case weights based on time" -tags: [parsnip,tune,recipes,workflows] -categories: [model fitting] -type: learn-subsection -weight: 5 -description: | - Create models that use coefficients, extract them from fitted models, and visualize them. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: tidymodels. - -This article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others. - -## Example Data - -To demonstrate we will use the Chicago data from the modeldata package. - - -```r -library(tidymodels) -data(Chicago) - -Chicago <- Chicago %>% - select(ridership, date, one_of(stations)) -``` - -From `?Chicago` - -> These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station. - -> The date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules. - -For simplicity, we have limited our view to the date and station variables. 
- -## Creating weights - -This data set contains daily information from 2001-01-22 to 2016-08-28. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations appear a long time ago and not be as representative of the future as more recent observations. - -We could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past. - -We need to decide on a way to calculate the case weights. The main thing constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when `x = 0` and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function - -$$ weight = base ^ x $$ - -where `base` is some constant and `x` is the number of days. To make sure that we select a reasonable `base`, we need to do some manual testing, starting with looking at how old the oldest observation is. - - -```r -difftime("2016-01-01", min(Chicago$date)) -#> Time difference of 5457 days -``` - -Using this information we can visualize the weight curve, to see if we like the value of `base`. - - -```r -tibble_days <- tibble(days = 0:5457) - -tibble_days %>% - ggplot(aes(days)) + - geom_function(fun = ~ 0.99 ^ .x) -``` - - - -setting `base` to 0.99 appears to be down weighted too much. Any observation more than a year old would have no influence. 
- -Let us try a few more values to find - - -```r -map_dfr( - c(0.99, 0.999, 0.9999), - ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days) -) %>% - ggplot(aes(days, value, group = base, color = base)) + - geom_line() -``` - - - -From this, we could pick something around 0.999 since it gives a better balance. Let's create a small function to help us encode this weight. - - -```r -weights_from_dates <- function(x, ref) { - if_else( - condition = x >= ref, - true = 1, # <- Notice that I'm setting any future weight to 1. - false = 0.999 ^ as.numeric(difftime(ref, x, units = "days")) - ) -} -``` - -We then modify `Chicago` to add a weight column, explicitly making it an importance weight with `importance_weight()`. - - -```r -Chicago <- Chicago %>% - mutate(weight = weights_from_dates(date, "2016-01-01"), - weight = importance_weights(weight)) -``` - -This approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don't need to use days as the unit. Seconds, weeks, or years could be used as well. - -## Modeling - -We start by splitting up our data into a training and testing set based on the day `"2016-01-01"`. We added weights to the data set before splitting it so each set has weights. - - -```r -Chicago_train <- Chicago %>% filter(date < "2016-01-01") -Chicago_test <- Chicago %>% filter(date >= "2016-01-01") -``` - -Next, we are going to create a recipe. The weights won't have any influence on the preprocessing since none of these operations are supervised and we are using importance weights. 
- - -```r -base_recipe <- - recipe(ridership ~ ., data = Chicago_train) %>% - # Create date features - step_date(date) %>% - step_holiday(date, keep_original_cols = FALSE) %>% - # Remove any columns with a single unique value - step_zv(all_predictors()) %>% - # Normalize all the numerical features - step_normalize(all_numeric_predictors()) %>% - # Perform PCA to reduce the correlation bet the stations - step_pca(all_numeric_predictors(), threshold = 0.95) -``` - -Next we need to build the rest of the workflow. We use a linear regression specification - - -```r -lm_spec <- - linear_reg() %>% - set_engine("lm") -``` - -and we add these together in the workflow. To activate the case weights, we use the `add_case_weights()` function to specify the name of the case weights being used. - - -```r -lm_wflow <- - workflow() %>% - add_case_weights(weight) %>% - add_recipe(base_recipe) %>% - add_model(lm_spec) - -lm_wflow -#> ══ Workflow ══════════════════════════════════════════════════════════ -#> Preprocessor: Recipe -#> Model: linear_reg() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> 5 Recipe Steps -#> -#> • step_date() -#> • step_holiday() -#> • step_zv() -#> • step_normalize() -#> • step_pca() -#> -#> ── Case Weights ────────────────────────────────────────────────────── -#> weight -#> -#> ── Model ───────────────────────────────────────────────────────────── -#> Linear Regression Model Specification (regression) -#> -#> Computational engine: lm -``` - -With all that done we can fit the workflow with the usual syntax: - - -```r -lm_fit <- fit(lm_wflow, data = Chicago_train) -lm_fit -#> ══ Workflow [trained] ════════════════════════════════════════════════ -#> Preprocessor: Recipe -#> Model: linear_reg() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> 5 Recipe Steps -#> -#> • step_date() -#> • step_holiday() -#> • step_zv() -#> • step_normalize() -#> • step_pca() -#> -#> ── Case Weights 
────────────────────────────────────────────────────── -#> weight -#> -#> ── Model ───────────────────────────────────────────────────────────── -#> -#> Call: -#> stats::lm(formula = ..y ~ ., data = data, weights = weights) -#> -#> Coefficients: -#> (Intercept) date_dowMon date_dowTue date_dowWed date_dowThu -#> 1.76260 13.30765 14.68903 14.62018 14.38231 -#> date_dowFri date_dowSat date_monthFeb date_monthMar date_monthApr -#> 13.69543 1.22823 0.36434 1.34823 1.40990 -#> date_monthMay date_monthJun date_monthJul date_monthAug date_monthSep -#> 1.18819 2.59830 2.21972 2.40700 1.93206 -#> date_monthOct date_monthNov date_monthDec PC1 PC2 -#> 2.65555 0.90901 -0.00475 0.07301 -1.59102 -#> PC3 PC4 PC5 -#> 0.60839 -0.20531 -0.69601 -``` - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> 
yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/work/multi-scale/figs/unnamed-chunk-10-1.svg b/content/learn/work/multi-scale/figs/unnamed-chunk-10-1.svg deleted file mode 100644 index 1494c987..00000000 --- a/content/learn/work/multi-scale/figs/unnamed-chunk-10-1.svg +++ /dev/null @@ -1,5420 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -42 -° -N -43 -° -N -44 -° -N -45 -° -N -46 -° -N - - - - - - - - - - -80 -° -W -78 -° -W -76 -° -W -74 -° -W -72 -° -W - - -2000 -4000 -6000 -8000 -10000 -RMSE - - - - - - - - - - - - diff --git a/content/learn/work/multi-scale/figs/unnamed-chunk-11-1.svg b/content/learn/work/multi-scale/figs/unnamed-chunk-11-1.svg deleted file mode 100644 index 7e029cf2..00000000 --- a/content/learn/work/multi-scale/figs/unnamed-chunk-11-1.svg +++ /dev/null @@ -1,89 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -2000 -3000 -4000 -5000 -25000 -50000 -75000 -100000 -cellsize -value -name - - - - -mae -rmse - - diff --git a/content/learn/work/multi-scale/figs/unnamed-chunk-12-1.svg b/content/learn/work/multi-scale/figs/unnamed-chunk-12-1.svg 
deleted file mode 100644 index 1a534d7e..00000000 --- a/content/learn/work/multi-scale/figs/unnamed-chunk-12-1.svg +++ /dev/null @@ -1,86 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -6000 -7000 -8000 -25000 -50000 -75000 -100000 -cellsize -mean -.metric - - - - -mae -rmse - - diff --git a/content/learn/work/multi-scale/figs/unnamed-chunk-4-1.svg b/content/learn/work/multi-scale/figs/unnamed-chunk-4-1.svg deleted file mode 100644 index 47603003..00000000 --- a/content/learn/work/multi-scale/figs/unnamed-chunk-4-1.svg +++ /dev/null @@ -1,5584 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -41 -° -N -42 -° -N -43 -° -N -44 -° -N -45 -° -N -46 -° -N - - - - - - - - - - - -80 -° -W -78 -° -W -76 -° -W -74 -° -W -72 -° -W - -Fold - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Fold01 -Fold02 -Fold03 -Fold04 -Fold05 -Fold06 -Fold07 -Fold08 -Fold09 -Fold10 -Fold11 -Fold12 -Fold13 -Fold14 -Fold15 -Fold16 -Fold17 -Fold18 -Fold19 -Fold20 -Fold21 -Fold22 -Fold23 -Fold24 -Fold25 -Fold26 -Fold27 -Fold28 -Fold29 -Fold30 
-Fold31 -Fold32 -Fold33 -Fold34 -Fold35 - - diff --git a/content/learn/work/multi-scale/figs/unnamed-chunk-5-1.svg b/content/learn/work/multi-scale/figs/unnamed-chunk-5-1.svg deleted file mode 100644 index e2957aab..00000000 --- a/content/learn/work/multi-scale/figs/unnamed-chunk-5-1.svg +++ /dev/null @@ -1,5564 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -41 -° -N -42 -° -N -43 -° -N -44 -° -N -45 -° -N -46 -° -N -47 -° -N - - - - - - - - - - - - -80 -° -W -78 -° -W -76 -° -W -74 -° -W -72 -° -W - -Fold - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Fold01 -Fold02 -Fold03 -Fold04 -Fold05 -Fold06 -Fold07 -Fold08 -Fold09 -Fold10 -Fold11 -Fold12 -Fold13 -Fold14 -Fold15 -Fold16 -Fold17 -Fold18 -Fold19 -Fold20 -Fold21 -Fold22 -Fold23 -Fold24 -Fold25 -Fold26 -Fold27 -Fold28 -Fold29 -Fold30 - - diff --git a/content/learn/work/multi-scale/figs/unnamed-chunk-6-1.svg b/content/learn/work/multi-scale/figs/unnamed-chunk-6-1.svg deleted file mode 100644 index a9db9b44..00000000 --- a/content/learn/work/multi-scale/figs/unnamed-chunk-6-1.svg +++ /dev/null @@ -1,5564 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -41 -° -N -42 -° -N -43 -° -N -44 -° -N -45 -° -N -46 -° -N -47 -° -N - - - - - - - - - - - - -80 -° -W -78 -° -W -76 -° -W -74 -° -W -72 -° -W - -Fold - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Fold01 -Fold02 -Fold03 -Fold04 -Fold05 -Fold06 -Fold07 -Fold08 -Fold09 -Fold10 -Fold11 -Fold12 -Fold13 -Fold14 -Fold15 -Fold16 -Fold17 -Fold18 -Fold19 -Fold20 -Fold21 -Fold22 -Fold23 -Fold24 -Fold25 -Fold26 -Fold27 -Fold28 -Fold29 -Fold30 - - diff --git a/content/learn/work/multi-scale/index.Rmarkdown b/content/learn/work/multi-scale/index.Rmarkdown deleted file mode 100644 index 0b12cbd2..00000000 --- a/content/learn/work/multi-scale/index.Rmarkdown +++ /dev/null @@ -1,253 +0,0 @@ ---- -title: "Multi-scale model assessment with spatialsample" -tags: [spatialsample, rsample, parsnip] -categories: [tuning] -type: learn-subsection -weight: 5 -description: | - Assess how accurate a model is when aggregating predictions to different spatial scales. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -library(tidymodels) -library(spatialsample) -library(sf) -library(dplyr) -library(ggplot2) - -pkgs <- c("tidymodels", "spatialsample") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -Modeling spatially structured data is complicated. In addition to the usual difficulty of statistical modeling, models of spatially structured data may have spatial structure in their errors, with different regions being more or less well-described by a given model. This also means that it can be hard to tell how well your model performs when its predictions are aggregated to different scales, which is common when models fit to data from point measurements (for instance, the sale prices of individual homes) are used to try and estimate quantities over an entire area (the average value of all homes in a city or state). If model accuracy is only investigated at individual aggregation scales, such as when accuracy is only assessed for the original point measurements or across the entire study area as a whole, then local differences in accuracy might be "smoothed out" accidentally resulting in an inaccurate picture of model performance. - -For this reason, researchers (most notably, [Riemann et al. (2010)](https://www.nrs.fs.fed.us/pubs/jrnl/2010/nrs_2010_riemann_001.pdf)) have suggested assessing models at multiple scales of spatial aggregation to ensure cross-scale differences in model accuracy are identified and reported. This is not the same thing as [tuning a model](https://www.tidymodels.org/start/tuning/), where we're looking to select the best hyperparameters for our final model fit; instead, we want to assess how that final model performs when its predictions are aggregated to multiple scales. 
This post walks through how to do that using the [spatialsample](https://spatialsample.tidymodels.org/) package. - -## Multi-scale Assessment - -Because Riemann et al. were working with data from the US Forest Inventory and Analysis (FIA) program, we're going to do the same. However, because our main goal is to show how spatialsample can support this type of analysis, we won't spend a ton of time worrying about any of the quirks of FIA data or on feature engineering. Instead, we're going to use a simple linear model to see if we can predict how much aboveground biomass ("AGB"; all the non-root woody bits of trees) there is in a forest based on how many trees there are. We'll use all the FIA field data from New York State, USA. - -Because we're mostly interested in assessing our models, let's not focus on how exactly to download and wrangle the FIA data. If you're curious, the code is in a hidden chunk here: - -
    -Pre-processing code - -```{r, eval=FALSE} -library(dplyr) - -# Download the FIA database for New York over the internet, -# and unzip it into our local directory -# -# This updates annually, which means that this post likely won't -# generate the exact same results after 2022 -httr::GET( - "https://apps.fs.usda.gov/fia/datamart/Databases/SQLite_FIADB_NY.zip", - httr::write_disk("SQLite_FIADB_NY.zip", TRUE) -) - -unzip("SQLite_FIADB_NY.zip") - -# We're going to work with the database through dplyr's database connections -# -# But first, we need to create a DBI connection to the database and -# load out tables: -con <- DBI::dbConnect(RSQLite::SQLite(), dbname = "FIADB_NY.db") -trees <- tbl(con, "TREE") - -plots <- tbl(con, "PLOT") - -# The FIA database has every measurement ever collected by the program; -# we'll filter to only the most recent survey for each of the plots. -# -# Plots are measured on a rolling 7 year basis, so we'll also cut out any -# plots which might not be remeasured anymore with a call to filter() -plots <- plots %>% - group_by(PLOT) %>% - filter(INVYR == max(INVYR, na.rm = TRUE)) %>% - ungroup() %>% - filter(INVYR > 2009) %>% - collect() - -copy_to(con, plots, "newest_plots", TRUE) -newest_plots <- tbl(con, "newest_plots") - -# Now we'll use a filtering join to select only trees measured in the most -# recent sample at each plot -# -# We'll also count how many trees were at each plot, -# sum up their AGB, -# and save out a few other useful columns like latitude and longitude -plot_measurements <- trees %>% - right_join(newest_plots, by = c("INVYR", "PLOT")) %>% - group_by(PLOT) %>% - summarise( - yr = mean(INVYR, na.rm = TRUE), - plot = mean(PLOT, na.rm = TRUE), - lat = mean(LAT, na.rm = TRUE), - long = mean(LON, na.rm = TRUE), - n_trees = n(), - agb = sum(DRYBIO_AG, na.rm = TRUE) - ) %>% - collect() %>% - mutate( - # Because of how we joined, `n_trees` is always at least 1 -- - # even if there are 0 trees - n_trees = ifelse(is.na(agb) & 
n_trees == 1, 0, n_trees), - agb = ifelse(is.na(agb), 0, agb) - ) - -DBI::dbDisconnect(con) - -readr::write_csv(plot_measurements, "plots.csv") -``` - -
    - -With that pre-processing done, it's time to load our data and turn it into an sf object. We're going to reproject our data to use a coordinate reference system that the US government tends to use for national data products, like the FIA: - -```{r message=FALSE} -library(sf) - -invisible(sf_proj_network(TRUE)) - -plot_measurements <- - readr::read_csv("https://www.tidymodels.org/learn/work/multi-scale/plots.csv") %>% - st_as_sf(coords = c("long", "lat"), crs = 4326) %>% - st_transform(5070) -``` - -This is what we're going to resample. We want to assess our model's performance at multiple scales, following the approach in Riemann et al. That means we need to do the following: - -1. Block our study area using multiple sets of regular hexagons of different sizes, and assign our data to the hexagon it falls into within each set. -2. Perform leave-one-block-out cross-validation with each of those sets, fitting our model to `n - 1` of the `n` hexagons we've created and assessing it on the hold-out hexagon. -3. Calculate model accuracy for each size based on the aggregated predictions for each of those held-out hexes. - -So to get started, we need to block our study area. We can do this using the `spatial_block_cv()` function from spatialsample. We'll generate ten different sets of hexagon tiles, using `cellsize` arguments of between 10,000 and 100,000 meters. The code to do that, and to store all of our resamples in a single tibble, looks like this: - -```{r, message=FALSE, warning=FALSE} -set.seed(123) -library(dplyr) -library(spatialsample) -cellsize <- seq(10, 100, 10) * 1000 - -create_resample <- function(cellsize) { - spatial_block_cv( - plot_measurements, - v = Inf, - cellsize = cellsize, - square = FALSE - ) -} - -riemann_resamples <- tibble( - cellsize = cellsize, - resamples = purrr::map(cellsize, create_resample) -) -``` - -Two things to highlight about this code: - -+ `cellsize` is in meters because our coordinate reference system is in meters. 
This argument represents the length of the [apothem](https://en.wikipedia.org/wiki/Apothem), from the center of each polygon to the middle of the side. -+ `v` is `Inf` because we want to perform leave-one-block-out cross-validation, but we don't know how many blocks there will be before they're created. This is the supported way to do leave-one-X-out cross-validation in spatialsample > 0.2.0 (another option is to set `v = NULL`). - -If we want, we can visualize a few of our resamples, to get a sense of what our tiling looks like: - -```{r} -autoplot(riemann_resamples$resamples[[9]]) -``` - -```{r} -autoplot(riemann_resamples$resamples[[10]]) -``` - -And that's step 1 of the process completed! Now we need to move on to step 2, and actually fit models to each of these resamples. As a heads-up, this is a _lot_ of models, and so is going to take a while: - -```{r} -riemann_resamples$resamples %>% purrr::map_dbl(nrow) %>% sum() -``` - -Linear regression was invented around 1805, long before the Analytical Engine was a twinkle in Babbage's eye. Whenever you get frustrated at how long it takes to fit many models, it's nice to take a step back and recognize that we're asking our poor, overworked computers to fit roughly as many models as were used in the first ~100 years of the technique's life. - -Now let's load the rest of the tidymodels packages, then use them to define a workflow (from the workflows package), specifying the formula and model that we want to fit to each resample: - -```{r message=FALSE} -library(tidymodels) - -lm_workflow <- workflow(agb ~ n_trees, linear_reg()) -``` - -Next, we'll actually apply that workflow a few thousand times! Now as we said at the start, we aren't looking to tune our models using these resamples. Instead, we're looking to see how well our point predictions do at estimating AGB across larger areas. 
As such, we don't really care about calculating model metrics for each hexagon, and we'll set our code to only calculate a single metric (root-mean-squared error, or RMSE) to save a little bit of time. We'll also use the `control_resamples()` function with `save_pred = TRUE` to make sure we keep the predictions we're making across each resample. We can add these predictions as a new column to our resamples using the following: - -```{r message=FALSE} -riemann_resamples <- riemann_resamples %>% - mutate( - resampled_outputs = purrr::map( - resamples, - fit_resamples, - object = lm_workflow, - metrics = metric_set(rmse), - control = control_resamples(save_pred = TRUE) - ) - ) -``` - -The `riemann_resamples` object now includes both our original resamples as well as the predictions generated from each run of the model. We can use the following code to "unnest" our predictions and estimate both the average "true" AGB and our average prediction at each hexagon: - -```{r} -riemann_metrics <- riemann_resamples %>% - select(cellsize, resampled_outputs) %>% - unnest(resampled_outputs) %>% - transmute( - cellsize = cellsize, - mean_agb = purrr::map_dbl(.predictions, function(x) mean(x$agb)), - mean_pred = purrr::map_dbl(.predictions, function(x) mean(x$.pred)) - ) - -head(riemann_metrics) -``` - -Now that we've got our "true" and estimated AGB for each hexagon, all that's left is for us to calculate our model accuracy metrics for each aggregation scale we investigated. We can use functions from yardstick to quickly calculate our root-mean-squared error (RMSE) and mean absolute error (MAE) for each cell size we investigated: - -```{r} -riemann_metrics <- riemann_metrics %>% - group_by(cellsize) %>% - summarize(rmse = rmse_vec(mean_agb, mean_pred), - mae = mae_vec(mean_agb, mean_pred)) -``` - -And just like that, we've got a multi-scale assessment of our model's accuracy! To repeat a point from earlier, we aren't using this as a way to tune our model. 
Instead, we can use our results to investigate and report how well our model does at different levels of aggregation. For instance, while it appears that both RMSE and MAE improve as we aggregate our predictions to larger and larger hexagons, some scales have a much larger difference between the two metrics than others. This hints that, at those specific scales, a few individual hexagons are large outliers driving RMSE higher, which might indicate that our model isn't performing well in a few specific locations: - -```{r} -library(ggplot2) - -riemann_metrics %>% - pivot_longer(-cellsize) %>% - ggplot(aes(cellsize, value, color = name)) + - geom_line() + - geom_point() + - theme_minimal() -``` - -## References - -Riemann, R., Wilston, B. T., Lister, A., and Parks, S. 2010. An effective assessment protocol for continuous geospatial datasets of forest characteristics using USFS Forest Inventory and Analysis (FIA) data. Remote Sensing of Environment, 114, pp. 2337-2353. doi: 10.1016/j.rse.2010.05.010. - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/work/multi-scale/index.markdown b/content/learn/work/multi-scale/index.markdown deleted file mode 100644 index c3e7ba02..00000000 --- a/content/learn/work/multi-scale/index.markdown +++ /dev/null @@ -1,298 +0,0 @@ ---- -title: "Multi-scale model assessment with spatialsample" -tags: [spatialsample, rsample, parsnip] -categories: [tuning] -type: learn-subsection -weight: 5 -description: | - Assess how accurate a model is when aggregating predictions to different spatial scales. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: spatialsample and tidymodels. - -Modeling spatially structured data is complicated. 
In addition to the usual difficulty of statistical modeling, models of spatially structured data may have spatial structure in their errors, with different regions being more or less well-described by a given model. This also means that it can be hard to tell how well your model performs when its predictions are aggregated to different scales, which is common when models fit to data from point measurements (for instance, the sale prices of individual homes) are used to try and estimate quantities over an entire area (the average value of all homes in a city or state). If model accuracy is only investigated at individual aggregation scales, such as when accuracy is only assessed for the original point measurements or across the entire study area as a whole, then local differences in accuracy might be "smoothed out" accidentally resulting in an inaccurate picture of model performance. - -For this reason, researchers (most notably, [Riemann et al. (2010)](https://www.nrs.fs.fed.us/pubs/jrnl/2010/nrs_2010_riemann_001.pdf)) have suggested assessing models at multiple scales of spatial aggregation to ensure cross-scale differences in model accuracy are identified and reported. This is not the same thing as [tuning a model](https://www.tidymodels.org/start/tuning/), where we're looking to select the best hyperparameters for our final model fit; instead, we want to assess how that final model performs when its predictions are aggregated to multiple scales. This post walks through how to do that using the [spatialsample](https://spatialsample.tidymodels.org/) package. - -## Multi-scale Assessment - -Because Riemann et al. were working with data from the US Forest Inventory and Analysis (FIA) program, we're going to do the same. However, because our main goal is to show how spatialsample can support this type of analysis, we won't spend a ton of time worrying about any of the quirks of FIA data or on feature engineering. 
Instead, we're going to use a simple linear model to see if we can predict how much aboveground biomass ("AGB"; all the non-root woody bits of trees) there is in a forest based on how many trees there are. We'll use all the FIA field data from New York State, USA. - -Because we're mostly interested in assessing our models, let's not focus on how exactly to download and wrangle the FIA data. If you're curious, the code is in a hidden chunk here: - -
    -Pre-processing code - - -```r -library(dplyr) - -# Download the FIA database for New York over the internet, -# and unzip it into our local directory -# -# This updates annually, which means that this post likely won't -# generate the exact same results after 2022 -httr::GET( - "https://apps.fs.usda.gov/fia/datamart/Databases/SQLite_FIADB_NY.zip", - httr::write_disk("SQLite_FIADB_NY.zip", TRUE) -) - -unzip("SQLite_FIADB_NY.zip") - -# We're going to work with the database through dplyr's database connections -# -# But first, we need to create a DBI connection to the database and -# load out tables: -con <- DBI::dbConnect(RSQLite::SQLite(), dbname = "FIADB_NY.db") -trees <- tbl(con, "TREE") - -plots <- tbl(con, "PLOT") - -# The FIA database has every measurement ever collected by the program; -# we'll filter to only the most recent survey for each of the plots. -# -# Plots are measured on a rolling 7 year basis, so we'll also cut out any -# plots which might not be remeasured anymore with a call to filter() -plots <- plots %>% - group_by(PLOT) %>% - filter(INVYR == max(INVYR, na.rm = TRUE)) %>% - ungroup() %>% - filter(INVYR > 2009) %>% - collect() - -copy_to(con, plots, "newest_plots", TRUE) -newest_plots <- tbl(con, "newest_plots") - -# Now we'll use a filtering join to select only trees measured in the most -# recent sample at each plot -# -# We'll also count how many trees were at each plot, -# sum up their AGB, -# and save out a few other useful columns like latitude and longitude -plot_measurements <- trees %>% - right_join(newest_plots, by = c("INVYR", "PLOT")) %>% - group_by(PLOT) %>% - summarise( - yr = mean(INVYR, na.rm = TRUE), - plot = mean(PLOT, na.rm = TRUE), - lat = mean(LAT, na.rm = TRUE), - long = mean(LON, na.rm = TRUE), - n_trees = n(), - agb = sum(DRYBIO_AG, na.rm = TRUE) - ) %>% - collect() %>% - mutate( - # Because of how we joined, `n_trees` is always at least 1 -- - # even if there are 0 trees - n_trees = ifelse(is.na(agb) & n_trees == 1, 
0, n_trees), - agb = ifelse(is.na(agb), 0, agb) - ) - -DBI::dbDisconnect(con) - -readr::write_csv(plot_measurements, "plots.csv") -``` - -
    - -With that pre-processing done, it's time to load our data and turn it into an sf object. We're going to reproject our data to use a coordinate reference system that the US government tends to use for national data products, like the FIA: - - -```r -library(sf) - -invisible(sf_proj_network(TRUE)) - -plot_measurements <- - readr::read_csv("https://www.tidymodels.org/learn/work/multi-scale/plots.csv") %>% - st_as_sf(coords = c("long", "lat"), crs = 4326) %>% - st_transform(5070) -``` - -This is what we're going to resample. We want to assess our model's performance at multiple scales, following the approach in Riemann et al. That means we need to do the following: - -1. Block our study area using multiple sets of regular hexagons of different sizes, and assign our data to the hexagon it falls into within each set. -2. Perform leave-one-block-out cross-validation with each of those sets, fitting our model to `n - 1` of the `n` hexagons we've created and assessing it on the hold-out hexagon. -3. Calculate model accuracy for each size based on the aggregated predictions for each of those held-out hexes. - -So to get started, we need to block our study area. We can do this using the `spatial_block_cv()` function from spatialsample. We'll generate ten different sets of hexagon tiles, using `cellsize` arguments of between 10,000 and 100,000 meters. The code to do that, and to store all of our resamples in a single tibble, looks like this: - - -```r -set.seed(123) -library(dplyr) -library(spatialsample) -cellsize <- seq(10, 100, 10) * 1000 - -create_resample <- function(cellsize) { - spatial_block_cv( - plot_measurements, - v = Inf, - cellsize = cellsize, - square = FALSE - ) -} - -riemann_resamples <- tibble( - cellsize = cellsize, - resamples = purrr::map(cellsize, create_resample) -) -``` - -Two things to highlight about this code: - -+ `cellsize` is in meters because our coordinate reference system is in meters. 
This argument represents the length of the [apothem](https://en.wikipedia.org/wiki/Apothem), from the center of each polygon to the middle of the side. -+ `v` is `Inf` because we want to perform leave-one-block-out cross-validation, but we don't know how many blocks there will be before they're created. This is the supported way to do leave-one-X-out cross-validation in spatialsample > 0.2.0 (another option is to set `v = NULL`). - -If we want, we can visualize a few of our resamples, to get a sense of what our tiling looks like: - - -```r -autoplot(riemann_resamples$resamples[[9]]) -``` - - - - -```r -autoplot(riemann_resamples$resamples[[10]]) -``` - - - -And that's step 1 of the process completed! Now we need to move on to step 2, and actually fit models to each of these resamples. As a heads-up, this is a _lot_ of models, and so is going to take a while: - - -```r -riemann_resamples$resamples %>% purrr::map_dbl(nrow) %>% sum() -#> [1] 2600 -``` - -Linear regression was invented around 1805, long before the Analytical Engine was a twinkle in Babbage's eye. Whenever you get frustrated at how long it takes to fit many models, it's nice to take a step back and recognize that we're asking our poor, overworked computers to fit roughly as many models as were used in the first ~100 years of the technique's life. - -Now let's load the rest of the tidymodels packages, then use them to define a workflow (from the workflows package), specifying the formula and model that we want to fit to each resample: - - -```r -library(tidymodels) - -lm_workflow <- workflow(agb ~ n_trees, linear_reg()) -``` - -Next, we'll actually apply that workflow a few thousand times! Now as we said at the start, we aren't looking to tune our models using these resamples. Instead, we're looking to see how well our point predictions do at estimating AGB across larger areas. 
As such, we don't really care about calculating model metrics for each hexagon, and we'll set our code to only calculate a single metric (root-mean-squared error, or RMSE) to save a little bit of time. We'll also use the `control_resamples()` function with `save_pred = TRUE` to make sure we keep the predictions we're making across each resample. We can add these predictions as a new column to our resamples using the following: - - -```r -riemann_resamples <- riemann_resamples %>% - mutate( - resampled_outputs = purrr::map( - resamples, - fit_resamples, - object = lm_workflow, - metrics = metric_set(rmse), - control = control_resamples(save_pred = TRUE) - ) - ) -``` - -The `riemann_resamples` object now includes both our original resamples as well as the predictions generated from each run of the model. We can use the following code to "unnest" our predictions and estimate both the average "true" AGB and our average prediction at each hexagon: - - -```r -riemann_metrics <- riemann_resamples %>% - select(cellsize, resampled_outputs) %>% - unnest(resampled_outputs) %>% - transmute( - cellsize = cellsize, - mean_agb = purrr::map_dbl(.predictions, function(x) mean(x$agb)), - mean_pred = purrr::map_dbl(.predictions, function(x) mean(x$.pred)) - ) - -head(riemann_metrics) -#> # A tibble: 6 × 3 -#> cellsize mean_agb mean_pred -#> -#> 1 10000 5930. 7161. -#> 2 10000 6265. 7020. -#> 3 10000 11766. 7673. -#> 4 10000 28067. 21806. -#> 5 10000 13132. 17911. -#> 6 10000 0 6287. -``` - -Now that we've got our "true" and estimated AGB for each hexagon, all that's left is for us to calculate our model accuracy metrics for each aggregation scale we investigated. 
We can use functions from yardstick to quickly calculate our root-mean-squared error (RMSE) and mean absolute error (MAE) for each cell size we investigated: - - -```r -riemann_metrics <- riemann_metrics %>% - group_by(cellsize) %>% - summarize(rmse = rmse_vec(mean_agb, mean_pred), - mae = mae_vec(mean_agb, mean_pred)) -``` - -And just like that, we've got a multi-scale assessment of our model's accuracy! To repeat a point from earlier, we aren't using this as a way to tune our model. Instead, we can use our results to investigate and report how well our model does at different levels of aggregation. For instance, while it appears that both RMSE and MAE improve as we aggregate our predictions to larger and larger hexagons, some scales have a much larger difference between the two metrics than others. This hints that, at those specific scales, a few individual hexagons are large outliers driving RMSE higher, which might indicate that our model isn't performing well in a few specific locations: - - -```r -library(ggplot2) - -riemann_metrics %>% - pivot_longer(-cellsize) %>% - ggplot(aes(cellsize, value, color = name)) + - geom_line() + - geom_point() + - theme_minimal() -``` - - - -## References - -Riemann, R., Wilston, B. T., Lister, A., and Parks, S. 2010. An effective assessment protocol for continuous geospatial datasets of forest characteristics using USFS Forest Inventory and Analysis (FIA) data. Remote Sensing of Environment, 114, pp. 2337-2353. doi: 10.1016/j.rse.2010.05.010. - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> spatialsample * 0.2.1 2022-08-05 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/work/multi-scale/plots.csv b/content/learn/work/multi-scale/plots.csv deleted file mode 100644 index 12a9c915..00000000 --- a/content/learn/work/multi-scale/plots.csv +++ /dev/null @@ -1,5304 +0,0 @@ -PLOT,yr,plot,lat,long,n_trees,agb -1,2017,1,42.147515000000055,-74.67891400000005,49,0 -3,2016,3,41.64130599999999,-74.85530199999995,33,26285.363412000002 -5,2015,5,42.100556,-75.839602,0,0 -6,2014,6,42.83503599999996,-74.32485799999996,74,23246.807345000005 -7,2019,7,44.68789900000001,-74.807912,14,6447.0186570000005 -8,2018,8,43.9263,-75.486155,8,0 -9,2015,9,43.03013099999999,-76.913955,36,11957.359361000003 
-10,2018,10,42.736786,-78.38136,0,0 -11,2014,11,44.730972000000015,-73.747382,42,16175.218191 -12,2017,12,42.43822,-73.580949,0,0 -13,2013,13,42.160172000000024,-79.73307100000007,35,11361.015709000001 -14,2015,14,42.84362299999999,-73.554264,13,5426.17187 -17,2012,17,42.583601,-75.283068,0,0 -18,2017,18,42.991077,-76.314216,0,0 -22,2017,22,42.282438,-76.05806299999998,15,11253.549901 -23,2016,23,42.050778,-76.567825,0,0 -25,2013,25,43.976247000000036,-74.31649800000004,47,16799.35903 -26,2017,26,43.30032000000001,-74.48491400000007,55,26908.858951000002 -28,2013,28,43.177151,-77.180462,0,0 -29,2016,29,43.55333300000001,-75.82526599999998,38,23194.483228999998 -31,2016,31,44.40042500000003,-73.94127799999993,43,18003.572041 -32,2018,32,43.991156,-76.170942,0,0 -33,2013,33,42.87809,-75.055446,0,0 -36,2015,36,42.435094,-78.51449,0,0 -37,2015,37,42.459771999999994,-79.19512499999999,13,6189.559972999999 -39,2019,39,42.16778399999999,-78.47804700000005,33,27651.508061 -40,2019,40,43.144069,-78.487376,0,0 -41,2019,41,42.678962999999975,-74.64694400000005,65,41592.394857 -42,2017,42,42.983642,-77.220044,0,0 -45,2017,45,42.57114000000001,-76.766154,16,3258.94082 -46,2015,46,43.178992,-73.32886699999997,19,13387.616945999998 -48,2019,48,43.222215,-78.299006,0,0 -49,2019,49,44.369623000000026,-75.02052699999996,30,11644.178845999999 -50,2016,50,42.78889399999998,-74.18869799999996,60,19499.012917 -55,2018,55,42.290317,-78.789661,4,0 -56,2018,56,43.406329000000014,-75.260499,26,23802.348481999998 -57,2015,57,44.01154100000002,-74.19548100000004,37,20846.08687 -58,2016,58,42.892658,-75.092783,0,0 -59,2017,59,42.161862,-73.953981,0,0 -60,2015,60,41.306301000000005,-74.05192699999999,32,37625.810937999995 -61,2019,61,44.699447,-75.219186,0,0 -62,2015,62,42.675694000000014,-78.70607599999995,43,34481.808360999996 -63,2013,63,44.006758,-73.90690999999994,81,15338.619842999997 -64,2017,64,44.815192999999965,-73.98855000000002,48,15251.399734999999 
-65,2013,65,43.46193799999997,-73.76051499999996,59,25349.432340999996 -67,2019,67,44.58944900000004,-74.657956,37,29072.386261 -69,2019,69,42.08164899999998,-75.734584,31,39723.438409 -70,2013,70,43.804894000000004,-74.24278200000002,25,6148.050816999999 -71,2017,71,42.737853,-78.440626,0,0 -72,2016,72,43.089277,-73.905273,4,2244.732727 -73,2017,73,43.400145999999964,-74.79362200000007,74,12279.168771999999 -74,2016,74,42.37284,-76.152582,0,0 -75,2018,75,44.77607800000002,-74.11834600000002,33,21034.924382000005 -76,2017,76,44.545380000000016,-74.52125800000002,73,14047.483390999996 -77,2017,77,42.722900999999986,-74.29982599999997,19,7429.493845999999 -78,2013,78,43.084105,-74.153767,0,0 -79,2014,79,43.009476,-75.307763,0,0 -80,2016,80,41.373989,-74.468681,0,0 -82,2019,82,43.261948,-78.606473,0,0 -83,2017,83,42.84962399999998,-73.67557800000002,21,34330.186528 -85,2016,85,42.361910999999985,-76.550428,33,24546.003242 -86,2013,86,42.90245700000001,-73.575147,25,11463.417970999999 -87,2019,87,42.406123,-78.3096,0,0 -88,2013,88,40.88585400000004,-72.786868,37,14434.386534999998 -90,2018,90,42.85659,-75.110656,0,0 -91,2013,91,43.106639,-78.939643,0,0 -93,2016,93,42.157459,-76.869597,0,0 -94,2018,94,42.560416,-73.488097,0,0 -95,2015,95,41.911270999999964,-75.218004,32,21369.380181 -96,2016,96,43.066792,-78.274321,0,0 -97,2014,97,44.41789200000001,-74.50085799999994,30,3413.4543790000007 -98,2014,98,43.276659,-78.531625,0,0 -100,2014,100,42.46398799999998,-78.35642999999995,44,9527.572436999999 -101,2019,101,42.542476,-77.221318,7,220.859131 -102,2013,102,42.269505,-75.088255,0,0 -103,2014,103,42.681951999999995,-76.19561500000005,29,7707.220758 -104,2015,104,43.136546999999986,-77.34473499999999,16,9203.208136 -105,2017,105,43.42132700000001,-75.83138499999998,17,12341.169625999999 -106,2015,106,42.828068,-76.775935,0,0 -107,2015,107,42.078285999999984,-78.150079,44,21065.669131000002 -108,2018,108,44.4355220000001,-74.60529299999989,87,12598.379415 
-109,2017,109,41.986032,-73.540591,0,0 -112,2019,112,42.32806000000003,-79.13097000000002,31,16459.340724 -113,2013,113,44.90569200000002,-74.60870299999999,46,11857.274495 -116,2014,116,44.23900099999997,-74.97134199999995,45,16357.272711000003 -118,2014,118,42.43128299999996,-78.64129899999993,44,27607.538160999997 -119,2014,119,42.28115699999997,-79.23662,36,0 -120,2018,120,42.54034,-76.366375,0,0 -121,2013,121,40.96465899999999,-72.74666299999998,19,3752.725387 -122,2013,122,42.567251,-77.385297,0,0 -123,2014,123,42.451702999999974,-74.205156,38,33997.9302 -124,2015,124,44.633092,-75.069702,5,1858.741132 -125,2019,125,42.502026,-75.28771300000001,7,5189.022088000001 -126,2018,126,44.35352699999999,-75.41170400000006,34,4958.637717 -127,2014,127,42.35813499999997,-75.91465100000003,36,17984.156877 -128,2016,128,41.910183,-74.704429,0,0 -129,2018,129,42.16797900000004,-79.75001800000003,36,8703.366197 -130,2019,130,44.408689999999986,-74.926328,37,15662.573440000002 -131,2019,131,42.689735,-75.924604,0,0 -132,2014,132,42.411714,-73.893055,0,0 -133,2014,133,43.79180599999999,-74.7653489999999,53,22585.084856999998 -134,2013,134,44.34132,-75.161941,33,21255.615257 -135,2017,135,42.28716899999997,-76.20462700000003,37,19343.588237999997 -136,2016,136,42.542019,-74.991702,0,0 -138,2014,138,42.91634299999999,-74.09878300000001,12,14223.600791 -141,2019,141,42.003272,-76.180163,28,25485.429084999996 -144,2015,144,42.718214,-77.981784,0,0 -145,2013,145,43.176114,-78.612492,0,0 -147,2015,147,43.14027899999998,-78.19895899999997,40,8871.112959 -148,2019,148,41.821663,-74.336514,0,0 -149,2017,149,43.592488,-74.43611999999997,38,27562.588881000003 -151,2014,151,42.99750999999999,-76.712763,7,10113.945480999999 -152,2015,152,40.835755,-73.502748,0,0 -154,2014,154,42.297523000000005,-78.745984000000007,10,3655.9287270000004 -156,2016,156,43.21238500000001,-74.3569900000001,50,33017.263325 -158,2017,158,43.75165999999998,-75.11505800000002,31,14841.800737000001 
-159,2018,159,44.23775100000005,-74.91496800000003,45,18417.617399000002 -160,2015,160,42.164627,-76.213161,0,0 -162,2019,162,43.37815,-75.12932900000004,25,6370.053991 -164,2019,164,42.291706,-79.634553,0,0 -166,2015,166,42.60123599999998,-78.34244600000005,31,9053.958604000001 -167,2017,167,42.06202600000003,-75.29432500000003,27,0 -168,2019,168,42.92448800000004,-77.18900100000005,40,30428.64506 -169,2018,169,44.46639400000002,-73.42009000000004,53,21506.668323 -170,2015,170,41.468006,-73.942441,0,0 -171,2015,171,42.18009899999998,-76.55450100000004,43,19114.289667 -172,2018,172,40.88430799999999,-72.97885700000002,10,4053.7923900000005 -173,2017,173,43.11042399999998,-78.40370299999996,45,32185.25608700001 -175,2015,175,43.353009,-74.46828200000002,29,17601.885729 -176,2017,176,42.225104000000044,-73.76964099999996,48,0 -177,2013,177,42.988066,-74.437933,7,2045.6738830000002 -179,2013,179,43.088933,-73.27758300000004,33,0 -180,2016,180,44.16654699999995,-73.54422800000002,69,27012.302233999995 -182,2018,182,42.322064,-75.455629,0,0 -183,2019,183,42.94150199999999,-75.39085300000006,28,0 -184,2018,184,42.923449,-77.068737,0,0 -185,2017,185,44.09389900000001,-74.09876600000005,100,0 -186,2015,186,42.46069600000005,-74.70792299999995,58,34027.407545999995 -187,2013,187,44.685114,-75.339405,0,0 -188,2018,188,41.992542,-74.052375,0,0 -189,2016,189,42.310925,-74.09783699999996,37,20703.221386999998 -190,2016,190,42.323335,-76.218239,0,0 -191,2017,191,43.165705,-78.325135,0,0 -192,2017,192,44.351533,-74.27186,2,0 -194,2016,194,42.29658799999999,-77.62185699999999,6,3491.231658 -196,2017,196,41.54150099999999,-74.545881,32,25486.675670999997 -197,2018,197,44.933872,-74.057053,0,0 -198,2017,198,43.022417,-75.75415599999998,6,0 -199,2018,199,42.299839,-78.9874,8,6105.113501 -200,2018,200,43.823115999999985,-73.7337889999999,102,21901.759951000007 -202,2017,202,42.53060899999996,-74.42880199999993,54,0 
-203,2016,203,42.78448899999995,-75.10820599999992,45,20183.024372999997 -204,2015,204,43.09839499999999,-75.03191500000001,44,12375.571262000001 -205,2015,205,42.129973,-73.66077199999998,13,21242.57346 -206,2017,206,40.708819,-73.336867,0,0 -208,2016,208,43.422012,-74.050107,0,0 -210,2014,210,42.670464,-74.966509,0,0 -211,2019,211,42.17337599999998,-74.311553,38,31254.20415 -212,2017,212,43.62391800000008,-74.65566900000002,73,17851.447804 -213,2016,213,42.391317,-76.908409,0,0 -215,2014,215,41.906733000000024,-75.13676000000001,37,29046.835025999993 -216,2017,216,43.94371900000001,-74.92871099999996,61,6048.009811999999 -217,2014,217,43.498620999999986,-74.95583600000003,26,16646.361651 -218,2014,218,40.95768299999999,-73.759951,28,29572.256017 -219,2013,219,42.40169,-78.881611,6,4607.002548 -220,2013,220,44.679031,-73.466389,0,0 -221,2014,221,44.921003,-73.412233,0,0 -222,2018,222,44.39986900000003,-74.22442300000007,49,33168.033494 -224,2018,224,42.176318,-77.25775799999998,17,7994.946865999999 -225,2018,225,43.001985,-73.524867,0,0 -226,2017,226,43.021577999999984,-78.10553400000002,16,20607.226581000003 -229,2019,229,42.34636300000001,-74.87800799999997,27,20678.171193 -230,2016,230,42.725737000000045,-73.46397500000003,63,33775.802519 -231,2014,231,41.1019,-72.098684,0,0 -232,2019,232,42.15083299999998,-78.236441,38,44648.659853 -233,2017,233,43.45836200000001,-76.38063400000007,56,35411.129504 -234,2017,234,44.53031499999998,-74.17767999999997,31,10980.564381 -236,2015,236,42.679873,-76.061882,0,0 -237,2014,237,41.80420299999999,-74.96774400000002,35,22326.579728 -238,2019,238,43.992698999999945,-75.47313699999997,69,16491.398231 -239,2013,239,42.460755000000006,-75.48848200000003,35,26957.440864999997 -240,2019,240,44.473256000000006,-75.08771900000008,49,8558.336850000002 -241,2019,241,43.59365200000001,-73.47114599999996,47,14716.624579999998 -242,2014,242,41.488669999999985,-74.12282699999997,22,6845.317760999998 -243,2016,243,40.791014,-73.128561,0,0 
-244,2015,244,42.529916,-78.253181,0,0 -245,2017,245,41.96410500000001,-75.15074299999998,29,19130.910635 -246,2017,246,42.412933,-77.050236,0,0 -247,2017,247,42.885098,-77.107542,0,0 -248,2017,248,43.252475,-74.11906899999997,44,0 -249,2013,249,42.02192199999996,-74.17007300000009,46,24140.082162000006 -250,2015,250,43.54919900000003,-74.12603599999994,82,15484.520951999999 -251,2018,251,42.07590200000002,-77.76371900000002,28,11903.863949999999 -253,2017,253,42.766754,-76.793454,0,0 -256,2016,256,44.11834700000001,-75.22503800000003,32,0 -257,2017,257,42.112039,-76.844144,5,4069.810042 -258,2013,258,43.835419,-73.66377899999999,46,21479.640878000006 -259,2014,259,40.792223,-73.003062,0,0 -260,2017,260,42.209704,-76.303224,0,0 -261,2015,261,43.22128999999998,-78.57525000000004,26,26493.934122000002 -262,2013,262,41.589679999999994,-74.695473,11,6692.1225890000005 -263,2015,263,42.185328,-74.774962,0,0 -265,2014,265,42.759184999999995,-76.362034,26,18999.837056000004 -266,2014,266,42.02074300000001,-76.97350600000003,25,752.780942 -267,2014,267,44.224473,-75.947161,0,0 -268,2017,268,43.780156000000005,-74.66903499999992,51,31966.018366 -270,2019,270,43.066598,-74.08842200000002,22,11372.102794 -272,2015,272,42.363566,-78.6884799999999,67,13497.752218000001 -273,2018,273,41.670109,-74.46131800000006,30,0 -274,2017,274,42.570463999999966,-78.69248400000002,41,22970.953152000002 -282,2013,282,42.773091000000015,-73.84720300000001,17,5880.8261569999995 -283,2018,283,43.16202399999998,-77.420478,39,0 -284,2014,284,42.42576499999998,-76.17970800000006,29,29140.681682 -285,2015,285,42.609267,-77.099283,0,0 -287,2017,287,43.328332,-75.551651,0,0 -288,2015,288,42.54755999999998,-75.72799799999994,54,20350.905238999996 -289,2014,289,43.525223,-76.237941,0,0 -290,2015,290,40.86729,-73.853908,0,0 -292,2014,292,42.781952,-74.53276099999998,12,24.542268999999997 -294,2016,294,41.465237,-74.26397,0,0 -295,2013,295,42.600589,-75.592741,0,0 
-296,2013,296,42.405226999999996,-74.10846,0,0 -297,2013,297,42.465856000000024,-75.67628599999996,28,25277.673813999998 -298,2017,298,43.004431,-77.895977,0,0 -299,2017,299,42.111472,-76.160117,0,0 -301,2017,301,43.53192800000006,-73.41012099999993,47,17052.205804999998 -302,2015,302,44.563129000000004,-73.457981,1,63.023791 -303,2013,303,42.86127100000001,-73.968261,51,25464.590751 -305,2019,305,41.982430999999956,-74.56079300000007,56,14200.460263000004 -306,2015,306,42.61515000000002,-77.315701,40,23499.605201000002 -308,2015,308,42.699659,-74.891913,0,0 -312,2016,312,43.61758599999999,-75.43347799999995,62,0 -313,2016,313,41.31356,-74.199633,0,0 -315,2019,315,42.628411,-76.50449,0,0 -316,2017,316,43.596815,-76.116335,0,0 -317,2017,317,44.86281699999998,-73.90688500000005,27,1792.127852 -318,2014,318,44.18173900000004,-75.44688700000005,41,7560.259828 -320,2015,320,43.108920000000005,-75.35429699999999,10,3857.532422 -322,2014,322,42.629548,-75.175709,0,0 -324,2016,324,43.790308999999986,-76.20465299999996,31,18904.800593 -325,2019,325,42.620844,-77.047781,0,0 -326,2019,326,43.926171999999966,-73.46151100000012,79,12226.431007000001 -327,2016,327,42.133970000000026,-75.247968,41,28257.227295 -328,2019,328,41.116843,-74.159322,0,0 -329,2019,329,42.04126900000004,-74.73660299999996,48,40618.240185999995 -331,2018,331,43.199315,-74.709128,0,0 -332,2019,332,43.17186100000002,-75.97979699999998,43,14903.167115 -334,2015,334,42.00497700000004,-74.11965599999995,47,44309.404205 -335,2015,335,42.91121999999998,-76.71485600000004,35,25773.976805000006 -337,2014,337,41.160157999999996,-73.65575399999994,29,28569.001064 -338,2015,338,44.064697,-73.98494699999995,50,12158.348146999999 -339,2015,339,42.586158,-76.774242,0,0 -340,2016,340,41.574881,-74.57729199999996,46,24546.507235000005 -341,2016,341,44.299158999999975,-75.65427799999999,45,12717.032652000004 -342,2015,342,42.561337999999985,-78.08143199999996,27,9847.09319 
-343,2014,343,44.958213,-74.54006700000001,14,6199.936537 -344,2015,344,42.382186000000026,-75.34705200000003,38,20808.50277 -345,2016,345,43.568569999999966,-76.16155500000002,42,32890.326163 -346,2013,346,42.741026,-73.49946900000003,26,26076.040212000004 -347,2016,347,42.45874899999996,-73.99915400000006,42,18421.863421000002 -348,2019,348,42.849786,-74.025802,14,12594.044888999999 -349,2013,349,42.50814100000001,-76.679741,15,4457.839925 -350,2013,350,42.17228599999999,-74.82471299999999,40,25159.037709999997 -351,2018,351,42.67606500000001,-78.67144000000003,26,6922.932396 -352,2013,352,44.61934000000007,-73.8515699999999,79,12051.303228000004 -353,2019,353,42.97617,-75.722794,0,0 -354,2015,354,44.43899800000002,-73.572909,41,11622.249381 -355,2015,355,42.128664,-74.569223,0,0 -356,2014,356,43.68050799999999,-75.04949100000005,35,14174.417990999998 -358,2017,358,43.161384,-73.671157,11,0 -359,2017,359,42.37851300000004,-74.36207800000007,37,0 -360,2016,360,41.70290300000005,-74.54848899999992,48,0 -361,2019,361,43.69744500000001,-75.27322400000001,37,23017.588302999997 -362,2015,362,44.647868,-73.611701,0,0 -364,2019,364,42.632763999999966,-73.93408300000007,44,41882.779816 -365,2014,365,44.14337,-73.96742599999999,30,0 -366,2019,366,42.311251000000034,-77.21293000000006,55,24759.079209000003 -367,2015,367,43.916761,-75.936341,0,0 -369,2014,369,43.451833,-73.380962,2,2124.475749 -370,2018,370,43.053136,-73.871103,0,0 -372,2017,372,42.386849999999995,-75.613433,18,16254.736704 -373,2019,373,42.10735900000002,-75.38013,37,14518.848961000002 -376,2015,376,43.85828599999997,-75.24967100000002,39,13827.282383999998 -377,2016,377,42.311095000000016,-78.69290700000006,30,4517.9886559999995 -379,2017,379,43.75599499999999,-74.18134600000002,71,20495.459663999998 -381,2016,381,43.260839,-76.390163,0,0 -385,2013,385,42.21997399999999,-79.688580999999985,14,5532.262624 -386,2014,386,43.703505999999955,-75.54028099999994,73,22145.518860000007 
-387,2015,387,42.49709400000001,-74.51632100000005,41,16023.768822 -388,2018,388,43.278025999999976,-75.742762,37,11756.234628 -389,2014,389,43.656968,-74.02659799999996,32,31683.110404999996 -390,2019,390,43.736029000000016,-75.59356800000013,64,14455.806729000004 -391,2015,391,42.957404,-76.387286,11,3899.72171 -392,2017,392,42.748241999999955,-73.37758600000004,40,31366.790730999997 -395,2015,395,44.07246699999998,-74.42338900000003,48,15080.507 -396,2019,396,42.12772799999999,-74.85642400000005,36,0 -397,2014,397,42.09256200000002,-74.04949699999999,38,28573.793497 -398,2014,398,42.25224200000002,-75.61777299999996,25,23420.065448999998 -399,2015,399,42.05462200000003,-79.35024999999997,36,26072.246393999994 -400,2013,400,44.36525100000002,-74.405033,67,17843.130762 -401,2015,401,43.11621999999993,-73.96715200000007,61,31100.565984000004 -402,2013,402,43.097635,-77.08689,0,0 -404,2019,404,43.845539,-73.850658,0,0 -406,2019,406,43.093707,-75.645575,0,0 -407,2018,407,43.735028999999955,-74.52287499999996,78,0 -408,2018,408,44.668421999999985,-74.04039600000002,35,4109.358998 -409,2015,409,42.98786900000003,-78.35933699999998,42,10052.016891000001 -411,2017,411,41.387398000000026,-74.66280300000004,45,0 -412,2017,412,43.36857499999998,-76.24196399999995,43,4617.569432 -413,2018,413,43.291381,-76.11602,0,0 -414,2013,414,43.227068,-76.803813,0,0 -415,2013,415,42.820269999999994,-75.61874100000006,37,15319.751166000002 -417,2014,417,42.462356999999955,-73.46711000000003,40,0 -418,2015,418,42.279329999999995,-76.11246299999998,26,9421.386949000002 -419,2016,419,43.053305000000016,-75.77621,17,8926.685859 -420,2018,420,42.450669999999974,-78.207267,25,7563.942107 -421,2014,421,42.655056999999985,-73.89614000000003,16,6892.088200999999 -422,2013,422,43.241602,-78.049889,7,2036.4960889999998 -423,2013,423,41.47127799999999,-74.88312399999997,31,22153.778164 -424,2014,424,43.517645,-73.473515,0,0 -425,2017,425,41.194001000000014,-74.27157400000002,37,23463.739332999998 
-427,2013,427,40.801636,-72.65807,0,0 -429,2019,429,42.959830000000004,-77.511049,6,9005.426719 -430,2018,430,42.314392,-73.86678299999996,29,13536.341764 -432,2015,432,44.038128,-75.875951,0,0 -433,2014,433,44.144714000000015,-75.107571,29,7813.0355839999975 -434,2013,434,42.514006,-77.406791,0,0 -435,2014,435,42.261825,-76.821174,0,0 -436,2017,436,42.717014,-75.575206,9,1620.723272 -437,2015,437,43.29946,-75.62509999999999,11,2543.0822829999997 -438,2013,438,44.74222400000004,-74.36323099999997,63,20255.283878000006 -439,2017,439,42.9088,-74.375604,0,0 -440,2014,440,44.133037999999964,-74.91343499999996,48,3871.408031 -441,2013,441,44.14480499999999,-74.61786099999998,30,24303.614634 -443,2017,443,43.56495099999997,-74.68259600000002,64,26681.770288000003 -444,2013,444,43.488337,-76.26116,32,19023.097962000003 -445,2014,445,43.337577999999965,-74.15795799999995,36,32452.276029 -447,2014,447,44.42480900000005,-73.79917000000013,68,14621.101021 -449,2014,449,42.116696,-73.888786,5,5732.839918 -450,2014,450,42.316622000000024,-77.83914699999997,25,21565.270322 -451,2015,451,43.452332999999975,-74.41249299999998,34,25593.420216 -453,2014,453,42.04294899999998,-73.978285,35,15946.304981 -454,2019,454,41.512265,-74.517642,0,0 -458,2013,458,42.572532,-76.144349,0,0 -459,2015,459,44.265314,-73.63349400000003,41,13867.175127000002 -460,2013,460,43.90214200000002,-73.60123399999999,24,13584.045023 -462,2015,462,42.560854,-75.622085,0,0 -463,2019,463,41.815288,-74.710451,5,813.878065 -465,2018,465,41.866986,-73.78503099999996,28,35931.164714 -467,2016,467,43.42081000000001,-76.238712,31,0 -468,2015,468,44.447312000000004,-74.47532200000008,39,7407.057697 -469,2016,469,42.04362700000001,-75.50988099999996,35,6005.957084000001 -470,2018,470,44.88821099999997,-73.7135360000001,67,14230.717504999999 -471,2014,471,42.48909899999995,-77.245993999999925,48,30335.378514000004 -473,2014,473,44.08946399999997,-74.58057400000004,40,25190.211940999998 
-474,2018,474,42.226051,-77.622684,0,0 -475,2019,475,42.615159,-77.400431,0,0 -476,2019,476,43.260808,-73.33555800000005,36,37482.065031 -477,2019,477,42.618946999999984,-78.62612600000007,44,7490.445871 -478,2016,478,44.806277000000044,-73.64494600000009,92,17465.394195999997 -479,2019,479,41.26233700000001,-73.67776299999997,26,18650.589117999996 -480,2013,480,42.88824399999999,-74.45923900000001,10,7049.8511690000005 -481,2016,481,42.26713100000002,-76.52361800000004,45,51276.702378 -482,2016,482,42.376203,-74.773703,0,0 -483,2016,483,42.920528,-78.689273,0,0 -485,2019,485,42.328148,-76.651252,0,0 -486,2018,486,44.192542,-74.96473399999999,34,12644.019478 -487,2016,487,43.494836,-74.357446,0,0 -488,2016,488,43.169168000000006,-75.32425,10,5649.467353 -489,2016,489,44.498847999999974,-73.49447000000005,37,3571.4378329999995 -490,2019,490,42.35395699999995,-74.48969399999994,75,28318.848784999995 -491,2019,491,41.777941,-74.394332,0,0 -493,2015,493,42.01877,-77.300888,0,0 -494,2018,494,42.967801,-78.21915,0,0 -495,2019,495,44.21651900000001,-75.26891500000008,40,16657.588772000003 -496,2019,496,42.533159999999974,-77.25498299999997,34,23576.013612000006 -497,2019,497,42.384561999999974,-75.24875199999998,43,7180.532935 -498,2017,498,40.760001,-73.475826,0,0 -500,2016,500,42.822703,-74.477056,0,0 -501,2019,501,42.973636,-75.317787,0,0 -502,2015,502,41.582173,-73.841127,0,0 -503,2013,503,43.003304,-76.589299,0,0 -505,2016,505,43.151933,-78.728086,0,0 -506,2013,506,42.296706,-73.810126,0,0 -507,2013,507,41.68011400000001,-74.57349100000008,51,33759.981059 -508,2015,508,43.373319999999985,-74.830405,33,31636.143347 -509,2013,509,42.84894099999999,-76.230869,23,0 -510,2017,510,43.25427799999997,-73.96633100000004,31,17987.884708999998 -511,2014,511,44.41193000000003,-73.94842500000003,56,16172.956277 -512,2015,512,43.672322999999935,-74.64672100000004,67,29197.322610000003 -513,2018,513,43.483269,-73.548297,7,12889.43152 -515,2013,515,41.581847,-73.565478,0,0 
-516,2017,516,43.576013,-75.92495899999996,26,22734.785961999998 -518,2014,518,43.576417,-74.024118,32,10482.004187 -520,2018,520,43.94822,-75.09976499999998,33,26461.271255 -522,2018,522,44.33859600000003,-73.51523799999997,36,7674.1837049999995 -523,2014,523,42.53562,-74.51335199999995,26,2296.290692 -524,2014,524,43.827226000000024,-76.21621400000002,29,1361.436888 -525,2014,525,43.214615999999985,-74.08030899999996,39,16669.159548000003 -526,2017,526,42.318585999999975,-76.10116699999989,79,0 -528,2017,528,42.220677,-76.441626,0,0 -530,2013,530,42.36499400000002,-75.00931199999997,25,8012.641094 -531,2014,531,41.95293500000002,-73.89946499999998,32,46503.063183 -534,2014,534,44.397248,-75.11934500000002,38,35102.639469 -535,2017,535,44.889503,-74.87912800000001,10,625.6631850000001 -536,2013,536,43.944436,-75.97339699999999,10,4163.207208 -537,2013,537,43.82004800000003,-73.50239400000007,46,33921.452579 -538,2014,538,42.337793,-73.762423,0,0 -539,2017,539,42.32535800000003,-73.71853800000004,34,0 -540,2013,540,42.25054899999999,-79.14243699999999,25,12411.539432 -541,2019,541,42.407203,-78.052866,0,0 -544,2015,544,42.213905,-77.566567,0,0 -546,2015,546,43.79632500000002,-75.09199399999994,34,18892.483544 -547,2017,547,42.133387,-74.738803,0,0 -548,2015,548,42.129987,-76.694866,0,0 -549,2019,549,42.26248499999998,-75.65386800000002,36,7934.1329799999985 -550,2017,550,42.487186,-74.815437,0,0 -551,2019,551,41.865526000000024,-73.71691600000003,30,0 -552,2016,552,40.842583,-73.284627,0,0 -553,2015,553,42.721827,-74.197807,0,0 -555,2013,555,42.149946,-78.16431999999999,9,38.676591 -556,2016,556,42.856059999999985,-77.67381500000005,25,6281.4766309999995 -557,2018,557,42.916015,-77.374542,0,0 -558,2018,558,41.02132699999998,-72.00719799999995,40,5872.296600000001 -562,2019,562,42.118582,-73.56012,0,0 -563,2016,563,40.693325,-73.857472,0,0 -564,2013,564,43.114332999999995,-75.098111,9,2649.968064 -565,2016,565,44.225015,-73.604394,0,0 
-566,2015,566,42.205062,-78.40225700000008,47,16045.384023999997 -567,2014,567,44.33477799999997,-74.55728799999999,44,5864.506428000001 -568,2016,568,42.91316499999998,-74.06680499999996,51,33768.697159999996 -569,2018,569,42.559201,-77.435356,0,0 -570,2014,570,42.182522000000006,-75.82589700000001,37,18889.223644 -572,2014,572,41.93747699999998,-73.69460400000001,18,24031.580941000004 -573,2017,573,42.35559800000003,-74.194164,40,27405.626431000004 -575,2013,575,42.076622999999984,-78.21256499999997,35,16477.356169 -576,2019,576,42.18127099999997,-75.13947899999997,58,20435.240263 -580,2017,580,44.496439,-73.99253899999995,50,6535.735435000001 -582,2015,582,42.858661,-77.50356,0,0 -583,2013,583,43.43147500000001,-74.31554599999993,55,46012.03118400001 -585,2015,585,43.325653,-77.971377,0,0 -588,2018,588,42.48656299999998,-73.701273,40,38746.430118 -589,2013,589,44.340024999999976,-74.98457600000002,33,14098.312357 -590,2013,590,42.11502300000002,-74.47774299999995,47,34437.257879000004 -591,2015,591,44.788,-74.406264,0,0 -592,2016,592,43.92021599999999,-73.86158500000008,39,25976.339462 -593,2019,593,44.04032299999998,-76.29214299999997,18,3097.834027 -596,2015,596,42.260836999999974,-76.69402499999988,64,14933.003136 -597,2019,597,44.84532399999994,-75.05014900000005,54,10174.051524 -600,2018,600,42.160154000000006,-73.588264,18,9788.981489999998 -601,2013,601,42.06861299999997,-75.57232000000008,42,30684.881403000003 -602,2015,602,41.74074999999996,-74.62535699999992,73,0 -603,2013,603,41.150714,-72.279866,4,0 -604,2019,604,42.28559499999998,-75.43725999999998,71,25429.181813999996 -605,2018,605,42.50156799999997,-77.98675700000004,52,19601.962557000006 -606,2015,606,41.270957999999986,-73.693129,27,49522.256719 -609,2014,609,41.293905,-74.225157,0,0 -610,2014,610,43.289705,-74.36276799999997,29,48969.411321 -612,2016,612,42.266703,-75.560922,0,0 -613,2013,613,44.50835800000002,-74.31644400000003,35,12109.687446999998 
-614,2019,614,43.22227100000004,-76.62026100000001,52,25110.209119000006 -615,2017,615,42.572573,-75.146932,0,0 -616,2018,616,42.654526,-76.850465,0,0 -617,2019,617,42.160632999999976,-77.67773099999991,45,35409.119153 -619,2015,619,43.332916,-73.615337,0,0 -620,2013,620,42.694524,-74.80137699999989,59,39581.92730999999 -621,2019,621,44.538661,-75.23357,0,0 -623,2019,623,43.99614199999998,-74.66564999999994,66,21089.542951000007 -624,2016,624,43.46804,-75.285688,8,2516.2796289999997 -626,2018,626,41.968435,-74.778601,38,20272.330550000002 -627,2015,627,41.548977,-74.392887,0,0 -628,2013,628,42.008992999999975,-75.970644,40,22477.354655000003 -629,2019,629,43.56631199999997,-73.50197399999992,45,25300.954303000002 -630,2017,630,42.05517099999998,-78.45811499999995,38,39279.17222099999 -631,2018,631,43.63763500000003,-74.22318600000003,40,13006.739867 -632,2019,632,42.234355,-78.99062300000001,8,0 -633,2017,633,42.849492,-78.013774,0,0 -635,2013,635,41.167065999999984,-74.17432200000003,26,19433.834061 -636,2019,636,44.861445,-73.49530999999992,48,29561.281523000005 -638,2015,638,41.316546,-74.306817,0,0 -639,2018,639,40.73462,-73.098113,0,0 -640,2014,640,42.922877,-74.603532,0,0 -641,2018,641,44.781178,-73.570124,0,0 -642,2019,642,43.028793,-77.705677,8,2233.978608 -643,2018,643,43.208895,-78.11243,5,228.96499 -644,2014,644,42.326865,-74.933677,28,20908.356704 -645,2015,645,42.270676,-73.46215500000001,7,2778.62275 -646,2017,646,43.08311200000001,-74.61994300000002,10,0 -647,2019,647,42.095870999999995,-79.10404000000005,48,0 -648,2013,648,44.681947000000015,-74.550303,48,12036.057687999999 -649,2018,649,43.245855,-77.736056,0,0 -651,2019,651,43.28533,-76.386043,0,0 -652,2019,652,43.240912,-77.383488,0,0 -653,2019,653,42.416213,-76.437012,0,0 -654,2016,654,43.314192,-74.881556,0,0 -655,2013,655,42.167554,-78.68503000000001,16,10784.858174 -656,2015,656,42.64433199999998,-74.38363599999994,67,0 -657,2015,657,44.779703,-74.982823,0,0 
-658,2014,658,43.881718000000035,-75.80360199999997,49,9078.549841 -662,2019,662,44.68450200000004,-75.14494199999992,103,19772.977511 -665,2015,665,43.14078300000001,-73.36369400000002,54,23900.103594 -666,2014,666,42.964961,-77.087673,4,1630.7881149999998 -670,2019,670,42.511242,-78.518446,0,0 -671,2015,671,42.77436400000004,-73.43048700000006,41,28479.613107999998 -673,2019,673,42.432489000000004,-76.42095500000003,26,19337.219773999997 -676,2019,676,43.474104,-74.29269999999998,25,21415.016323000003 -677,2014,677,42.713057,-73.68595099999997,31,0 -678,2013,678,43.13529500000001,-75.42225699999997,19,0 -679,2017,679,42.663309,-76.160141,0,0 -681,2018,681,42.875998999999986,-77.94964000000003,24,12588.594273 -682,2014,682,43.37339100000004,-75.95743299999997,61,11525.571456 -683,2019,683,43.877305,-75.526817,0,0 -684,2015,684,43.643628000000014,-73.69985900000003,29,7090.068126 -685,2014,685,43.180573999999986,-73.722376,36,26008.937296999997 -686,2016,686,42.213687,-75.418683,0,0 -687,2018,687,42.79825200000002,-75.67706399999999,25,20696.10761 -688,2017,688,43.26682000000004,-75.96745899999998,47,17063.677801000005 -689,2018,689,41.662708,-73.753479,3,22170.651601999998 -690,2019,690,42.645329,-73.7832,0,0 -691,2019,691,42.385179,-78.968565,0,0 -692,2016,692,42.98954099999996,-73.99606099999993,63,25753.43080900001 -694,2019,694,43.835422,-76.100441,0,0 -696,2017,696,42.046330000000005,-77.412779,18,3732.644552999999 -697,2017,697,44.698542000000025,-74.60902200000005,35,21454.850265999998 -698,2015,698,42.25126600000006,-79.05941999999992,69,32839.551545999995 -699,2017,699,42.432245,-74.149289,0,0 -700,2014,700,41.035233,-72.323644,0,0 -701,2016,701,44.33845500000001,-75.63851900000004,36,13439.765356000002 -702,2015,702,42.757477,-75.753697,13,22843.181851999998 -703,2017,703,41.613611,-74.006958,0,0 -704,2018,704,43.874316000000015,-75.32483,20,15255.147777999999 -706,2019,706,42.706929,-74.492693,0,0 
-708,2013,708,43.48690299999999,-75.020211,52,28298.809546 -710,2016,710,42.211346,-78.552153,1,49.322596 -711,2017,711,44.342917,-73.364604,0,0 -712,2013,712,42.12595400000003,-75.14222699999996,40,33229.13666 -713,2017,713,43.062559,-77.544926,0,0 -714,2019,714,41.029799,-72.17711,1,33.460047 -715,2017,715,43.95836600000002,-73.84175800000001,44,30675.751159000007 -716,2017,716,42.077951999999996,-73.82941099999995,26,15176.850432000001 -717,2017,717,43.039589,-76.981907,0,0 -718,2018,718,40.901619999999994,-73.88531800000003,20,18668.546289 -719,2016,719,43.065254999999986,-73.92393200000001,13,6078.514285 -724,2019,724,42.013155,-76.524887,0,0 -725,2013,725,42.89286500000002,-76.27493200000002,24,20884.331840000003 -726,2014,726,43.014612,-77.968812,0,0 -727,2015,727,42.898357,-73.890152,0,0 -728,2014,728,42.152854,-76.351853,0,0 -729,2018,729,43.594354,-74.04456600000002,54,10352.642682 -730,2014,730,44.23421400000001,-73.54259299999998,37,11860.174269 -732,2019,732,41.279452,-74.01338000000003,20,23320.021018000003 -733,2014,733,44.43789900000001,-75.17009699999997,21,5891.416265 -734,2013,734,42.883166,-77.710454,0,0 -735,2017,735,42.714716,-78.619933,4,728.1412250000001 -739,2017,739,42.89693499999999,-75.31356400000003,23,23026.645567000003 -740,2017,740,43.80522900000003,-75.79226599999996,50,22298.400653999997 -742,2014,742,44.471767,-75.572624,0,0 -743,2018,743,43.64259799999998,-74.39705599999996,34,34719.665936 -744,2014,744,43.11220099999998,-74.83472500000003,38,8343.558357 -745,2019,745,42.035721,-76.140213,1,0 -747,2017,747,43.78520099999997,-74.85940200000003,48,12064.423438999998 -749,2019,749,44.51540700000003,-75.39695699999997,38,0 -750,2019,750,42.029467000000025,-78.98664499999997,50,33179.826239 -751,2019,751,43.19606000000007,-73.96676399999987,71,12379.858849 -752,2018,752,43.900259,-73.80177899999993,43,21599.680641 -753,2014,753,42.11795400000001,-77.283693,15,0 -754,2014,754,43.174291000000004,-76.29501500000006,52,22766.696299999996 
-755,2017,755,44.48212899999999,-74.779622,51,8730.856172 -756,2013,756,42.14626300000004,-77.82765699999992,52,19716.414294000002 -757,2016,757,43.47547099999998,-74.18787400000005,40,21170.957003000003 -758,2018,758,43.347266999999995,-78.01545800000002,23,15006.472547999998 -759,2018,759,42.260585,-79.54935699999999,39,12150.969608 -760,2014,760,43.08403399999998,-78.102163,41,23131.11422 -761,2018,761,42.71689300000002,-76.77015900000002,34,23351.066861000003 -762,2017,762,42.09498299999997,-77.05614699999992,41,34528.01672 -763,2019,763,42.69541400000002,-75.898703,32,23742.646514 -764,2015,764,42.000420999999974,-77.52123200000004,39,13520.707242 -765,2018,765,43.67825700000002,-74.17569000000002,49,20833.025460999997 -766,2019,766,42.46287,-75.018323,0,0 -767,2017,767,42.243585000000024,-79.59151100000003,25,0 -768,2016,768,42.46596100000003,-75.78339800000002,36,17781.767216 -769,2017,769,42.347033,-77.79264700000014,83,9203.312942 -770,2015,770,42.010322000000045,-75.90237199999991,67,13957.618510999999 -771,2014,771,42.469769000000014,-77.73667100000004,26,29896.532255000002 -772,2019,772,41.844753999999995,-74.15221100000002,27,24648.123411 -774,2018,774,41.55625,-74.51680699999999,27,17199.82362 -776,2018,776,44.34270699999997,-74.80855599999997,59,10047.352234 -779,2015,779,43.60533499999998,-74.50402399999994,49,17832.167427 -780,2013,780,41.41565500000001,-74.048669,8,8524.004871 -781,2019,781,42.91703600000002,-73.76144999999998,17,0 -782,2015,782,42.796058,-76.534763,0,0 -783,2013,783,44.242837,-73.92846599999997,41,20484.661037999995 -784,2016,784,42.638508,-78.51329599999998,13,0 -786,2018,786,44.62108600000005,-73.525674,42,4639.233107999999 -788,2017,788,42.984527999999976,-74.15792899999998,37,9518.916130999998 -789,2017,789,42.15792699999998,-77.72247999999995,33,22458.153474000006 -790,2018,790,42.318851,-77.422462,0,0 -791,2017,791,44.53025200000001,-74.37822499999997,26,3747.3851939999995 -792,2016,792,43.003735,-78.127548,0,0 
-793,2018,793,42.19264999999997,-76.43026000000006,44,21927.106707 -794,2019,794,44.096634,-74.26147099999989,74,24852.886964999998 -795,2018,795,44.678492,-74.10099099999991,57,7106.621456 -797,2014,797,42.863776,-73.75211,0,0 -798,2019,798,44.139724,-73.62529599999999,22,11241.442239000002 -799,2015,799,44.52277800000001,-73.42775700000004,68,28285.483217999994 -800,2018,800,42.03601900000002,-75.84065699999998,77,36809.00394799999 -801,2018,801,42.336969,-77.216068,0,0 -802,2019,802,44.31465500000001,-75.22502499999999,21,1341.341582 -804,2019,804,44.312719999999956,-74.809221,51,3817.231395 -805,2016,805,42.931115,-75.273707,4,7125.971856 -806,2019,806,41.601491,-73.781592,0,0 -808,2015,808,43.135923,-78.933849,3,6011.6500719999995 -809,2018,809,42.62194300000006,-74.60297099999998,69,24341.499135 -811,2018,811,43.96857700000005,-73.95453200000006,61,25073.373606 -812,2017,812,43.270326,-76.879437,5,0 -813,2014,813,42.084672,-73.542076,2,3712.511947 -814,2017,814,40.817084,-73.603051,0,0 -817,2017,817,43.008476999999985,-74.68896199999998,19,0 -820,2013,820,41.775295,-73.82871999999999,29,17343.914581000005 -821,2013,821,42.52368,-77.778864,0,0 -822,2017,822,42.58668099999999,-78.50296100000004,49,42706.724052 -824,2019,824,43.734578999999954,-75.20429400000008,45,24847.058752 -825,2018,825,44.13032999999997,-75.17355499999998,39,22809.127278 -828,2019,828,44.108623,-73.690156,16,2130.32794 -829,2015,829,43.476044,-76.229347,3,0 -830,2015,830,42.922849,-73.276691,0,0 -831,2019,831,40.835347,-73.012177,0,0 -833,2019,833,42.06611100000002,-73.76539699999998,35,16225.084442999996 -834,2018,834,42.52274800000002,-75.96727700000002,34,20077.939404 -836,2018,836,43.016517,-75.419593,0,0 -837,2016,837,42.127944,-78.59384,28,19685.833884 -839,2014,839,42.356878999999985,-78.35797099999994,43,25269.059334999994 -840,2013,840,44.134643,-73.438993,0,0 -841,2014,841,41.888375,-74.019606,0,0 -843,2017,843,44.38663300000003,-74.45025299999999,51,1752.788359 
-845,2014,845,43.012217,-75.488239,0,0 -846,2019,846,43.70172699999995,-73.948449,49,20611.418282 -847,2019,847,43.118962,-75.967762,0,0 -848,2013,848,40.916066,-72.94723000000003,34,8917.970609 -850,2017,850,43.68598700000002,-75.32139899999996,49,0 -852,2013,852,42.711124,-74.978743,0,0 -853,2017,853,43.591568999999986,-75.34421400000005,42,27708.524251 -854,2019,854,43.356507000000015,-75.287332,12,12793.090228000001 -857,2018,857,42.124506,-74.70486200000002,21,16387.050606 -858,2013,858,42.40856099999999,-75.52748199999998,53,5465.165781999998 -860,2017,860,43.797888999999955,-74.14843599999996,56,17474.953844 -861,2017,861,41.858982,-74.690546,0,0 -862,2017,862,43.439218,-73.491781,0,0 -865,2015,865,43.307935,-78.472513,0,0 -866,2018,866,42.70833,-74.321793,0,0 -867,2019,867,44.087160999999966,-74.791659,45,16832.284347 -868,2018,868,44.09109399999995,-74.42381399999991,45,18088.114699000005 -869,2016,869,42.454631,-73.657055,0,0 -872,2017,872,43.056968999999995,-78.95889,9,3027.4894320000003 -873,2018,873,43.11814000000002,-74.56839600000008,51,16741.487477 -874,2019,874,42.161436000000016,-79.20638400000003,32,2191.7557030000003 -876,2019,876,44.700151999999925,-74.1933150000001,73,14481.957514999998 -878,2013,878,40.860847,-72.639137,17,1096.84062 -880,2016,880,43.856688,-75.950143,0,0 -883,2018,883,43.777125,-76.151029,0,0 -884,2013,884,43.37249299999999,-74.54063899999993,60,22112.551656 -885,2014,885,42.749869,-74.981217,24,25533.983976999996 -886,2019,886,42.70685499999997,-74.074971,40,4530.7495770000005 -887,2013,887,42.186766000000006,-79.04329499999996,52,11026.398096 -888,2014,888,43.004480999999984,-75.684134,15,0 -890,2016,890,42.364208999999995,-75.39041799999997,26,19322.361276 -892,2016,892,41.386429,-73.979303,0,0 -893,2018,893,41.897489,-74.149541,7,13500.219773 -894,2014,894,43.589414,-76.199689,0,0 -897,2014,897,42.025239,-78.199675,6,16845.614837 -898,2015,898,40.897666,-72.346036,0,0 
-899,2018,899,42.40153499999999,-76.30328199999998,35,27456.761773 -900,2019,900,43.48795500000004,-75.59886199999995,46,22850.704634 -901,2014,901,41.97789400000003,-75.25217999999997,34,0 -904,2017,904,43.292101,-77.894251,0,0 -905,2014,905,44.222198,-76.120958,0,0 -906,2016,906,43.048011,-77.58662,0,0 -907,2013,907,42.520267999999994,-74.59146700000005,42,19985.054588 -910,2017,910,43.585248,-73.359575,0,0 -911,2014,911,41.762322000000026,-74.507063,48,23425.800504000003 -912,2017,912,43.08089400000005,-75.52117400000013,82,11398.352263000004 -913,2014,913,42.20934699999999,-77.07500200000003,27,20846.614181999998 -914,2016,914,41.80971799999998,-74.20244199999992,48,16918.962841999997 -916,2014,916,43.317161,-75.618206,0,0 -917,2014,917,42.32334899999995,-77.71240500000012,64,22456.036038000002 -918,2015,918,42.014828,-76.242578,4,0 -919,2016,919,43.211806999999986,-74.39357700000001,32,24221.530147 -920,2014,920,42.45682299999999,-75.43932200000002,38,14916.511289999999 -921,2018,921,42.07265099999997,-77.33955100000001,50,18267.439562 -923,2016,923,42.918851,-76.777179,0,0 -924,2018,924,44.82376800000004,-73.94119300000006,60,8448.643388999999 -925,2015,925,42.545446,-77.756803,0,0 -926,2014,926,43.42389000000002,-75.72206800000002,33,0 -927,2016,927,42.606095,-79.001738,0,0 -928,2014,928,43.87106999999999,-73.634403,34,21350.527856 -931,2018,931,43.227208999999974,-74.13896699999997,52,33620.898704 -932,2019,932,43.808415,-75.72659,0,0 -933,2014,933,43.28264199999996,-75.08339299999993,50,21069.312571000006 -934,2014,934,42.595578999999994,-78.94518300000001,13,3539.507171 -935,2018,935,44.575515,-75.42658899999999,33,11551.529291 -936,2015,936,42.082482999999996,-74.23009099999997,61,29530.928329 -937,2015,937,44.118177000000045,-74.72369500000002,42,18208.239722 -938,2013,938,42.976427,-78.009928,0,0 -939,2016,939,44.75976399999994,-74.63380799999992,76,11498.276112 -940,2013,940,43.061779,-77.929854,0,0 
-942,2013,942,44.98873599999994,-74.46550399999988,66,13566.156583 -943,2018,943,43.10846,-73.633867,0,0 -944,2013,944,41.258232,-74.321336,0,0 -945,2016,945,44.275385,-75.850464,9,0 -947,2015,947,42.05078,-77.305082,0,0 -948,2015,948,42.15813899999995,-77.597988,49,17353.285131 -949,2015,949,42.599724,-77.49819699999996,28,18002.326825 -950,2016,950,43.973226000000025,-75.50430700000005,55,24408.894123 -951,2017,951,43.496844,-73.27257099999999,46,11139.341956999999 -952,2019,952,42.497207,-75.663,0,0 -953,2019,953,42.984982,-74.283311,0,0 -954,2017,954,43.896178,-76.103694,3,5879.409915 -955,2019,955,42.619971,-76.157966,9,2351.127611 -956,2016,956,42.863046,-76.584762,0,0 -958,2019,958,42.190926,-74.24557400000002,41,27950.832647999996 -959,2014,959,43.320687,-77.774949,0,0 -960,2015,960,42.131117,-78.304777,0,0 -961,2013,961,42.587003,-74.340096,0,0 -962,2013,962,43.80781299999994,-75.1536729999999,61,31942.794657000002 -964,2019,964,42.667495,-77.687505,1,21.70099 -965,2019,965,41.489354000000034,-74.58795700000005,40,17710.051618999998 -966,2015,966,42.995001,-75.353345,0,0 -967,2019,967,44.599055,-74.08515500000009,58,6724.730581 -968,2015,968,44.068936,-75.857129,0,0 -969,2016,969,42.38655999999999,-79.35200100000004,34,16404.574397 -970,2018,970,43.139327,-79.033112,0,0 -971,2015,971,42.690397000000004,-77.230335,29,17896.399598 -972,2017,972,42.36755199999998,-77.74272299999996,42,8295.44165 -973,2017,973,42.681897,-78.883435,0,0 -974,2019,974,42.40779899999999,-77.693663,26,8279.358685 -975,2014,975,44.6325,-75.304154,0,0 -978,2018,978,42.12300000000003,-75.07665899999999,39,26331.330022 -979,2018,979,42.48629699999999,-77.652113,13,6005.844356 -983,2015,983,42.582251,-76.446577,4,1092.39666 -984,2013,984,42.331639,-77.38773199999999,19,16815.723896 -985,2016,985,42.509205,-78.048417,0,0 -986,2014,986,41.66932199999999,-74.91910000000003,31,28145.281850000003 -988,2015,988,40.88607,-72.422093,0,0 -992,2019,992,44.493501,-75.053979,0,0 
-993,2014,993,42.72359699999998,-74.78274599999999,39,31960.638135 -994,2013,994,42.69924300000002,-76.05108900000002,12,5553.572295 -995,2017,995,44.77713100000003,-73.62224799999996,29,2376.8838490000007 -996,2017,996,42.57716100000002,-77.27713999999997,25,1878.1249749999997 -997,2014,997,44.70797499999998,-74.98179699999994,53,30018.623273999998 -998,2017,998,43.699084999999975,-73.83321299999993,38,22941.054604999994 -1002,2014,1002,42.10519100000001,-75.50412699999998,30,33935.700643000004 -1003,2018,1003,42.396744000000005,-74.15464900000002,10,0 -1004,2013,1004,40.664934,-73.649406,0,0 -1005,2013,1005,43.696847,-75.780005,0,0 -1006,2014,1006,43.06205099999993,-73.978842,56,34321.711851 -1007,2013,1007,41.80568599999999,-74.48588999999997,34,25940.854904999997 -1008,2018,1008,44.51937300000004,-73.632255,55,19716.923636999996 -1009,2015,1009,42.542932999999984,-77.16633700000003,21,2889.739577 -1011,2016,1011,42.453900999999995,-78.58820000000001,12,0 -1012,2015,1012,43.55085200000002,-75.141817,36,18657.885718999998 -1014,2019,1014,41.72677599999999,-73.65036099999999,20,22672.011043 -1015,2013,1015,44.20524,-74.51761100000004,36,15207.178239 -1016,2017,1016,42.939804999999986,-75.96988,13,3029.115437 -1017,2015,1017,42.975119,-74.036926,8,539.812232 -1018,2017,1018,44.13222300000002,-74.43107899999997,42,33904.816553 -1019,2015,1019,43.047520999999975,-76.47946099999993,40,13880.230368 -1021,2019,1021,41.32772799999999,-74.10610700000001,18,27955.416197999995 -1023,2017,1023,41.44914,-74.433336,0,0 -1024,2018,1024,42.589388,-78.29469799999997,27,23111.876411000005 -1025,2017,1025,42.25028199999998,-74.81516000000008,45,16595.266196999997 -1026,2013,1026,41.891464,-73.552464,0,0 -1027,2016,1027,43.34502799999998,-73.36618799999998,48,20693.092688 -1028,2015,1028,43.109336,-76.283576,0,0 -1029,2016,1029,41.349543999999995,-74.574432,12,7307.218502 -1030,2016,1030,41.432676,-74.334384,0,0 -1031,2016,1031,43.13181200000001,-74.63110899999997,37,19690.521985 
-1032,2014,1032,43.005937,-77.810354,4,722.1336389999999 -1035,2013,1035,42.69427100000003,-75.71571499999995,47,31521.519772000003 -1036,2013,1036,42.549572,-74.73721,0,0 -1038,2019,1038,43.299932999999996,-74.04254300000001,49,21616.870672 -1039,2015,1039,43.40242899999999,-76.50063999999999,8,0 -1041,2015,1041,43.629644000000035,-75.18211100000005,39,5690.447266 -1044,2019,1044,43.040862,-74.609673,0,0 -1045,2014,1045,44.89650500000005,-73.59418100000006,71,16103.249690999999 -1046,2016,1046,42.66429100000003,-75.82823899999995,31,6288.775478 -1047,2015,1047,42.802706,-75.205802,0,0 -1048,2013,1048,43.412642,-73.392584,0,0 -1049,2014,1049,40.945821,-72.914965,0,0 -1050,2016,1050,42.434662,-76.497215,0,0 -1051,2018,1051,40.997024,-72.566384,1,0 -1052,2013,1052,43.07652499999997,-76.72477799999999,30,0 -1053,2014,1053,44.227527999999936,-75.05376199999996,61,15978.586857999999 -1054,2019,1054,42.26655800000002,-77.73229599999999,17,17996.429429 -1055,2014,1055,42.15945,-73.512046,0,0 -1057,2019,1057,44.636412,-75.36016,0,0 -1058,2016,1058,43.57718100000005,-75.60258900000005,46,13029.048605 -1059,2018,1059,42.363153000000004,-79.1865980000001,49,16688.008197 -1060,2014,1060,42.869524,-74.664776,0,0 -1061,2019,1061,44.12614499999997,-75.49636400000004,28,10967.917280000001 -1062,2015,1062,41.65118099999998,-73.69331400000004,25,22689.524979 -1063,2019,1063,42.25086899999999,-74.02108799999998,51,45502.756978000005 -1064,2015,1064,43.904240999999985,-75.083392,51,29588.795204000002 -1065,2019,1065,42.88761100000006,-73.41573500000003,47,16316.394546000001 -1067,2016,1067,42.27286100000001,-76.784181,25,11550.290670999999 -1068,2017,1068,44.55952299999996,-75.67033499999995,38,4409.172422000001 -1069,2019,1069,41.77826899999998,-74.54704300000002,36,0 -1070,2018,1070,42.50710299999999,-75.015286,36,21318.471991 -1071,2019,1071,42.636842,-74.332158,0,0 -1072,2014,1072,42.22237700000003,-77.22785700000001,28,2790.304044 -1074,2016,1074,42.876979,-73.501758,0,0 
-1075,2013,1075,42.12042100000003,-74.27510200000005,45,15539.437130000002 -1076,2014,1076,42.76541399999998,-75.41278999999997,44,16430.617101999997 -1079,2019,1079,43.443532,-75.78982899999997,43,9178.785512 -1080,2019,1080,43.04256300000001,-75.50812800000001,7,1114.780851 -1081,2017,1081,44.44284900000002,-75.01517800000008,47,11061.49019 -1082,2017,1082,42.87784,-73.800795,9,13302.052701 -1083,2019,1083,44.079850999999984,-73.85988799999997,49,25228.317955 -1084,2019,1084,44.03484299999996,-75.34428900000007,55,2105.28066 -1086,2017,1086,41.688948,-73.745648,0,0 -1087,2014,1087,42.393127,-79.211076,0,0 -1088,2013,1088,44.10661299999997,-74.03833399999995,53,20459.218273 -1089,2019,1089,44.536161,-74.91972,40,13867.892523 -1090,2016,1090,41.838783,-73.649865,0,0 -1091,2014,1091,42.744194,-77.254678,5,3935.671655 -1092,2013,1092,42.784709000000014,-74.93835200000007,87,35553.26138 -1094,2019,1094,43.652350999999975,-75.07728600000003,38,20098.806635 -1096,2018,1096,42.636925,-76.699683,0,0 -1097,2015,1097,42.309958,-77.282865,0,0 -1098,2017,1098,43.64920800000002,-73.88768,32,15336.569598000002 -1100,2017,1100,43.966385,-75.871979,4,3189.861468 -1101,2015,1101,42.07329099999999,-75.68069399999997,32,23620.901549000002 -1102,2018,1102,42.08903299999998,-76.12324900000002,29,26985.257042 -1104,2017,1104,44.21528099999992,-74.41138299999999,72,8333.948889000003 -1105,2018,1105,40.743489,-73.985015,0,0 -1106,2013,1106,42.838792,-74.710034,0,0 -1108,2015,1108,42.089636,-77.98904100000001,28,17362.362864 -1109,2016,1109,40.963438,-72.862963,0,0 -1110,2016,1110,42.765191,-75.971669,0,0 -1114,2015,1114,43.691485000000036,-73.533873,36,37167.824059 -1115,2016,1115,43.226195,-78.371075,0,0 -1116,2018,1116,43.49191199999998,-74.484684,38,37079.733392 -1117,2014,1117,44.763328,-74.505783,0,0 -1120,2017,1120,41.913537,-73.621175,0,0 -1121,2018,1121,43.310039,-76.076394,4,205.42915 -1123,2018,1123,44.008757000000045,-74.14438699999997,60,11577.834228999996 
-1124,2017,1124,41.101043,-73.962913,0,0 -1125,2013,1125,42.00123699999997,-74.65418200000002,29,0 -1126,2013,1126,44.357966,-73.490961,0,0 -1127,2016,1127,42.51010000000001,-73.613645,13,5865.751714 -1128,2018,1128,43.413819999999994,-75.61481300000001,41,27419.40862 -1129,2019,1129,44.786972,-74.852775,0,0 -1130,2019,1130,41.867649,-74.106938,0,0 -1131,2013,1131,40.814904000000006,-72.839025,10,0 -1132,2015,1132,42.34304,-74.248876,0,0 -1133,2015,1133,42.027338,-75.350426,0,0 -1134,2019,1134,42.34543399999998,-73.66136899999997,24,18745.667172999998 -1135,2017,1135,42.871011,-74.788498,0,0 -1136,2018,1136,42.762205,-73.514134,50,29628.218222 -1137,2015,1137,42.473955,-73.683533,0,0 -1140,2014,1140,41.40207499999999,-74.72310000000006,50,23332.329926000002 -1141,2016,1141,40.640456,-73.569445,0,0 -1142,2015,1142,42.23031300000002,-73.78807099999996,25,31241.564314 -1143,2019,1143,42.167412,-79.582088,2,205.04243400000001 -1144,2019,1144,43.818799,-75.32550899999994,64,31252.248387999993 -1146,2013,1146,43.521208,-75.527228,0,0 -1150,2018,1150,42.40389,-79.453111,0,0 -1153,2014,1153,42.500225,-73.802554,2,162.365668 -1154,2013,1154,42.68007799999999,-76.58644099999998,18,3553.10231 -1156,2018,1156,40.900467,-73.384861,0,0 -1158,2013,1158,42.424326000000015,-74.27006699999995,43,25785.002498 -1159,2013,1159,43.97555200000002,-74.49129200000004,59,18354.191968000003 -1160,2015,1160,43.261078,-75.28498,0,0 -1161,2015,1161,42.58731400000004,-78.41310000000009,45,29379.958916000007 -1163,2013,1163,44.482269000000024,-73.961911,38,0 -1165,2019,1165,43.803991,-73.882538,32,0 -1166,2014,1166,42.279125000000015,-77.31810399999996,36,11783.995401999997 -1167,2013,1167,43.03382,-73.603529,5,0 -1168,2014,1168,44.612422,-74.37614800000004,27,19918.477973999998 -1169,2015,1169,43.438477,-75.92970599999992,57,32991.721507999995 -1171,2014,1171,43.25404200000002,-77.9754079999999,52,18557.957430000002 -1174,2018,1174,40.812191,-72.906445,0,0 
-1175,2018,1175,44.311136999999995,-75.41663500000003,32,5050.794631 -1176,2013,1176,43.668415999999986,-74.94951799999994,29,27945.968415000003 -1180,2014,1180,40.642758,-73.954526,0,0 -1181,2017,1181,42.624929,-77.07264299999999,13,1927.2147499999999 -1182,2015,1182,42.919518,-76.037195,14,8514.752559 -1184,2017,1184,42.200112,-75.37117200000007,37,25230.749763000003 -1186,2017,1186,43.72598199999996,-75.6528010000001,75,9039.00181 -1187,2018,1187,43.45049500000003,-73.5981109999999,55,9491.59003 -1188,2018,1188,42.22290100000004,-75.54625300000001,58,21360.71991 -1189,2017,1189,42.278165,-74.44351,0,0 -1190,2013,1190,43.99573299999998,-73.72865,48,14197.742978000006 -1191,2019,1191,42.585665,-77.965456,0,0 -1192,2014,1192,40.71670799999999,-73.21495099999997,37,24540.78606 -1193,2014,1193,42.800189,-77.013202,0,0 -1194,2014,1194,41.324943,-73.70301599999998,29,42097.860431999994 -1195,2019,1195,43.488604,-76.125777,0,0 -1196,2016,1196,41.423307,-73.760836,0,0 -1198,2015,1198,43.13543599999999,-78.59718099999999,7,2015.431866 -1199,2015,1199,43.292684,-78.873836,0,0 -1200,2014,1200,43.661654,-74.67348499999994,62,30933.754043 -1202,2015,1202,43.21282,-75.550962,0,0 -1203,2015,1203,42.67307699999999,-74.28542500000002,29,20387.989427000004 -1204,2013,1204,43.12015799999998,-73.586906,21,13853.408033 -1205,2019,1205,42.812683,-76.344882,0,0 -1206,2019,1206,42.397911000000015,-77.98497300000002,24,19418.852160000002 -1207,2018,1207,42.430873,-75.91532999999991,53,27241.217114999996 -1208,2018,1208,42.446517999999976,-75.751208,41,23905.270238 -1209,2014,1209,42.533376999999994,-75.04634799999998,10,2152.628312 -1210,2016,1210,41.45435399999999,-73.614591,32,0 -1211,2017,1211,42.57339,-76.043358,0,0 -1212,2017,1212,43.589985999999975,-75.02185700000003,38,30185.981967000003 -1214,2013,1214,42.08525,-76.969816,0,0 -1215,2016,1215,42.494063,-76.487579,0,0 -1216,2019,1216,43.015674,-77.292402,0,0 -1218,2014,1218,42.218902,-76.152652,0,0 
-1219,2018,1219,41.90034,-75.009673,0,0 -1220,2013,1220,41.50717299999999,-74.68472100000004,31,17668.878074999997 -1225,2019,1225,42.380342,-76.115956,0,0 -1226,2014,1226,42.546813,-77.864472,0,0 -1227,2019,1227,42.52255699999995,-73.874006,63,25397.69337500001 -1228,2017,1228,42.442986000000005,-77.555431,17,7995.592425999999 -1229,2018,1229,44.49319599999994,-75.27747600000012,69,22215.012399 -1230,2017,1230,42.815099,-73.89507,0,0 -1231,2017,1231,44.122511000000024,-74.96469699999994,35,11190.09188 -1232,2018,1232,42.77060500000002,-74.06157699999996,24,19742.00844 -1233,2014,1233,44.139687000000016,-74.32429499999995,31,33998.841698000004 -1234,2013,1234,44.28789200000003,-75.39565500000002,23,0 -1235,2017,1235,42.04451,-74.246492,0,0 -1236,2016,1236,41.26654899999996,-73.53759799999995,33,43934.017847999996 -1237,2014,1237,42.71650599999999,-73.86903600000004,37,11093.021376 -1238,2017,1238,41.33742299999998,-73.82303700000003,21,46759.072894 -1240,2016,1240,43.199812,-76.894144,0,0 -1241,2015,1241,44.20365799999996,-73.87941600000009,45,13288.586577000002 -1242,2019,1242,43.382859000000025,-76.011205,24,18940.749943999996 -1243,2013,1243,44.08075400000002,-75.70957300000006,48,29927.395534 -1244,2015,1244,43.171427,-76.447432,0,0 -1248,2017,1248,41.32746999999998,-73.98940799999997,23,13996.520180000001 -1249,2016,1249,43.130121,-73.738751,0,0 -1250,2015,1250,40.85646699999998,-72.98171099999998,23,8616.991315000001 -1251,2018,1251,43.81759899999998,-75.58464899999993,39,31721.149096999998 -1252,2013,1252,42.72875399999999,-73.34561899999997,18,19007.564314 -1253,2017,1253,43.45572599999999,-73.86706200000002,7,0 -1255,2017,1255,42.31512999999997,-75.82112800000003,46,20865.969117999997 -1256,2018,1256,42.395478,-73.629162,0,0 -1257,2014,1257,42.58397,-77.16026,5,2455.989255 -1259,2017,1259,43.014373,-75.939176,0,0 -1260,2016,1260,42.588697,-79.071828,0,0 -1261,2017,1261,43.650205000000014,-75.28774100000001,17,15254.337688000003 
-1263,2014,1263,43.803908,-73.84231400000003,60,0 -1265,2014,1265,42.37805200000002,-77.27716000000004,27,15794.728743999998 -1267,2017,1267,42.56243,-75.893791,0,0 -1271,2017,1271,42.061197,-77.591809,24,0 -1272,2016,1272,43.67662900000003,-75.72612699999995,37,25669.002986 -1273,2018,1273,43.51190300000003,-74.86616300000004,52,26948.026482 -1274,2017,1274,42.899855,-78.875578,0,0 -1275,2017,1275,42.930429,-74.546825,0,0 -1277,2015,1277,42.17323299999999,-77.77628800000001,49,24903.687266 -1278,2018,1278,42.619364,-76.319097,0,0 -1279,2016,1279,42.69560299999995,-77.38454899999998,44,27847.359424999995 -1282,2019,1282,43.02158300000001,-77.39302899999998,10,22417.314895 -1283,2013,1283,40.848822,-73.880703,0,0 -1284,2017,1284,42.922021999999984,-77.58540500000001,14,3024.443281 -1286,2015,1286,44.981607000000025,-73.70631700000004,66,26740.105905999997 -1287,2013,1287,43.10376900000003,-76.31936900000004,23,6089.903334 -1288,2016,1288,44.449626999999985,-74.66067199999999,50,14356.665885999999 -1289,2013,1289,43.659382999999984,-75.76461999999995,44,17756.810523 -1290,2019,1290,43.58633199999996,-74.79299400000005,77,21528.519257000004 -1291,2017,1291,43.99898099999997,-73.80439500000007,43,17757.024999 -1293,2018,1293,43.19995300000001,-73.75983600000006,54,51751.211733000004 -1294,2015,1294,42.433091000000026,-76.853568,34,11488.092815 -1295,2014,1295,40.809340000000006,-73.550303,11,1528.2260250000002 -1296,2015,1296,41.783165,-74.089799,0,0 -1297,2016,1297,42.492232999999956,-79.28838200000006,43,8033.604309999999 -1298,2016,1298,42.560658,-78.88774,0,0 -1299,2018,1299,43.267654,-75.38256099999994,32,14396.529565999997 -1301,2015,1301,43.998730000000016,-75.58298400000001,45,15202.056687 -1302,2017,1302,42.29330499999998,-79.01842100000006,36,33779.289107 -1303,2018,1303,44.30152399999998,-74.18124800000004,27,2746.1860079999997 -1304,2016,1304,42.46629,-76.981346,0,0 -1305,2016,1305,43.150171,-74.08774700000002,37,23118.861098 
-1306,2014,1306,41.66934300000002,-74.045799,37,27478.818504000003 -1307,2013,1307,40.79727499999999,-73.38732099999999,30,19232.313248 -1308,2014,1308,43.54013699999997,-76.09448100000006,46,14369.923406999998 -1311,2018,1311,42.70536400000002,-77.700692,17,3816.158724 -1312,2015,1312,43.70255500000001,-75.91559099999996,52,24638.144639999995 -1313,2017,1313,42.975527000000035,-77.74215000000002,36,0 -1315,2015,1315,43.21120999999999,-77.50649000000007,40,34993.773476 -1318,2015,1318,43.26347200000004,-75.65388399999995,52,20269.749063999996 -1319,2019,1319,42.98443500000001,-73.51976999999994,31,15023.051741000003 -1320,2015,1320,42.95064,-75.507005,0,0 -1321,2014,1321,42.63318699999999,-75.51729500000006,30,25982.975674999998 -1323,2016,1323,42.729103,-76.09000799999997,21,0 -1324,2014,1324,41.89713499999999,-74.44893099999994,35,24444.860346999998 -1326,2014,1326,42.42667499999999,-78.56766100000003,37,16846.725715999997 -1327,2016,1327,44.65364600000007,-75.01704599999995,76,16615.763238 -1328,2013,1328,41.88153200000004,-74.27714100000001,51,31431.543935999995 -1329,2014,1329,42.949743000000005,-76.761551,8,2905.1857170000003 -1330,2017,1330,41.234294,-73.850171,9,18380.667325000002 -1331,2017,1331,43.135303999999984,-74.98531999999997,33,10441.987287 -1332,2013,1332,44.045376999999995,-74.25600000000001,33,33685.456379 -1333,2018,1333,42.21918399999999,-74.99393399999998,6,1587.312696 -1334,2018,1334,44.15976099999994,-75.17239900000003,54,0 -1335,2016,1335,42.238211,-78.246415,0,0 -1336,2015,1336,42.171919,-75.957498,14,2896.878231 -1337,2018,1337,41.419354,-74.157931,3,785.830046 -1338,2019,1338,43.00293,-75.179771,0,0 -1339,2019,1339,42.948944,-76.637864,0,0 -1340,2017,1340,42.427217,-78.758267,0,0 -1341,2014,1341,41.652345,-73.840056,0,0 -1343,2014,1343,42.150477,-77.434332,0,0 -1344,2015,1344,43.279604,-78.134479,0,0 -1345,2018,1345,43.020551,-77.503926,0,0 -1346,2015,1346,43.935691,-73.505582,28,5835.5023169999995 
-1347,2014,1347,43.006256,-74.54446600000003,21,2814.6149609999998 -1348,2015,1348,41.179999,-73.613292,2,4204.667829 -1349,2014,1349,43.22062600000002,-75.34350399999995,38,23866.596771000004 -1350,2013,1350,40.723562,-73.434847,0,0 -1351,2016,1351,44.67085500000002,-74.25224100000004,48,37291.73353899999 -1352,2018,1352,43.35357599999998,-74.74034500000005,42,17286.657103 -1355,2015,1355,42.45678900000001,-76.447479,8,2834.1354200000005 -1357,2018,1357,43.58814499999998,-75.90368600000006,55,21589.479442 -1358,2018,1358,40.85830800000001,-72.84546200000003,33,18277.889794 -1359,2015,1359,42.37177300000002,-79.36782700000002,42,30274.693954 -1360,2018,1360,42.472518999999984,-74.632238,35,0 -1361,2014,1361,41.370834999999964,-74.34479499999999,34,9073.267874000003 -1363,2017,1363,42.21627999999999,-79.23359000000002,17,15218.327169999999 -1364,2017,1364,44.99049100000001,-74.245864,23,2233.383003 -1365,2019,1365,42.081432,-74.675116,0,0 -1366,2015,1366,42.44819699999999,-78.09819599999996,34,19034.45417 -1367,2016,1367,42.439481,-77.69264,0,0 -1368,2013,1368,43.39470200000003,-74.88248999999998,48,23154.805216 -1369,2019,1369,42.607817000000026,-74.00914999999995,55,30584.860918 -1370,2013,1370,44.22806300000001,-75.79508000000006,28,5965.954383 -1371,2016,1371,44.308588000000036,-75.09995200000002,39,23099.175801999998 -1374,2014,1374,42.62568299999999,-78.038793,31,36195.835268 -1375,2013,1375,42.923685,-74.666868,0,0 -1376,2017,1376,42.881151,-76.01488,0,0 -1377,2014,1377,43.321323,-76.557731,0,0 -1378,2015,1378,40.875602,-73.807521,33,26979.526868999998 -1379,2016,1379,43.36103099999999,-73.89511700000001,32,0 -1380,2014,1380,42.89507600000001,-74.97899599999997,43,26991.300107000003 -1381,2014,1381,42.924959,-78.739612,0,0 -1382,2015,1382,42.765314,-78.654456,0,0 -1383,2016,1383,42.84673999999999,-75.167552,16,4770.153785 -1384,2014,1384,43.350688,-75.57124400000005,37,15236.012933999998 -1385,2013,1385,43.507769,-75.356438,0,0 
-1386,2014,1386,44.124839,-75.659835,0,0 -1387,2015,1387,42.555532999999976,-78.63425400000004,27,24623.336749999995 -1388,2019,1388,42.139948,-78.53302200000005,48,20529.635135 -1389,2013,1389,43.719420999999926,-75.97076399999996,73,27536.514723000004 -1393,2013,1393,42.28914299999999,-76.67738700000002,36,23424.734196999998 -1395,2015,1395,43.59696499999997,-74.90226900000005,44,22991.758989 -1396,2014,1396,43.031935,-74.750578,0,0 -1398,2016,1398,42.16986999999996,-75.26606099999994,46,16218.614128999998 -1399,2016,1399,44.859574999999964,-74.535441,43,5449.663890000001 -1400,2017,1400,42.01878299999999,-78.824796,34,63060.361145999996 -1401,2016,1401,42.29876199999997,-75.86323600000003,39,17530.060665000005 -1402,2015,1402,41.95360600000002,-74.74704500000004,44,23915.893638999998 -1403,2017,1403,44.92661900000001,-74.451003,9,2657.427454 -1404,2014,1404,44.046883,-75.733495,0,0 -1405,2017,1405,44.00212400000004,-75.29821600000002,45,31485.759505999995 -1406,2017,1406,43.948025,-76.022192,0,0 -1407,2019,1407,41.318834,-73.926578,8,5135.660127 -1408,2019,1408,43.787320999999906,-74.48953999999983,98,17038.620634000003 -1410,2019,1410,41.94504200000003,-74.30289399999997,26,0 -1412,2014,1412,42.319624,-74.870523,8,1888.568354 -1414,2013,1414,40.74090999999999,-73.59340499999999,10,2480.94982 -1415,2019,1415,43.83901500000001,-74.23475999999995,44,25805.346506000005 -1416,2019,1416,42.393956,-78.58997,0,0 -1417,2019,1417,42.404182,-74.672732,9,12865.092572999998 -1418,2016,1418,44.1196799999999,-73.76266899999993,98,21592.869190999998 -1419,2014,1419,42.56561400000004,-74.45035000000004,51,27259.650136999993 -1421,2016,1421,42.38665999999998,-73.877271,32,31654.192767 -1422,2016,1422,40.613633,-74.153955,0,0 -1423,2017,1423,43.533693999999976,-75.234637,47,26329.817983 -1424,2014,1424,42.762608999999976,-74.18382399999999,29,11260.418912000001 -1425,2014,1425,42.828316000000015,-74.35395699999998,35,10451.631076 
-1426,2016,1426,43.664133,-73.490165,31,24643.072496 -1427,2019,1427,42.388147,-78.205004,0,0 -1429,2015,1429,42.066471,-73.906026,0,0 -1430,2017,1430,42.679855,-74.351319,0,0 -1431,2018,1431,42.820338,-75.826263,0,0 -1432,2014,1432,43.16347300000001,-73.56085100000001,36,34079.395578999996 -1433,2019,1433,42.89975800000001,-75.565124,29,15476.172483999999 -1435,2018,1435,42.433292,-77.859113,0,0 -1436,2016,1436,42.485908,-78.57231,22,3004.747349 -1438,2013,1438,44.00318299999997,-75.76215599999988,61,13088.2482 -1439,2015,1439,42.29254099999994,-76.99166599999992,75,25187.511947 -1441,2019,1441,44.58410400000002,-74.24827999999992,48,13575.159153 -1442,2013,1442,42.23445300000001,-73.62019699999999,11,10834.580608 -1443,2014,1443,43.817931,-76.015971,0,0 -1444,2013,1444,42.18276599999999,-77.12553900000006,31,15341.018561000003 -1445,2015,1445,42.596653,-74.129302,0,0 -1446,2014,1446,43.452095,-76.116023,0,0 -1447,2017,1447,42.622856,-73.819166,0,0 -1448,2018,1448,44.381992,-75.177541,1,2.52578 -1452,2014,1452,44.01038500000003,-74.45526400000003,42,17536.20532 -1453,2014,1453,43.19557899999998,-76.98152000000003,25,10911.391626999999 -1454,2018,1454,43.397471000000074,-73.45882999999994,69,23429.346975999997 -1455,2016,1455,42.443290999999995,-74.50075400000001,11,0 -1456,2015,1456,42.756730000000005,-77.39535500000007,38,22914.154269999992 -1457,2016,1457,42.971072,-73.454235,0,0 -1458,2018,1458,42.10854500000003,-74.36529100000003,29,11322.640326 -1459,2019,1459,43.47350400000003,-73.38416700000003,38,0 -1460,2018,1460,42.913607,-73.953974,0,0 -1462,2019,1462,41.48222800000003,-74.18021600000006,43,29599.664363 -1463,2013,1463,41.99683300000002,-74.73186699999998,23,23083.143496 -1464,2016,1464,44.46989100000008,-74.10248200000002,91,14024.961835000002 -1465,2018,1465,41.821514,-74.882103,0,0 -1466,2016,1466,42.23820400000005,-74.12159800000002,64,41168.88783100001 -1468,2016,1468,41.253602,-74.455072,30,9044.645953 -1470,2017,1470,43.337174,-78.413534,0,0 
-1472,2013,1472,43.160135000000004,-73.92934100000001,32,15894.319710000003 -1473,2015,1473,43.430626,-75.49641,0,0 -1475,2018,1475,41.343255,-74.654758,38,10887.871465 -1477,2016,1477,43.315829,-78.657716,0,0 -1478,2015,1478,42.737473,-74.031264,0,0 -1480,2014,1480,43.25212000000005,-79.01053300000002,45,30869.872846000002 -1481,2017,1481,42.69488700000003,-76.684552,33,29524.761238000003 -1482,2013,1482,42.083527,-74.847123,25,8923.493331000001 -1484,2019,1484,43.034059000000006,-76.059925,6,8535.497105999999 -1485,2015,1485,43.485052,-73.851892,0,0 -1486,2017,1486,43.872385,-75.57115,0,0 -1487,2016,1487,42.54266,-76.412519,0,0 -1488,2016,1488,42.02953300000001,-74.27146700000002,17,14458.659571 -1489,2019,1489,41.892838,-74.822145,0,0 -1490,2018,1490,42.238847,-73.678342,0,0 -1491,2015,1491,44.69280400000002,-73.91023799999998,29,3531.562614000001 -1492,2014,1492,44.07943800000008,-74.09747400000008,82,22971.691410000007 -1493,2019,1493,42.205402999999976,-77.13122599999998,50,37887.162041999996 -1494,2013,1494,42.931979,-76.07526,0,0 -1495,2014,1495,44.00551700000004,-73.50947799999996,60,7133.371933 -1496,2016,1496,44.117742,-76.323931,5,114.64504099999999 -1497,2013,1497,40.78367200000001,-73.04215999999998,21,9072.157485 -1498,2013,1498,41.45038499999999,-74.33635200000002,26,10940.929799999998 -1499,2019,1499,43.317755,-78.725202,0,0 -1500,2016,1500,42.424849,-75.415875,0,0 -1501,2015,1501,43.015178,-74.34801500000002,18,5775.0315709999995 -1502,2017,1502,42.462246000000036,-76.79260100000008,59,23893.624968 -1505,2013,1505,43.20490700000003,-76.43347899999998,35,20617.854107000003 -1509,2015,1509,42.128775000000005,-77.116877,15,0 -1510,2017,1510,43.23982900000001,-74.52247899999999,28,32992.850479999994 -1511,2017,1511,42.681085,-78.982228,0,0 -1512,2015,1512,42.972957,-78.315042,0,0 -1513,2019,1513,42.080556,-78.417195,0,0 -1514,2015,1514,43.063994999999984,-73.83663400000012,53,23547.769791000002 -1515,2015,1515,42.692486,-74.252066,0,0 
-1516,2018,1516,42.939033,-77.05235,4,5507.832978 -1517,2016,1517,42.51649099999998,-75.91362699999996,23,15988.799620999998 -1518,2017,1518,43.378616,-76.443781,4,0 -1519,2014,1519,41.105789,-73.790104,0,0 -1520,2013,1520,43.207553,-75.384254,0,0 -1521,2015,1521,42.483357,-75.96482,0,0 -1523,2016,1523,40.744037,-73.706812,0,0 -1524,2015,1524,44.119733,-76.030698,0,0 -1526,2013,1526,42.604774,-78.237358,0,0 -1527,2017,1527,42.832321000000015,-75.31930800000005,33,38625.350255 -1528,2016,1528,43.34025,-75.415573,0,0 -1529,2016,1529,44.627277000000035,-74.13500900000004,57,20266.321526999996 -1533,2014,1533,43.77052300000001,-75.492858,16,14553.361841 -1535,2015,1535,43.052517999999985,-73.750679,28,36286.825836 -1536,2019,1536,41.41753300000002,-74.43569499999995,24,4018.844578 -1538,2015,1538,42.48749900000002,-77.94566799999996,42,31472.934239000006 -1540,2013,1540,42.201221,-76.383837,0,0 -1541,2019,1541,42.61128299999998,-73.60519500000005,35,21999.081538999995 -1543,2016,1543,44.31445000000001,-74.19854499999998,28,31402.922392999997 -1545,2019,1545,44.87447,-74.979457,0,0 -1546,2014,1546,42.178397,-79.51777500000001,7,376.859024 -1547,2016,1547,42.18406800000002,-78.80563299999996,44,29615.563761999998 -1549,2018,1549,42.161798,-75.00620499999997,27,26010.603524 -1551,2016,1551,43.493795,-76.410045,0,0 -1553,2019,1553,42.188525000000006,-74.63392699999999,11,3314.5191760000002 -1555,2015,1555,40.831202,-73.098849,0,0 -1556,2015,1556,44.26821399999996,-74.59538299999998,39,13921.028742000002 -1557,2014,1557,42.399268000000006,-77.81617900000006,72,35266.17157899999 -1559,2015,1559,41.717829,-74.32120799999998,13,375.85759499999995 -1560,2014,1560,44.417646999999995,-74.71428799999995,30,14992.074945 -1561,2016,1561,42.388334,-75.088382,0,0 -1562,2016,1562,42.490396,-78.935962,0,0 -1564,2017,1564,42.42938200000002,-73.42615299999993,41,34380.763895 -1566,2019,1566,44.37330700000002,-74.58443399999999,44,18377.900884000002 
-1567,2018,1567,43.89758799999997,-74.75672,65,0 -1568,2017,1568,40.749564,-73.680841,0,0 -1570,2019,1570,43.265645,-78.958855,3,2849.811112 -1571,2018,1571,42.23548400000003,-74.25335399999996,47,0 -1572,2015,1572,41.80177299999999,-73.528262,7,7858.305396 -1573,2018,1573,42.818925,-75.992884,0,0 -1574,2017,1574,44.56842800000001,-74.746359,22,6371.82735 -1575,2015,1575,42.400585,-75.689068,0,0 -1576,2017,1576,42.38454700000006,-76.33626099999995,52,26921.195999 -1577,2018,1577,44.023592,-75.24529399999994,49,30051.896010999997 -1579,2018,1579,41.224033,-73.521894,0,0 -1581,2016,1581,42.84358399999999,-77.28455899999996,26,16812.024071 -1583,2016,1583,42.74186899999997,-75.31516999999998,38,21890.612879999997 -1584,2015,1584,44.807004999999975,-74.80710300000007,54,14784.437272 -1585,2014,1585,44.51572100000005,-73.90913600000012,63,22013.789306 -1586,2014,1586,42.217112,-77.727316,0,0 -1587,2016,1587,43.63997700000003,-74.07732100000007,43,25796.388209 -1588,2013,1588,42.83157200000003,-76.10987400000003,33,43656.575563 -1589,2018,1589,44.77718,-74.555244,0,0 -1590,2016,1590,44.97234699999996,-73.55873100000005,42,5491.394482 -1591,2013,1591,42.35358599999998,-76.454109,27,10166.022213000002 -1592,2019,1592,43.29925400000001,-78.35467900000002,29,14858.61045 -1593,2018,1593,42.45985700000001,-73.397247,14,8565.525674 -1595,2014,1595,43.302097,-73.456011,0,0 -1596,2014,1596,42.450991,-77.081114,0,0 -1598,2016,1598,44.515378,-75.278277,2,1704.478418 -1599,2014,1599,40.783962,-72.859932,0,0 -1602,2015,1602,40.779459,-73.356699,0,0 -1603,2017,1603,41.35758799999999,-73.61396800000003,20,5890.507216999999 -1604,2018,1604,44.599251000000024,-74.850825,36,23443.342428000004 -1605,2014,1605,42.602265,-78.589704,1,0 -1606,2017,1606,43.24191300000002,-75.77901599999997,43,3103.335421 -1608,2013,1608,42.358012,-79.125408,0,0 -1610,2013,1610,42.77934300000001,-78.31966400000005,25,16752.155020000002 -1611,2016,1611,42.29251799999998,-78.97831499999997,36,11238.525650000001 
-1614,2014,1614,43.253696,-76.58284100000002,10,0 -1615,2015,1615,41.314024,-74.384303,0,0 -1617,2018,1617,42.329623000000026,-79.34215399999995,51,16340.950085999999 -1618,2018,1618,42.983291,-77.146793,0,0 -1619,2019,1619,42.085939,-76.626864,0,0 -1621,2013,1621,43.86981199999997,-73.90383699999994,49,19611.837741 -1623,2015,1623,43.15598700000005,-76.0309470000001,62,11953.408173999998 -1624,2019,1624,42.022506,-74.085016,8,9463.150523 -1625,2013,1625,43.38652100000001,-74.701736,24,9990.292775000002 -1626,2019,1626,42.821829,-74.811964,0,0 -1627,2018,1627,41.45280300000001,-74.07780100000001,8,0 -1628,2018,1628,41.793028,-74.910612,0,0 -1630,2014,1630,44.42724799999997,-74.15500599999997,86,8970.811865 -1631,2019,1631,42.682352,-77.190823,3,3951.4416610000003 -1632,2018,1632,42.122981,-79.582516,0,0 -1635,2016,1635,42.193122,-77.158093,0,0 -1636,2016,1636,42.00234700000004,-76.74089899999993,46,24819.84408 -1637,2018,1637,42.478319999999975,-79.23135099999998,21,15191.132475999999 -1638,2018,1638,42.04636299999999,-78.24481699999997,31,14223.598674 -1639,2018,1639,42.291572,-75.33191,0,0 -1640,2015,1640,42.308417,-75.734889,0,0 -1641,2019,1641,42.000766,-78.639821,0,0 -1642,2013,1642,42.13235800000001,-79.40798899999997,35,3673.371825 -1644,2014,1644,42.791957,-78.105873,0,0 -1645,2019,1645,42.78562600000004,-75.16980199999993,47,11321.801327000001 -1646,2019,1646,43.84234100000001,-75.66184299999996,47,12921.115633999998 -1647,2015,1647,42.013211,-77.122667,0,0 -1649,2018,1649,42.035136,-74.337908,0,0 -1650,2018,1650,43.587774999999986,-75.71345299999999,36,19189.194784 -1653,2013,1653,41.83636399999995,-74.6544130000001,62,22040.755584000002 -1654,2018,1654,42.424522,-76.604414,0,0 -1655,2017,1655,43.149247,-76.394937,0,0 -1657,2019,1657,43.160479,-78.870899,0,0 -1658,2015,1658,41.26175799999999,-73.783644,14,4685.495411 -1660,2014,1660,42.461932,-76.68267,0,0 -1661,2015,1661,42.37311599999999,-79.03288399999998,26,0 
-1663,2015,1663,43.767567000000035,-74.76387900000005,38,35327.249356 -1664,2014,1664,42.20919699999999,-76.964344,33,20016.379547 -1665,2013,1665,43.087647,-78.651488,0,0 -1666,2017,1666,44.700113,-73.484484,0,0 -1667,2017,1667,44.708677999999985,-74.780354,30,3285.9507129999997 -1668,2013,1668,44.817414,-74.280669,0,0 -1669,2014,1669,43.045259,-78.628429,0,0 -1671,2018,1671,42.74856999999998,-75.90891700000006,41,13563.547852999998 -1672,2018,1672,43.706458000000076,-74.5515340000001,85,14137.565043999992 -1674,2013,1674,43.087594,-76.881116,0,0 -1675,2013,1675,42.923935,-78.242208,0,0 -1678,2013,1678,42.258783,-77.196149,0,0 -1679,2019,1679,43.316567,-74.975989,0,0 -1681,2014,1681,43.013243,-78.858099,0,0 -1682,2016,1682,42.73788100000001,-74.34270799999994,28,6381.0305610000005 -1685,2019,1685,42.878743,-78.627152,0,0 -1687,2017,1687,42.733088,-78.780007,0,0 -1688,2015,1688,42.087771,-77.256737,0,0 -1689,2018,1689,42.111581,-79.263399,16,5253.089033 -1690,2017,1690,44.35873700000001,-74.88271600000003,36,14022.610923999999 -1691,2016,1691,42.08125999999999,-77.926761,31,7999.611556000001 -1692,2017,1692,43.068481,-76.25611,0,0 -1693,2013,1693,41.66579700000004,-74.41551899999999,37,25365.364551000006 -1694,2016,1694,42.866757,-74.244262,0,0 -1695,2015,1695,44.30390100000002,-74.47272299999996,50,21189.523039999996 -1696,2018,1696,43.687153,-76.168807,0,0 -1697,2016,1697,43.95314299999997,-74.73800100000003,40,13320.196991 -1698,2014,1698,42.49969,-76.871255,0,0 -1700,2015,1700,42.845002,-73.730826,0,0 -1701,2017,1701,43.35732599999998,-74.99049899999993,67,12859.975325 -1702,2018,1702,42.293397000000006,-74.93793699999996,28,35624.442725 -1703,2018,1703,44.531988000000005,-75.136531,8,2508.617481 -1704,2014,1704,41.161896,-73.81141800000002,30,25923.228346000004 -1706,2019,1706,44.78714300000001,-74.35565399999997,29,12708.584569 -1707,2018,1707,44.326753999999994,-74.10528400000005,38,11175.669381 
-1708,2013,1708,42.85673999999999,-75.770236,17,1580.5747779999997 -1710,2016,1710,43.19714,-77.83773199999997,22,25040.435272000002 -1711,2015,1711,43.28907400000002,-73.870941,38,21717.186882000005 -1713,2016,1713,44.10321599999998,-75.05178400000008,43,19885.377142999998 -1715,2013,1715,44.16866499999997,-74.91852999999995,43,18720.155598 -1716,2019,1716,42.555371,-74.592966,16,0 -1717,2015,1717,44.83350800000003,-75.15116500000012,61,11621.912611 -1718,2013,1718,42.11494599999998,-78.54980500000006,38,20335.342350000003 -1719,2015,1719,44.56341999999997,-74.40222499999997,37,14044.099268 -1720,2013,1720,42.728918,-76.400954,0,0 -1722,2018,1722,42.385761,-79.10132400000006,38,26291.184426999993 -1726,2016,1726,42.344913000000005,-79.008646,18,10231.283849 -1727,2018,1727,42.64808599999997,-77.58272799999996,44,22157.508058000003 -1730,2019,1730,44.42065200000001,-74.76877599999995,35,28025.28961 -1732,2018,1732,43.49390399999999,-75.399679,12,0 -1734,2019,1734,44.02376800000001,-75.391844,23,28382.74191 -1735,2013,1735,43.168055000000024,-77.01338200000005,36,13908.990463 -1737,2014,1737,42.611295,-73.741905,0,0 -1738,2019,1738,42.45739000000001,-74.62542299999997,51,4771.375111 -1740,2016,1740,42.284422,-78.849397,0,0 -1741,2016,1741,41.16594,-73.980411,0,0 -1743,2015,1743,42.062014,-79.698197,0,0 -1744,2015,1744,44.133941999999976,-74.67292600000007,50,24246.914811000002 -1746,2019,1746,44.26114899999999,-74.10478700000004,35,27165.716629 -1747,2017,1747,41.89048000000002,-75.05148499999997,55,18206.208292999996 -1748,2015,1748,43.952524,-74.82921,13,342.662487 -1750,2017,1750,42.183151999999986,-78.590021,26,5184.629097999999 -1751,2014,1751,42.68293899999999,-75.361519,8,1958.101975 -1752,2018,1752,42.281372,-77.074471,0,0 -1753,2017,1753,42.16177800000001,-79.10711799999997,19,28238.023711 -1754,2016,1754,44.300071000000045,-73.806646,38,14852.360043 -1755,2019,1755,43.321480000000015,-75.33218600000006,42,0 
-1759,2017,1759,42.19021299999999,-78.26303499999996,63,18981.764857 -1763,2013,1763,42.99644700000001,-76.735705,3,9458.963379 -1765,2017,1765,42.146495999999985,-79.45455300000002,22,15490.223369000001 -1766,2017,1766,42.750659,-77.681707,0,0 -1768,2019,1768,40.948005,-72.98727,0,0 -1769,2013,1769,42.27633,-76.302848,4,1601.936621 -1770,2015,1770,42.68602899999998,-74.73429100000007,42,0 -1771,2016,1771,44.371289,-74.511036,2,0 -1772,2017,1772,43.039969,-74.847349,5,983.7097110000001 -1773,2015,1773,42.925121999999995,-78.52968299999999,20,10733.648719 -1774,2019,1774,42.429372,-73.733676,0,0 -1775,2017,1775,42.67319300000003,-75.96540400000002,52,13827.605746 -1777,2018,1777,44.20986800000001,-75.06857799999996,28,8127.699536 -1778,2015,1778,42.28263,-78.928605,0,0 -1781,2015,1781,44.107313999999946,-73.78185699999996,94,20689.814549000006 -1783,2015,1783,43.157151,-78.551695,0,0 -1784,2014,1784,43.96556800000003,-74.07154099999991,53,20006.033674000002 -1785,2017,1785,43.83706499999999,-74.30478300000003,42,25781.988964000004 -1786,2018,1786,44.0472,-75.938294,0,0 -1787,2017,1787,43.70921399999998,-74.92075900000005,31,13845.84054 -1788,2014,1788,42.075113,-76.239592,0,0 -1789,2015,1789,42.881253,-76.597883,0,0 -1790,2013,1790,42.35401700000008,-77.6844199999999,74,9707.446245000001 -1791,2014,1791,42.622998,-77.83662400000001,14,1747.821582 -1792,2019,1792,43.625373,-76.075966,0,0 -1794,2015,1794,43.45438800000005,-75.08756600000005,51,19929.665496999998 -1795,2015,1795,43.821198999999986,-73.57956599999994,29,6034.45224 -1797,2015,1797,44.398098,-75.083671,9,2907.001802 -1798,2017,1798,44.976325,-74.042845,0,0 -1799,2018,1799,43.206787999999975,-75.02683299999993,42,29703.447876 -1800,2019,1800,40.800398,-73.061556,0,0 -1801,2019,1801,42.644663,-77.759603,0,0 -1803,2015,1803,43.50579900000002,-73.46646199999999,13,6566.733332 -1805,2019,1805,42.933598,-73.573734,0,0 -1806,2018,1806,42.10653499999995,-74.53452800000004,49,23467.951908000003 
-1807,2014,1807,43.24858799999999,-77.80271700000002,11,2776.8988289999998 -1808,2019,1808,43.46233499999998,-74.86228399999995,47,23779.752833999995 -1809,2017,1809,43.637385,-75.55359499999994,55,24361.693012999996 -1810,2018,1810,42.63611199999999,-74.51290299999998,7,2875.388508 -1811,2016,1811,42.29009199999998,-74.26055099999999,35,32903.326102 -1812,2014,1812,40.662963,-73.416527,0,0 -1813,2018,1813,43.79698400000001,-74.31709300000007,38,33893.111984 -1814,2017,1814,42.707564,-77.147696,0,0 -1815,2019,1815,42.24817,-74.435688,0,0 -1816,2017,1816,42.258972,-78.010409,0,0 -1817,2019,1817,43.264157,-75.539447,0,0 -1820,2017,1820,44.75421200000002,-74.24886400000004,50,17727.65034 -1821,2015,1821,42.46152600000002,-73.80478999999997,24,32655.025967 -1825,2019,1825,43.611482999999964,-75.14642199999993,45,23339.101171000002 -1826,2016,1826,42.58473699999999,-73.43701300000001,30,9893.746648999999 -1827,2014,1827,42.346992,-74.817493,17,9313.496247 -1829,2017,1829,42.06411400000001,-76.348741,28,0 -1831,2014,1831,42.287769,-75.048118,0,0 -1832,2013,1832,40.859321,-73.172928,0,0 -1833,2014,1833,42.38795000000003,-73.94745700000007,43,37280.481982 -1834,2017,1834,42.811016,-77.141843,4,255.384006 -1835,2019,1835,44.26330300000003,-74.53974199999996,67,7544.0385400000005 -1837,2013,1837,42.007888000000015,-78.42388699999997,32,8387.604653999999 -1839,2018,1839,42.385721,-75.06804699999999,22,0 -1840,2016,1840,44.421254000000005,-74.80926900000003,29,6612.120098 -1841,2016,1841,42.65995900000001,-78.135589,31,24419.403006000004 -1844,2013,1844,43.311392999999974,-75.85265100000001,34,18713.399222 -1846,2014,1846,42.07559300000002,-74.262244,45,18545.158397 -1847,2015,1847,42.371543,-74.055338,0,0 -1848,2019,1848,41.755832,-74.031227,0,0 -1849,2016,1849,42.384222,-78.473609,0,0 -1850,2017,1850,44.452401999999935,-74.08340399999999,69,18766.471064999994 -1851,2016,1851,43.69649100000005,-74.73664900000013,90,19688.426866 
-1852,2013,1852,44.302395,-75.56703599999994,53,0 -1854,2018,1854,41.61609799999998,-74.67430299999991,48,23853.135365000002 -1855,2018,1855,43.429775,-75.377128,2,0 -1856,2017,1856,42.85327299999997,-78.18232600000005,24,33468.309842 -1857,2016,1857,43.443117,-74.437967,0,0 -1859,2018,1859,43.037577999999996,-76.752121,34,38040.647623000004 -1860,2016,1860,42.85965,-77.827138,0,0 -1861,2018,1861,43.15539799999998,-74.37711899999985,80,25016.44758 -1864,2014,1864,42.593017,-77.681772,0,0 -1865,2019,1865,44.933386,-74.226203,0,0 -1866,2015,1866,42.54572,-76.485553,0,0 -1869,2014,1869,42.304825,-73.56409200000003,31,30368.246590000002 -1871,2018,1871,42.044300000000014,-79.48452899999995,42,46087.135589 -1872,2015,1872,41.42498799999999,-73.98097000000001,20,30037.094804 -1873,2015,1873,43.514756999999975,-74.74061500000005,42,22233.014825 -1874,2019,1874,40.637312,-74.193413,0,0 -1877,2017,1877,42.323216,-74.60662099999995,37,40678.916606000006 -1879,2018,1879,41.947183,-74.46516999999996,33,17618.058139 -1880,2014,1880,43.199664,-78.236755,0,0 -1881,2015,1881,43.401015,-75.158821,0,0 -1882,2015,1882,44.037707,-75.514292,0,0 -1883,2013,1883,43.694361,-75.604443,5,262.013175 -1885,2015,1885,44.09116500000002,-76.148774,19,0 -1887,2016,1887,42.44172700000001,-77.876815,16,5921.65617 -1889,2016,1889,40.85847199999994,-72.73116299999995,52,11794.690926999998 -1890,2016,1890,42.964915000000005,-78.306483,9,2094.292782 -1891,2015,1891,42.656573,-76.580715,0,0 -1892,2016,1892,40.961105,-73.734986,0,0 -1893,2016,1893,43.784027,-75.427058,0,0 -1895,2014,1895,43.134685,-78.786763,0,0 -1896,2016,1896,42.981242999999985,-76.025312,17,1393.814832 -1898,2017,1898,43.24324,-75.421692,0,0 -1899,2013,1899,43.61596599999997,-73.831125,66,25802.193178000005 -1901,2013,1901,44.94860700000001,-74.00645299999998,20,218.26734799999997 -1902,2014,1902,43.359816,-78.130582,0,0 -1904,2015,1904,43.370034,-76.140478,0,0 -1905,2018,1905,44.812974,-74.52755,0,0 
-1908,2018,1908,44.08646099999998,-74.49490200000007,78,12947.736727 -1910,2016,1910,41.780949,-73.858383,0,0 -1911,2017,1911,43.32455399999996,-76.6367220000001,66,19011.018689 -1912,2018,1912,42.14649899999996,-76.313674,46,22348.112204 -1913,2018,1913,43.864291999999985,-75.13471600000001,44,16294.069048000001 -1914,2014,1914,42.02841399999998,-75.56500000000004,30,22556.365556999997 -1915,2013,1915,43.12185399999999,-73.46023700000003,31,15412.009388999999 -1916,2016,1916,43.56691999999999,-75.28400999999998,40,23694.135709 -1918,2018,1918,42.64569600000001,-73.99412600000002,20,1973.5977830000002 -1919,2017,1919,43.335198,-73.93521000000007,39,20428.714122999998 -1920,2014,1920,42.81289699999996,-74.13492000000004,63,39542.21615700001 -1921,2015,1921,43.62460000000002,-76.11091200000006,31,16098.659055999999 -1922,2014,1922,43.535337,-75.323644,0,0 -1923,2013,1923,42.686873,-76.800711,0,0 -1926,2016,1926,42.410232,-74.792997,13,0 -1927,2015,1927,42.288569,-78.452794,5,17864.292039 -1929,2016,1929,42.58257200000002,-76.59715700000002,39,10113.018802999999 -1930,2013,1930,42.42822099999995,-79.23008799999991,49,14909.386739000001 -1931,2013,1931,42.007263,-75.792064,0,0 -1932,2013,1932,42.166764,-75.865964,8,1117.403868 -1933,2017,1933,42.779424,-73.761352,0,0 -1934,2014,1934,42.20901400000002,-76.00589499999995,35,47422.956284 -1936,2018,1936,42.175195,-76.623996,0,0 -1937,2017,1937,42.510068000000025,-79.067702,33,0 -1938,2019,1938,44.46504000000003,-74.70802400000001,39,14797.140334000002 -1940,2017,1940,43.64426200000002,-74.23022200000003,45,39381.430543 -1943,2014,1943,42.02880099999997,-77.15115400000005,53,27883.850052 -1944,2013,1944,42.485714,-75.117843,0,0 -1945,2014,1945,42.857566,-76.823663,0,0 -1948,2018,1948,44.56457700000004,-74.47535599999995,46,24638.177022 -1949,2013,1949,44.57502599999999,-74.31855700000004,34,25861.537183 -1951,2018,1951,42.291682999999956,-76.73100299999994,60,39914.744845999994 
-1952,2018,1952,44.451528999999994,-74.43121699999999,37,7278.763437 -1954,2013,1954,42.145440999999956,-79.56322,42,25085.349233 -1955,2017,1955,40.898961000000035,-72.69622600000001,41,7066.199063 -1956,2016,1956,43.247112,-75.123603,0,0 -1958,2015,1958,43.28862399999999,-76.57623199999993,55,39965.635772999995 -1959,2015,1959,42.447902,-77.21085799999997,23,8020.100774999999 -1960,2018,1960,42.501952000000024,-76.78345499999992,43,24030.655640000004 -1961,2018,1961,42.581132,-73.821619,8,5764.413843 -1963,2016,1963,42.37088899999995,-78.09767600000006,73,12077.143785 -1964,2019,1964,42.377531,-74.33379,0,0 -1965,2014,1965,43.620045,-75.243248,18,14973.154555000001 -1966,2017,1966,43.306153,-76.49776400000006,42,24051.538805000004 -1967,2014,1967,42.107173000000024,-75.71323699999998,34,34968.884084 -1969,2015,1969,43.33558,-76.31076400000002,10,4865.736676999999 -1970,2018,1970,43.046105,-74.77803,0,0 -1971,2017,1971,43.407126,-74.576339,33,35267.785126 -1972,2018,1972,43.103148,-75.878533,6,2711.608008 -1973,2015,1973,41.885077,-74.546187,0,0 -1974,2014,1974,42.27335799999998,-75.76870899999996,40,22850.760919000004 -1975,2014,1975,43.62777099999998,-73.61071200000006,55,29342.922730000002 -1978,2013,1978,41.419219,-74.202415,0,0 -1980,2016,1980,41.96367699999998,-73.57805000000003,36,0 -1982,2013,1982,41.601925,-73.735434,0,0 -1983,2015,1983,42.855545,-75.883186,0,0 -1984,2014,1984,41.416876,-74.174701,0,0 -1986,2018,1986,42.01258,-77.43689,0,0 -1988,2015,1988,44.240461000000025,-74.81717200000001,35,14025.268556 -1990,2017,1990,42.622519000000004,-74.60005600000002,33,0 -1991,2013,1991,42.046060000000004,-74.45751399999996,25,28168.867064 -1992,2013,1992,43.515175,-74.43171600000004,54,20461.978904000003 -1993,2016,1993,42.35370699999999,-78.15030500000006,36,15686.386737 -1996,2013,1996,44.589041,-73.495303,0,0 -1998,2016,1998,42.25863200000002,-75.83386999999999,59,30323.302791000002 -1999,2014,1999,41.331285,-74.616943,0,0 
-2000,2018,2000,43.13964400000002,-78.29954499999998,34,6390.676742 -2001,2014,2001,43.766141000000026,-75.64173799999996,65,23239.022721 -2002,2016,2002,44.38223100000001,-75.53646400000001,17,4111.255388 -2003,2014,2003,42.984691,-78.035128,0,0 -2004,2019,2004,44.16061099999999,-73.94476900000006,53,28613.421318 -2005,2019,2005,43.08058400000003,-73.50475300000002,43,35642.220371999996 -2006,2014,2006,42.861496,-78.078018,0,0 -2007,2014,2007,43.84218099999999,-74.20335900000006,49,24381.410552999998 -2008,2014,2008,41.380926,-73.674379,0,0 -2009,2015,2009,43.731388,-75.660604,0,0 -2010,2013,2010,44.190678,-76.180528,0,0 -2012,2016,2012,42.909207000000016,-73.665209,23,17208.1809 -2013,2015,2013,42.593628,-76.267088,0,0 -2015,2018,2015,43.62933999999998,-73.49287499999996,38,10996.756526 -2016,2018,2016,44.136708999999996,-73.8654530000001,98,16958.320814 -2017,2015,2017,42.770662,-75.256615,0,0 -2019,2017,2019,42.264991,-74.98767900000003,26,37707.323909000006 -2021,2017,2021,42.066075,-79.139902,0,0 -2022,2018,2022,41.64749699999997,-74.63136799999994,51,15170.525971 -2023,2017,2023,44.29046799999998,-75.09986499999998,32,22843.573517 -2024,2017,2024,44.051618999999995,-75.65907199999997,27,10166.3143 -2025,2017,2025,43.122411,-77.45562,0,0 -2026,2019,2026,42.188153000000035,-78.84187400000003,35,10014.369156 -2027,2019,2027,44.152864,-75.914725,0,0 -2028,2016,2028,42.75482,-77.760606,0,0 -2029,2016,2029,43.58739799999995,-74.86155899999996,45,27870.529114999994 -2030,2013,2030,42.26673200000002,-77.56018999999998,22,8011.154318000001 -2032,2014,2032,42.552930999999965,-75.77114900000002,38,7815.345815 -2033,2017,2033,42.48366,-73.383311,0,0 -2034,2018,2034,42.82307,-74.220851,0,0 -2035,2017,2035,44.603313,-75.244993,0,0 -2036,2018,2036,42.79496199999997,-75.741483,27,29644.302738000002 -2037,2016,2037,44.785636999999966,-74.74884899999999,65,14125.252125 -2038,2019,2038,42.152475000000024,-78.86816700000001,56,0 -2039,2016,2039,42.613563,-73.34405599999998,21,0 
-2041,2019,2041,44.204968,-74.17140400000004,48,26670.669618 -2043,2018,2043,43.494999,-75.796574,0,0 -2046,2015,2046,40.555725,-74.20043,0,0 -2048,2016,2048,43.86968299999995,-74.32696399999998,45,17019.564553000004 -2049,2014,2049,40.762337,-73.715741,0,0 -2051,2017,2051,44.927647999999984,-74.657108,16,2112.837443 -2052,2016,2052,44.645772,-75.259813,0,0 -2053,2019,2053,43.76539000000002,-75.030162,67,3703.1754699999997 -2054,2015,2054,41.643429,-73.622399,0,0 -2055,2016,2055,43.193557,-78.16825,0,0 -2056,2016,2056,43.003194,-75.596481,0,0 -2058,2017,2058,44.247386999999975,-73.795597,33,12279.55211 -2059,2015,2059,42.670367,-76.965119,0,0 -2061,2019,2061,43.080993,-75.820275,4,1011.1059290000001 -2062,2017,2062,44.32715899999997,-74.66752600000008,66,2548.64753 -2063,2019,2063,41.367544,-73.69777599999999,12,19035.555973 -2066,2014,2066,40.698351,-73.602356,0,0 -2067,2015,2067,43.07908699999999,-73.44705300000004,22,2693.731976 -2068,2018,2068,44.903808999999974,-75.04979300000001,38,13075.272274 -2069,2019,2069,42.391423000000025,-77.88662100000008,47,27672.976932999998 -2070,2013,2070,42.500823000000004,-77.21840999999996,31,9583.659663 -2071,2017,2071,42.717936999999964,-78.28367900000006,35,24341.265092 -2072,2017,2072,43.511526,-76.122503,0,0 -2073,2016,2073,42.880607,-77.387637,0,0 -2074,2016,2074,40.851061,-72.89787900000005,31,15190.107989000002 -2075,2013,2075,41.471469,-74.017357,0,0 -2076,2016,2076,42.578052999999976,-73.51842400000002,33,4576.231415 -2077,2013,2077,43.187013,-78.977141,0,0 -2080,2018,2080,44.52925899999997,-74.11653299999993,51,12914.219430000003 -2081,2019,2081,43.124323,-77.235072,0,0 -2082,2013,2082,42.924226000000004,-78.05132699999997,30,0 -2083,2014,2083,42.356337,-75.714521,0,0 -2086,2018,2086,44.051324999999984,-75.583018,33,11893.599547 -2087,2017,2087,43.170639,-76.242586,0,0 -2088,2015,2088,42.60629600000001,-75.04129400000004,27,16810.732941 -2089,2014,2089,44.31099500000002,-73.47552300000002,24,19853.848788 
-2090,2014,2090,43.346248999999986,-75.8002889999999,54,30401.551728 -2091,2018,2091,42.161966,-78.727013,0,0 -2092,2013,2092,42.12306999999999,-76.429287,17,14291.977303 -2093,2015,2093,43.050959,-73.487142,1,23.522217 -2096,2018,2096,41.92694699999998,-74.13816499999997,39,0 -2097,2018,2097,44.705109999999955,-73.85553299999994,106,12671.418437999997 -2098,2016,2098,43.93360200000002,-74.84049899999994,55,21992.044158999997 -2101,2017,2101,42.52477100000003,-77.822725,40,30702.988241999996 -2102,2015,2102,44.695239,-74.684105,0,0 -2103,2014,2103,43.657913,-73.93971000000008,48,21466.892497 -2104,2013,2104,43.88459600000002,-75.25501500000007,54,12879.028034 -2105,2018,2105,44.05611799999998,-73.75519299999998,92,17015.842978 -2106,2016,2106,43.226130999999945,-76.50156599999994,47,53567.462826 -2107,2018,2107,42.506242,-76.266974,0,0 -2108,2018,2108,43.363004999999994,-78.356492,8,0 -2109,2014,2109,41.72067,-73.950628,0,0 -2110,2014,2110,41.57037,-74.93251,0,0 -2111,2019,2111,44.63994300000002,-74.45988300000009,68,22420.329962999996 -2112,2018,2112,42.99274,-73.358307,0,0 -2113,2013,2113,41.03017,-71.981756,41,18248.851480999998 -2114,2013,2114,43.094407,-73.989087,0,0 -2115,2019,2115,41.66251699999999,-74.78429800000002,44,31832.854058 -2116,2016,2116,41.809237000000024,-73.70931799999995,24,31565.407666 -2117,2016,2117,42.727913,-74.542374,0,0 -2118,2019,2118,44.628591,-73.678973,0,0 -2119,2017,2119,43.170846,-76.716719,0,0 -2124,2016,2124,42.665446,-73.685628,0,0 -2125,2015,2125,44.584227,-75.287255,0,0 -2126,2018,2126,42.858597,-78.836312,0,0 -2127,2019,2127,43.255704,-76.469177,2,407.067589 -2128,2016,2128,42.23963,-77.516871,0,0 -2130,2014,2130,43.491354,-74.60789900000005,37,15181.655817000003 -2131,2013,2131,43.04917400000004,-74.39813000000001,58,30303.564318999997 -2133,2017,2133,43.35831499999996,-74.258425,39,0 -2134,2013,2134,42.05788,-75.426275,0,0 -2135,2019,2135,44.253977,-75.430149,0,0 
-2136,2014,2136,44.17338199999996,-74.70183300000005,56,19667.416486000002 -2138,2016,2138,44.85681399999999,-73.401122,23,9340.265451000001 -2139,2018,2139,42.377266,-77.171777,17,13788.185662 -2140,2014,2140,44.07508299999998,-74.41471700000004,24,6223.408503 -2141,2019,2141,42.169255,-75.53792500000006,53,0 -2143,2015,2143,41.492032,-73.78149000000003,26,0 -2145,2019,2145,43.225102,-77.902776,0,0 -2147,2013,2147,43.052438,-76.371335,0,0 -2148,2016,2148,42.62962199999998,-77.434252,34,34766.541447 -2149,2015,2149,42.990221999999996,-74.88713099999997,27,17302.694263999998 -2151,2014,2151,43.54598000000002,-74.22279399999998,32,24177.768999999997 -2152,2013,2152,43.51475200000003,-73.33754600000006,60,21156.028769 -2153,2016,2153,43.39912800000001,-75.51210300000001,41,35416.577807 -2155,2019,2155,42.044426,-76.464726,0,0 -2156,2014,2156,44.64878599999998,-74.900528,39,28027.910087999997 -2158,2013,2158,41.062060999999986,-73.743787,14,28274.384052999998 -2159,2015,2159,44.78219199999997,-74.12990399999994,37,23903.397605 -2160,2013,2160,44.607447999999984,-74.78799699999995,33,24867.907948 -2161,2016,2161,44.610212000000026,-75.06738200000001,34,6991.269254 -2162,2013,2162,44.645744000000015,-75.15218800000001,23,5823.282343 -2165,2015,2165,42.565847,-77.652182,0,0 -2166,2019,2166,42.27668500000002,-74.46399000000004,37,18982.096137 -2167,2016,2167,42.89432299999999,-74.623814,9,9741.620824 -2168,2017,2168,43.323382,-73.731168,0,0 -2169,2016,2169,42.410765,-79.328378,0,0 -2170,2018,2170,42.693885,-73.798403,0,0 -2171,2013,2171,42.28899700000003,-74.54233500000005,35,22249.250671 -2172,2016,2172,41.49596599999998,-74.37992699999997,26,27087.350486 -2173,2014,2173,43.543836999999996,-74.37703200000001,35,18392.511307000004 -2176,2014,2176,43.28847799999998,-78.69962400000001,21,8587.904094 -2178,2019,2178,43.790127000000005,-74.07444900000007,50,25998.274676 -2179,2014,2179,43.111608,-73.758132,0,0 -2180,2018,2180,44.121004,-75.714738,0,0 
-2181,2015,2181,44.65388900000001,-75.42297699999997,13,960.0082040000001 -2182,2015,2182,42.236924,-74.553886,0,0 -2183,2019,2183,42.130024,-75.758971,0,0 -2184,2017,2184,42.028796,-75.677984,0,0 -2185,2019,2185,42.831066,-77.916819,0,0 -2186,2018,2186,42.39354400000001,-74.56643499999998,29,28712.996731 -2187,2017,2187,40.673819,-73.574284,0,0 -2188,2019,2188,40.836362,-73.369531,0,0 -2190,2014,2190,43.48998200000003,-75.84752299999995,30,0 -2191,2014,2191,42.712789,-76.879083,0,0 -2192,2018,2192,44.908928000000046,-73.51426099999993,44,28153.771892 -2194,2014,2194,42.576063,-74.829971,0,0 -2195,2014,2195,43.40223599999999,-73.93099800000002,30,12422.734584 -2197,2018,2197,44.845638,-74.84587699999999,17,9336.577368 -2198,2015,2198,41.841838000000024,-74.28511499999996,25,10931.932384 -2199,2016,2199,42.66571,-78.809832,0,0 -2200,2016,2200,44.64630499999999,-74.30997900000007,43,25268.381283000006 -2201,2013,2201,44.500792,-73.375892,0,0 -2202,2013,2202,43.63052,-75.81977700000006,53,28825.648645999998 -2203,2019,2203,41.528786,-74.904838,0,0 -2205,2017,2205,43.518585,-75.414738,0,0 -2206,2019,2206,44.23017000000002,-73.76010799999996,38,26556.219727 -2207,2015,2207,44.197432,-75.666659,0,0 -2208,2016,2208,43.72993799999997,-74.25110799999997,28,26332.425693999998 -2209,2018,2209,40.774778,-72.916894,0,0 -2210,2015,2210,44.163818,-75.97135099999997,23,164.435652 -2213,2013,2213,42.32979799999999,-77.0219699999999,50,22738.339015 -2214,2019,2214,42.728697000000004,-77.46702400000001,11,1072.132836 -2215,2017,2215,42.562571,-78.472968,0,0 -2216,2014,2216,42.22532999999999,-78.77937400000005,40,31243.811453000002 -2218,2013,2218,42.332828,-78.758022,6,2152.1374069999997 -2220,2019,2220,43.012385,-74.14015500000004,42,33384.591193 -2221,2016,2221,42.904156000000015,-76.47310999999999,15,6896.948377000001 -2223,2018,2223,41.766479,-74.19716300000003,24,0 -2224,2019,2224,42.862993,-74.892667,0,0 -2225,2016,2225,42.592262,-75.539306,0,0 
-2226,2015,2226,44.510631999999994,-75.544257,25,17166.672897 -2227,2013,2227,43.015037,-75.86221100000002,19,6246.71053 -2229,2017,2229,43.139873,-78.56336,0,0 -2230,2016,2230,42.42215,-77.924442,0,0 -2231,2015,2231,43.055973,-78.491112,0,0 -2233,2019,2233,43.020963,-76.930884,0,0 -2234,2018,2234,41.86218199999999,-73.63293100000003,20,0 -2235,2014,2235,42.102865000000016,-76.922724,43,28353.914659000002 -2236,2017,2236,41.454973,-73.89708100000001,21,29723.745671 -2237,2015,2237,42.420305,-78.990973,2,67.929122 -2238,2013,2238,42.051764999999996,-79.29526599999998,11,2927.3067250000004 -2239,2013,2239,44.07029400000006,-75.56720200000008,67,16165.125043 -2240,2018,2240,43.026937,-78.196222,0,0 -2241,2019,2241,41.120378,-74.095761,0,0 -2245,2018,2245,44.42195900000001,-73.65577200000003,34,13906.893256999998 -2246,2016,2246,42.388147,-78.837546,0,0 -2247,2018,2247,43.49551500000002,-73.72634699999993,44,0 -2248,2016,2248,41.353693000000014,-73.832137,38,33659.104183 -2249,2013,2249,42.81557999999997,-77.44005800000002,41,42816.588598 -2251,2015,2251,43.65524699999999,-75.97301900000001,35,17428.176871999996 -2254,2019,2254,42.31627200000001,-75.92121599999999,20,18919.815054000002 -2255,2018,2255,43.088213,-75.30171,1,70.18809 -2256,2018,2256,42.16316400000004,-76.23865000000008,40,30540.558319 -2257,2017,2257,44.909203999999995,-74.136567,10,31954.223514999998 -2258,2013,2258,43.40271299999996,-73.35332000000002,43,0 -2262,2019,2262,42.258108,-76.191815,8,2783.889824 -2263,2018,2263,44.471014999999994,-74.95227999999999,21,12588.570140999998 -2264,2019,2264,42.55922299999998,-74.17486499999997,50,20856.316540000003 -2265,2013,2265,42.30809100000002,-78.26599200000005,33,8439.284512 -2266,2013,2266,42.47688,-78.494436,0,0 -2267,2016,2267,43.215398,-76.585757,0,0 -2268,2015,2268,43.68161199999996,-74.04913699999999,81,19416.349536 -2269,2016,2269,42.218971,-79.090144,0,0 -2271,2015,2271,43.420069000000005,-74.271839,8,836.722985 
-2272,2017,2272,42.306245,-77.776999,0,0 -2273,2015,2273,44.622013000000045,-74.73368899999994,79,18967.649159000004 -2274,2017,2274,42.32176899999997,-77.06352800000002,42,33549.256729 -2275,2019,2275,43.32675,-76.608057,0,0 -2277,2014,2277,43.378338,-73.601562,0,0 -2278,2018,2278,41.254339,-74.394178,0,0 -2280,2017,2280,42.093417,-78.085586,0,0 -2281,2013,2281,42.541122,-78.115154,0,0 -2284,2016,2284,42.557988,-77.500141,0,0 -2285,2014,2285,44.06616900000001,-74.02351400000003,61,22068.333886000004 -2287,2015,2287,42.403503999999955,-78.63061500000002,40,23818.356207999997 -2288,2017,2288,44.627328,-74.472791,0,0 -2289,2016,2289,44.29102199999995,-74.35033400000007,45,29676.123152999997 -2290,2014,2290,43.164481000000016,-76.82990100000004,16,19292.849788 -2291,2019,2291,44.062592000000016,-74.83412099999997,51,16981.443175 -2293,2013,2293,44.679552000000044,-73.61636,61,19834.479033000003 -2294,2013,2294,42.92126800000004,-75.74788600000007,44,11338.056416999996 -2295,2013,2295,43.967869000000015,-75.23221599999992,43,9074.742972000002 -2296,2019,2296,43.667248,-75.831974,0,0 -2297,2013,2297,43.101086,-75.978367,0,0 -2298,2015,2298,42.355362,-78.299717,0,0 -2300,2013,2300,44.960067,-73.429916,0,0 -2302,2016,2302,44.891675,-74.683665,16,2147.9328779999996 -2303,2013,2303,42.193561000000024,-73.52975,36,28077.556729 -2304,2014,2304,42.52403499999999,-75.26707899999998,7,0 -2305,2019,2305,43.560983,-75.894876,0,0 -2307,2017,2307,42.13871399999998,-74.15958099999993,47,38581.427377 -2309,2018,2309,41.926638,-73.556857,0,0 -2310,2016,2310,44.62143999999994,-73.99030399999995,57,19929.076797 -2311,2015,2311,43.361298999999974,-75.73174900000002,42,39061.7646 -2312,2015,2312,43.14641699999997,-75.17617599999996,50,11391.426934 -2313,2015,2313,44.32094700000001,-75.31539800000006,58,18867.720871 -2315,2016,2315,42.226451,-74.86491299999994,31,23841.528044 -2316,2019,2316,42.207456000000036,-73.66393999999994,43,0 
-2317,2017,2317,44.06695199999996,-75.06867000000007,49,32224.63664 -2318,2013,2318,44.277271,-75.20417699999999,13,3119.919306 -2319,2015,2319,42.477495,-76.295205,4,326.836132 -2320,2019,2320,44.914638,-73.836791,0,0 -2321,2017,2321,43.214648,-77.437534,0,0 -2322,2019,2322,44.435192,-75.699195,0,0 -2324,2018,2324,43.32351100000004,-74.65265799999999,47,30240.018975999996 -2325,2014,2325,42.842187,-75.385975,0,0 -2326,2015,2326,43.95958500000002,-75.36588799999993,50,19096.546627999996 -2327,2013,2327,42.106102,-76.081304,0,0 -2328,2015,2328,43.33928000000002,-78.288233,19,3816.077648 -2330,2015,2330,42.244338,-79.635472,0,0 -2331,2015,2331,42.987812,-77.97103,0,0 -2332,2019,2332,43.148083,-78.127645,0,0 -2333,2019,2333,41.683583,-74.78730300000002,26,30952.705471 -2334,2014,2334,42.800153,-77.905839,0,0 -2335,2015,2335,40.797342,-72.769499,0,0 -2336,2016,2336,43.84607400000002,-75.20237399999998,30,15262.864674 -2337,2014,2337,43.32492099999996,-75.450202,46,19566.951686 -2339,2016,2339,42.68613,-78.33095,0,0 -2340,2013,2340,42.122138,-77.650473,0,0 -2341,2017,2341,41.650354,-74.134071,0,0 -2342,2013,2342,42.04328200000002,-78.76316799999996,35,48580.247974 -2343,2018,2343,44.96129599999997,-74.843656,31,18602.882475 -2345,2016,2345,43.258657,-78.871437,3,0 -2346,2017,2346,44.81917599999998,-74.15874399999996,59,30334.202726999996 -2348,2013,2348,43.506622000000036,-73.52752899999997,34,24442.055942 -2349,2019,2349,42.800137,-73.668745,5,0 -2350,2019,2350,42.554664000000024,-74.24740299999995,53,17310.482389999997 -2352,2014,2352,42.743537999999965,-78.32616999999996,37,21798.041606 -2353,2019,2353,43.788796000000026,-75.91285299999997,36,31488.338703999998 -2354,2013,2354,42.682545,-77.862356,4,1911.6693139999998 -2355,2017,2355,41.967327000000026,-74.26608399999996,48,39706.693999999996 -2356,2018,2356,42.971447,-75.683038,0,0 -2358,2019,2358,43.274224,-78.083911,0,0 -2360,2015,2360,43.215809,-74.973719,0,0 -2361,2016,2361,44.409083,-74.85059,56,12565.345939 
-2362,2018,2362,42.999657,-78.730363,0,0 -2363,2013,2363,41.703817999999984,-74.021755,27,14505.820736 -2364,2016,2364,42.466243,-76.636005,1,1436.471275 -2365,2015,2365,42.20727699999999,-74.69729699999999,19,16920.987701 -2366,2013,2366,44.711756,-74.90385299999991,55,29340.237636000005 -2367,2013,2367,42.377343000000046,-74.64244400000004,44,35997.770455 -2368,2019,2368,42.716173999999995,-73.54671400000004,57,37936.180909 -2369,2013,2369,42.81014200000002,-73.45731400000003,33,21212.46248 -2370,2016,2370,43.73727499999997,-73.68476700000004,64,34116.473461 -2372,2013,2372,41.93054099999996,-74.95205699999998,48,16573.321127000003 -2373,2015,2373,41.859218,-73.534962,0,0 -2374,2015,2374,43.47742399999998,-74.04954300000006,40,30629.316493 -2378,2015,2378,44.34807899999997,-74.18655000000007,84,24069.431790000006 -2380,2016,2380,43.123239000000005,-77.28736400000001,12,1906.2228719999998 -2381,2018,2381,42.569004,-76.934421,5,385.483399 -2383,2016,2383,44.51845000000002,-73.83358499999997,46,16642.711707 -2384,2017,2384,44.08659400000002,-75.40881499999999,30,0 -2385,2013,2385,43.19735199999998,-73.36365500000007,37,26578.220007 -2386,2018,2386,41.389978,-73.770561,0,0 -2387,2019,2387,44.17741899999999,-75.290542,9,2445.118922 -2388,2016,2388,42.18429700000001,-74.18112599999996,37,45248.233776999994 -2389,2019,2389,44.71203300000001,-73.98187599999996,44,7552.305906 -2390,2015,2390,43.028098,-78.965138,3,642.086528 -2391,2013,2391,43.095272000000016,-76.84332999999998,21,24109.947444999998 -2392,2015,2392,44.60375599999998,-73.818072000000015,41,10648.853620999998 -2393,2014,2393,42.005816,-76.464898,0,0 -2394,2017,2394,42.10380099999998,-74.900842,36,18389.239296 -2395,2018,2395,44.61724899999994,-75.21969900000013,73,17568.917267999997 -2396,2019,2396,43.993208999999986,-73.81904200000005,40,23167.731872000004 -2397,2015,2397,43.27491600000003,-76.55031700000006,47,31950.961435999998 -2398,2013,2398,42.5581,-74.797333,0,0 
-2399,2019,2399,42.145187999999976,-77.33600500000009,75,28737.239697999994 -2400,2016,2400,42.037551999999955,-74.13845299999997,61,22258.71968099999 -2402,2014,2402,42.654485,-75.875941,0,0 -2403,2017,2403,44.45833100000001,-73.46909499999995,52,16318.752280999997 -2404,2015,2404,44.65018899999997,-74.57859700000004,59,0 -2405,2018,2405,44.200574,-75.469433,4,0 -2406,2014,2406,42.19666099999998,-74.01273799999996,24,17041.296791 -2407,2015,2407,43.806835,-74.013669,0,0 -2408,2014,2408,43.045753,-76.173203,0,0 -2409,2018,2409,42.579556,-75.971799,0,0 -2410,2018,2410,41.224348,-74.046991,0,0 -2412,2015,2412,42.880362999999996,-76.96202099999995,41,34105.020047 -2413,2014,2413,43.35613600000004,-74.6907500000001,50,40461.315721 -2414,2018,2414,43.356628,-78.449833,35,32819.797631999994 -2415,2015,2415,42.623942,-76.214693,8,4675.926628 -2416,2017,2416,44.338234,-73.77473,0,0 -2417,2013,2417,44.327482,-75.920109,0,0 -2418,2013,2418,42.390449,-76.97080699999992,86,18559.238742 -2421,2015,2421,42.058324,-77.825036,0,0 -2424,2016,2424,42.278162999999964,-77.67538199999993,54,27886.395772 -2425,2016,2425,42.310431000000015,-79.52574599999998,30,30407.846167000003 -2426,2014,2426,43.85677299999998,-74.5940569999999,57,25798.308861999998 -2427,2019,2427,42.21931900000001,-75.31143500000005,52,10934.021073000004 -2428,2018,2428,41.870797,-74.34436,9,4812.786546 -2429,2017,2429,42.71778199999997,-75.04261199999998,26,22853.133575 -2431,2016,2431,42.592514,-77.108535,0,0 -2432,2017,2432,43.117081,-76.922389,0,0 -2433,2016,2433,44.784859,-73.513148,0,0 -2434,2014,2434,42.739044,-75.64837600000001,15,11846.887826999999 -2436,2015,2436,43.80323899999999,-75.45534599999998,24,0 -2437,2017,2437,44.83955500000001,-73.59795600000004,42,18983.371624 -2438,2018,2438,44.71903999999997,-74.38037299999998,37,0 -2439,2013,2439,44.853262,-74.03877899999999,6,191.20757400000002 -2440,2014,2440,42.827757,-78.263209,0,0 -2441,2015,2441,43.885832,-75.559461,0,0 
-2442,2018,2442,42.089120000000044,-75.25410800000006,53,2881.6714300000003 -2443,2017,2443,41.62568099999999,-73.83125399999997,15,28646.15514 -2445,2016,2445,43.29440599999999,-75.04032199999995,54,7551.694438 -2448,2019,2448,42.08390499999997,-75.0729929999999,48,33058.411511000006 -2449,2016,2449,42.119885,-79.681176,0,0 -2450,2019,2450,42.74304400000002,-75.709963,20,4146.598371 -2451,2017,2451,42.55936299999999,-75.52457099999998,20,4721.176629 -2453,2015,2453,42.191366,-73.490379,0,0 -2455,2018,2455,43.921145999999965,-74.17335199999994,42,12460.171235 -2456,2013,2456,43.28227199999996,-74.41480899999998,57,29224.948130000004 -2457,2016,2457,44.132812999999985,-75.24357899999997,54,30054.788277999996 -2459,2013,2459,43.468982,-75.944076,0,0 -2460,2013,2460,42.000477999999994,-75.59962700000001,12,0 -2461,2017,2461,42.897471,-77.242441,7,0 -2462,2017,2462,43.89545799999998,-74.96865900000009,54,25573.414260999998 -2464,2017,2464,43.20781800000002,-73.45430600000005,34,7586.301874999999 -2466,2015,2466,42.886054000000044,-75.74616400000006,73,28106.602089 -2467,2013,2467,43.575944,-74.22246300000005,31,32511.574829999998 -2468,2016,2468,42.00919,-75.171197,0,0 -2470,2015,2470,43.003243000000005,-77.45994700000004,26,15712.454564000001 -2472,2018,2472,43.53110199999996,-75.60370299999988,54,28444.852933 -2473,2018,2473,44.58101699999998,-73.56762699999993,51,15834.604508999999 -2474,2019,2474,43.011644,-76.449827,0,0 -2477,2017,2477,43.43343899999995,-76.22520900000009,63,32365.65718499999 -2479,2017,2479,43.063986,-74.2559,0,0 -2481,2018,2481,42.646396,-73.958586,0,0 -2483,2014,2483,44.705879999999986,-75.43495600000004,29,9180.453306 -2485,2016,2485,43.24413599999999,-74.20762699999989,54,35226.070273000005 -2486,2019,2486,43.41129999999999,-74.15021299999992,46,39994.313514999994 -2487,2015,2487,43.22838700000003,-77.59426100000003,35,28002.589386 -2488,2015,2488,43.352201,-76.438234,0,0 -2489,2019,2489,42.44432500000002,-77.48700299999997,24,0 
-2492,2019,2492,42.364105,-75.137692,0,0 -2493,2015,2493,42.995732,-74.369811,0,0 -2494,2019,2494,41.92111499999999,-74.77876399999998,21,13900.470399999998 -2496,2016,2496,42.725541,-76.517644,0,0 -2497,2014,2497,42.81541,-78.806735,0,0 -2499,2015,2499,42.733643000000015,-75.44523400000001,18,3005.954224 -2501,2017,2501,43.04525,-76.104027,0,0 -2502,2019,2502,44.607955,-75.544088,6,461.150323 -2503,2016,2503,44.49608400000005,-74.81167199999993,49,3253.1883550000002 -2507,2018,2507,43.53993,-75.199542,0,0 -2508,2014,2508,42.06677200000001,-78.718026,22,27013.305843000002 -2509,2019,2509,41.640616,-73.91651,0,0 -2510,2017,2510,42.361112999999975,-77.58519900000007,50,29571.920525999998 -2512,2015,2512,44.694854,-75.259689,0,0 -2514,2017,2514,44.48767599999999,-74.97982700000007,50,14112.803747 -2515,2018,2515,44.653725,-73.674238,0,0 -2517,2016,2517,43.794781000000036,-73.99582099999999,37,10701.271368 -2518,2016,2518,42.03966300000001,-76.00286599999988,71,32920.056887 -2519,2016,2519,42.73264,-77.374589,11,6617.4899319999995 -2520,2018,2520,42.356737999999964,-78.11047300000004,59,12881.492382999999 -2522,2015,2522,43.018613,-76.542169,4,0 -2523,2018,2523,41.899164,-74.636902,0,0 -2524,2015,2524,42.952465,-77.607398,0,0 -2526,2019,2526,43.11365700000003,-74.74379600000003,35,0 -2527,2018,2527,42.66176700000003,-76.32605999999996,51,15605.184059999996 -2529,2019,2529,40.964275,-73.838908,0,0 -2531,2013,2531,44.678738,-75.494639,4,585.9614479999999 -2533,2015,2533,43.25357400000002,-77.40828199999997,43,37028.671879 -2534,2013,2534,42.907105,-73.424747,0,0 -2535,2013,2535,43.07642999999999,-74.71419999999999,19,3733.5063910000003 -2537,2013,2537,43.204489,-77.539684,0,0 -2538,2015,2538,42.259406,-78.244325,0,0 -2539,2017,2539,44.27658100000003,-75.26654899999996,43,0 -2540,2019,2540,43.41137100000002,-76.38152199999999,17,21641.264313000003 -2544,2019,2544,43.73475699999996,-74.95865300000003,42,35990.121849 -2546,2017,2546,42.362532,-75.286647,0,0 
-2547,2014,2547,44.519478000000014,-75.179332,17,4571.148747 -2548,2017,2548,41.503574,-74.95121799999998,29,31902.025960000003 -2549,2013,2549,42.956028,-75.15471,0,0 -2550,2013,2550,42.69760200000001,-78.37353299999998,10,8401.953615 -2552,2015,2552,44.126412,-75.594421,28,13180.187983 -2553,2018,2553,42.069966000000015,-78.58683000000008,51,0 -2555,2018,2555,44.232412,-75.693768,0,0 -2556,2016,2556,42.196374999999996,-75.014258,33,15429.35596 -2558,2018,2558,42.73285699999998,-75.954649,75,13234.967156000002 -2559,2019,2559,42.365231,-78.245615,0,0 -2560,2014,2560,40.879296,-73.114813,0,0 -2561,2017,2561,42.240079000000044,-75.88122699999991,83,16399.066352 -2562,2016,2562,43.355788000000025,-73.73218799999997,44,32903.345261999995 -2563,2016,2563,41.84597299999999,-74.93502399999996,27,20473.425134 -2564,2014,2564,42.108572999999986,-79.02501100000002,36,0 -2565,2013,2565,43.936298,-73.956105,27,1269.9119630000005 -2567,2016,2567,41.74761199999998,-74.48002999999997,29,256.77241200000003 -2568,2018,2568,43.44587299999996,-74.72569599999999,35,38584.801574 -2569,2015,2569,44.18807100000002,-75.34265800000001,44,25733.341858 -2570,2019,2570,42.387921,-73.461059,0,0 -2571,2016,2571,42.79860699999997,-75.38366399999997,34,25896.588093999995 -2573,2018,2573,44.63192700000001,-75.017242,13,4336.715933 -2576,2015,2576,43.33856500000001,-73.59156699999996,40,30937.178943 -2577,2013,2577,41.604426000000004,-73.91162499999997,27,19266.890211 -2578,2018,2578,41.94260499999998,-73.96946299999995,28,5258.371566999999 -2580,2017,2580,40.59518,-73.775379,0,0 -2583,2019,2583,42.907335999999994,-75.06894899999999,13,974.682604 -2584,2019,2584,42.323059000000036,-77.932092,31,4850.974520000001 -2585,2019,2585,42.770969,-74.239217,0,0 -2586,2014,2586,44.81571600000004,-73.84738300000005,90,16220.557560000001 -2587,2013,2587,44.00134899999996,-74.00596799999995,34,27994.246690999997 -2588,2014,2588,42.897065,-78.390594,0,0 
-2590,2016,2590,43.129185999999976,-74.45193999999996,30,5499.767318 -2591,2014,2591,42.097365000000025,-74.44728500000005,42,7803.610179 -2592,2016,2592,42.695188,-77.081315,0,0 -2593,2019,2593,44.942205999999985,-73.46628599999995,51,10037.305152 -2594,2018,2594,42.291967,-78.63025100000003,24,12043.694948999999 -2595,2015,2595,43.085237,-76.67770699999997,38,19904.797752000002 -2597,2013,2597,42.016233,-77.185650999999993,0,0 -2600,2014,2600,42.383252000000006,-76.25987699999997,31,0 -2604,2018,2604,42.574213,-76.543256,0,0 -2605,2013,2605,41.324846,-74.256122,7,9736.845677 -2607,2018,2607,44.05591999999995,-75.01552699999995,63,15333.515166999998 -2609,2019,2609,43.565763,-75.376761,0,0 -2610,2018,2610,43.665086999999986,-75.66745399999999,63,19016.936586 -2611,2018,2611,42.48277099999997,-77.81123099999994,75,31180.229979 -2612,2018,2612,42.62478700000005,-75.4008460000001,72,32595.091709 -2614,2018,2614,42.531522,-74.04204100000003,40,11749.846006 -2615,2014,2615,42.540028,-75.38434,0,0 -2616,2016,2616,41.66263499999999,-73.802394,32,26856.94032 -2617,2018,2617,42.233846999999976,-77.29871099999994,41,24692.977118000003 -2619,2013,2619,42.585653999999984,-78.15001100000005,56,20312.225593000003 -2620,2019,2620,42.565925,-76.707242,0,0 -2621,2013,2621,43.234835,-75.238844,0,0 -2623,2014,2623,41.92071699999999,-74.885723,38,18262.407265 -2624,2019,2624,43.048176,-77.099276,0,0 -2625,2019,2625,41.017052,-72.511049,0,0 -2627,2016,2627,43.32631,-78.229565,0,0 -2628,2013,2628,44.59202399999999,-73.63484300000002,12,1677.130376 -2629,2018,2629,42.95178000000004,-76.50741100000003,44,0 -2631,2017,2631,42.266581,-78.340077,0,0 -2633,2018,2633,42.591422,-74.916936,0,0 -2634,2018,2634,42.187568,-78.924554,8,0 -2636,2016,2636,44.73323699999995,-75.41607100000003,62,25430.456030999998 -2637,2014,2637,41.77360799999999,-74.648617,35,16370.373909999998 -2638,2018,2638,41.28206500000003,-74.15938599999997,37,20774.857084 
-2639,2018,2639,42.296477000000024,-77.452363,36,18172.553308 -2640,2014,2640,42.11856400000004,-75.17519300000002,43,34460.719143 -2641,2016,2641,43.736961000000015,-75.30129900000006,40,31931.258582000002 -2642,2014,2642,42.68998299999999,-73.47846299999995,48,0 -2644,2016,2644,42.025171,-79.215934,0,0 -2645,2019,2645,42.131679,-73.935399,0,0 -2646,2019,2646,43.12679699999994,-74.34044799999995,63,0 -2647,2013,2647,41.691810999999994,-73.71778099999999,7,8019.171589 -2648,2018,2648,42.379183,-77.541775,0,0 -2650,2016,2650,41.42676400000001,-74.67200899999999,19,14646.168374999997 -2651,2016,2651,42.581905,-77.551461,37,28175.068429999996 -2652,2015,2652,44.44033399999996,-75.24097600000003,39,6746.916723 -2653,2016,2653,43.027593,-78.809256,0,0 -2654,2017,2654,44.13197300000001,-76.13069599999997,18,0 -2656,2018,2656,43.640619999999956,-74.81675200000005,46,28544.023302000005 -2657,2013,2657,44.761413,-73.544918,0,0 -2658,2019,2658,44.55558,-75.160657,0,0 -2659,2017,2659,44.88534900000002,-73.76721400000007,69,12057.981553000001 -2660,2017,2660,42.929032,-77.965518,0,0 -2661,2018,2661,42.256266999999994,-73.602851,11,7887.1554559999995 -2662,2014,2662,42.122576,-78.143786,0,0 -2663,2017,2663,42.23013599999999,-76.595828,33,14164.902301999999 -2664,2013,2664,44.143419,-74.57141900000003,33,12101.742976 -2666,2014,2666,43.065479,-77.414565,0,0 -2667,2019,2667,42.800572000000045,-77.62509899999996,43,13002.536782000001 -2670,2019,2670,42.525021,-76.93072,0,0 -2671,2019,2671,42.752276,-77.811693,0,0 -2673,2013,2673,42.03906899999998,-73.60167600000007,39,9945.832627 -2675,2019,2675,42.172104,-75.17768,25,20923.059219 -2678,2014,2678,42.38865499999998,-78.88400600000003,33,13916.644138 -2680,2013,2680,43.095774,-78.824441,0,0 -2681,2017,2681,42.151624,-76.814472,0,0 -2682,2018,2682,42.858769,-73.612943,4,692.8597940000001 -2683,2016,2683,41.597269,-74.174075,0,0 -2685,2013,2685,42.620202,-78.400077,0,0 -2686,2014,2686,42.281427,-73.949245,15,7692.356076 
-2687,2016,2687,44.41508400000005,-73.70729500000007,68,19560.747665000003 -2688,2014,2688,42.123633,-76.746149,0,0 -2689,2017,2689,44.677597000000034,-75.22535100000003,37,10924.948537000002 -2690,2019,2690,44.004729,-75.97213,0,0 -2692,2016,2692,43.855436,-75.475548,0,0 -2693,2017,2693,42.40722000000003,-76.69640200000002,34,30721.359647999998 -2694,2013,2694,42.682315,-73.734949,0,0 -2695,2017,2695,42.26142200000001,-78.52234,25,20517.293525 -2696,2015,2696,42.11384500000003,-79.50007499999995,47,25203.173673000005 -2697,2014,2697,42.301953,-77.50883,0,0 -2699,2019,2699,42.14563999999997,-75.67019600000002,47,38702.875567999996 -2700,2018,2700,43.194501,-76.492717,0,0 -2701,2018,2701,43.790813000000036,-73.38485600000001,59,20089.270786 -2702,2019,2702,42.508918,-78.1421529999999,58,33419.69328 -2703,2014,2703,43.84871999999998,-74.51891500000008,51,17450.528366 -2704,2014,2704,44.72729600000002,-74.68273599999996,92,12227.898228999999 -2706,2015,2706,42.88697599999993,-75.67300299999991,66,23679.555131999994 -2710,2013,2710,42.057678,-77.858221,9,5036.355207 -2711,2018,2711,43.378669,-74.971432,0,0 -2712,2015,2712,42.58081,-73.705449,0,0 -2713,2016,2713,43.06138299999999,-77.02599,8,8256.607373 -2715,2014,2715,42.29735800000002,-77.15967499999992,51,29153.847330999994 -2716,2013,2716,44.713656,-75.117241,0,0 -2717,2016,2717,43.29216100000002,-77.94604399999997,35,29783.641410000004 -2719,2018,2719,41.480033,-74.420272,0,0 -2720,2018,2720,43.66199800000002,-76.06009300000004,49,0 -2721,2016,2721,44.10909,-75.425928,0,0 -2722,2015,2722,42.489748,-77.067002,0,0 -2723,2014,2723,42.925295,-73.531792,0,0 -2724,2019,2724,42.984328,-78.938671,0,0 -2725,2015,2725,42.120725,-78.805105,16,4312.020829 -2726,2017,2726,42.49165700000001,-75.72589999999995,29,8162.019662 -2727,2017,2727,42.46767100000001,-78.22045499999996,27,43131.698185999994 -2733,2017,2733,41.83288700000002,-74.56025700000004,22,17073.655734 
-2734,2016,2734,41.462629,-73.71259099999999,13,24601.602985999998 -2735,2014,2735,41.681825,-74.37016199999997,20,12777.834403 -2736,2017,2736,42.519812,-77.48237000000003,39,13064.061644 -2738,2014,2738,43.98064499999999,-75.43140700000001,17,4268.23783 -2739,2014,2739,43.00636,-77.263025,0,0 -2740,2018,2740,44.264636000000024,-75.62977300000006,29,6187.823958 -2742,2013,2742,43.404575999999956,-73.99314199999993,47,26906.490822000003 -2743,2013,2743,42.2859,-73.633287,0,0 -2745,2017,2745,42.20648400000002,-75.215692,23,5878.293989000001 -2746,2019,2746,42.261102,-76.158376,0,0 -2748,2017,2748,44.97667499999994,-74.35108799999993,51,11445.475847999998 -2751,2019,2751,42.030466,-75.60123,0,0 -2753,2018,2753,41.78650800000001,-73.633247,25,24515.444383 -2756,2015,2756,43.79032899999998,-74.53454999999998,39,18921.081552999996 -2757,2019,2757,44.942701,-74.68526,0,0 -2758,2016,2758,43.259423,-75.469368,0,0 -2759,2016,2759,43.06559900000001,-75.38099000000004,25,16365.84136 -2761,2013,2761,42.175228000000004,-74.06576099999998,37,38955.469746 -2762,2019,2762,42.403098,-75.573614,14,5948.908039 -2763,2014,2763,42.16697400000002,-75.48655599999994,56,31441.060069 -2764,2013,2764,44.659798000000045,-74.21237199999997,81,22507.324892 -2765,2017,2765,43.092786,-76.948033,0,0 -2766,2017,2766,43.178920000000005,-74.75683200000002,37,23006.154937 -2767,2019,2767,42.327475000000035,-74.15411799999995,54,27169.212631000006 -2768,2013,2768,42.375446,-75.80387,0,0 -2769,2014,2769,43.24702900000004,-75.88928200000005,38,8694.055534 -2770,2013,2770,42.232701,-78.461926,0,0 -2771,2015,2771,43.574091,-75.450342,2,362.00787 -2773,2018,2773,43.09111400000002,-78.86871100000005,47,22555.268514000003 -2774,2013,2774,43.632647,-73.997329,0,0 -2775,2017,2775,42.829232999999995,-77.29865,8,1463.568471 -2776,2018,2776,43.103038,-77.61627,0,0 -2778,2016,2778,42.617922,-76.94846,0,0 -2779,2013,2779,43.77743400000001,-74.80607899999997,32,14589.131259 -2780,2013,2780,42.963743,-76.230208,0,0 
-2781,2014,2781,42.313699,-78.168744,1,374.878115 -2782,2018,2782,40.944527,-72.808035,6,355.8512 -2783,2019,2783,42.48396400000001,-74.94639400000004,35,23754.753356999998 -2784,2019,2784,43.14645500000004,-75.08926500000003,36,14392.171880000002 -2785,2018,2785,43.59235199999997,-74.59962399999996,55,20655.276986 -2786,2017,2786,44.75985099999996,-74.43681699999989,62,24807.994428000005 -2787,2019,2787,40.834082,-73.939812,5,2739.11337 -2788,2013,2788,42.36988599999997,-75.56059400000002,32,0 -2789,2019,2789,42.31772,-74.684653,0,0 -2790,2018,2790,42.26503200000001,-74.58453899999996,45,0 -2791,2014,2791,42.607044,-78.777833,0,0 -2792,2016,2792,43.376580999999995,-74.09772599999998,23,20549.657448 -2793,2013,2793,42.34207700000001,-75.04468200000005,42,30778.95251 -2794,2015,2794,42.23972699999999,-77.16890600000002,31,23670.724129 -2796,2015,2796,42.54673899999998,-75.20357599999994,30,30550.083606999997 -2797,2018,2797,43.964543000000035,-74.00147000000004,44,26473.320671999994 -2798,2019,2798,42.492836,-78.650296,2,821.115317 -2800,2017,2800,41.263029,-73.995271,17,17768.273912 -2801,2016,2801,43.54422499999999,-73.40437399999999,21,18590.039351 -2802,2019,2802,44.13802000000004,-74.07311099999987,118,15640.665872999993 -2803,2015,2803,42.355815,-77.132271,0,0 -2805,2014,2805,43.61308200000001,-74.53009900000005,53,23671.529716000005 -2806,2016,2806,43.23046799999998,-74.71994300000004,36,26496.894529 -2808,2015,2808,42.13671100000002,-74.928389,32,26790.054988 -2809,2016,2809,42.632200000000005,-75.10720399999995,32,20364.043666999998 -2810,2013,2810,42.393743,-74.38774400000005,30,0 -2811,2015,2811,43.074942,-76.827564,0,0 -2812,2015,2812,43.702905,-75.374517,0,0 -2813,2014,2813,41.75131499999999,-75.03564600000001,12,14889.335363 -2815,2014,2815,44.53467900000001,-74.277959,30,19231.642921 -2816,2018,2816,44.50867599999996,-73.77529099999995,64,17821.270188000002 -2817,2017,2817,42.961952,-75.24019,12,5895.4258709999995 
-2819,2013,2819,43.184544,-77.382821,0,0 -2821,2014,2821,42.60346,-73.375306,0,0 -2822,2016,2822,42.781197,-75.035371,0,0 -2823,2015,2823,42.28219600000004,-75.21933600000007,39,19985.884379999996 -2824,2017,2824,42.788887,-74.800222,0,0 -2825,2019,2825,43.90383300000002,-74.35594499999995,37,17368.873987 -2826,2014,2826,44.313143000000004,-75.13702000000006,33,6195.933194000001 -2829,2016,2829,41.11837500000001,-72.36174899999997,26,4995.069888000001 -2830,2014,2830,42.097842,-75.402249,0,0 -2831,2013,2831,42.95102900000002,-74.26052900000003,47,22150.797785000002 -2833,2014,2833,43.784584,-74.39704299999995,39,17394.571128 -2834,2013,2834,43.724632,-75.698124000000007,28,4693.51358 -2835,2017,2835,42.656040000000004,-77.24551500000003,33,8446.615497 -2836,2016,2836,44.55253500000001,-74.21876999999996,47,13992.279323999997 -2837,2016,2837,43.53280899999999,-74.91204699999992,56,14718.148992000002 -2839,2016,2839,44.725508,-74.47643999999995,28,4167.395426 -2841,2019,2841,44.21609200000008,-73.830483,71,8214.387794 -2842,2014,2842,42.947104,-76.572792,0,0 -2844,2015,2844,43.312166,-76.391995,0,0 -2847,2015,2847,41.549911,-74.944585,0,0 -2848,2014,2848,42.93659099999997,-73.88360000000007,39,31607.696204999993 -2849,2018,2849,42.850537,-76.858154,0,0 -2850,2014,2850,41.488607,-73.94338699999993,44,13689.269789 -2851,2019,2851,44.445645,-73.44901699999994,49,13028.376269 -2852,2013,2852,42.634827,-75.933031,2,0 -2853,2015,2853,41.73565499999997,-73.91790899999997,33,6996.96621 -2855,2014,2855,44.005317,-74.582203,0,0 -2856,2014,2856,43.400761000000024,-74.81429799999997,33,20947.124314 -2857,2017,2857,43.490221999999946,-75.61592299999997,61,21867.109221 -2858,2013,2858,41.98247399999998,-75.268707,34,19559.079969 -2859,2018,2859,43.061997999999974,-76.92960200000002,34,38799.158572 -2860,2013,2860,42.812202,-75.450462,0,0 -2861,2013,2861,42.29468599999997,-75.61007299999999,39,18595.00519 -2862,2015,2862,44.54536600000001,-74.98367099999997,53,14193.748463999998 
-2863,2018,2863,42.24258400000002,-79.16746299999994,38,34368.61574 -2865,2014,2865,42.07675500000001,-77.81719700000001,20,5240.218707 -2866,2013,2866,43.768012,-75.37170199999994,42,17932.168373 -2868,2017,2868,42.284492,-75.164844,0,0 -2869,2016,2869,42.42605,-79.151427,4,876.283834 -2871,2019,2871,42.74405,-76.569379,0,0 -2872,2018,2872,41.4191,-73.717435,0,0 -2873,2014,2873,43.196042,-77.321458,0,0 -2874,2015,2874,42.588806,-75.888671,1,0 -2875,2019,2875,44.80980799999998,-73.39043499999998,72,26523.387948999996 -2876,2019,2876,43.138465,-76.501469,0,0 -2879,2018,2879,41.221703,-74.21554999999994,32,21659.027958000002 -2880,2014,2880,43.43672100000001,-75.19722,50,13311.590091000002 -2881,2016,2881,44.74614,-74.87163300000009,50,12597.407874999999 -2882,2014,2882,44.47490700000002,-73.56311499999995,47,17837.406541 -2883,2017,2883,43.32621,-73.509158,0,0 -2884,2015,2884,42.283419999999985,-73.98522099999995,31,31548.368776 -2889,2018,2889,42.650621,-75.266648,0,0 -2890,2017,2890,44.10353799999998,-74.83090099999997,64,13081.034961000001 -2892,2014,2892,42.797757,-73.969793,0,0 -2893,2016,2893,43.30099,-76.125865,0,0 -2895,2013,2895,43.126223,-79.012073,0,0 -2896,2013,2896,43.180756000000024,-74.86558199999996,60,9416.987169999999 -2897,2013,2897,42.572311,-78.976121,0,0 -2898,2013,2898,41.634169,-74.080766,0,0 -2899,2016,2899,43.899947,-75.90218099999998,19,14692.57567 -2901,2013,2901,42.850917999999986,-75.72873400000002,15,1194.304191 -2903,2015,2903,43.078566,-75.432905,0,0 -2906,2016,2906,42.765751,-74.681653,0,0 -2907,2016,2907,43.94073200000002,-74.05227800000009,65,9251.717788000002 -2908,2014,2908,42.839135,-73.396773,0,0 -2910,2016,2910,43.157248999999986,-74.59480600000005,69,28683.340805 -2911,2018,2911,41.12430400000001,-73.75130199999998,17,6405.803258 -2912,2014,2912,44.657056999999945,-73.97047999999997,54,5638.4535559999995 -2913,2015,2913,44.415803,-75.410446,0,0 -2916,2014,2916,43.795429000000006,-74.58473899999994,32,30142.458543 
-2917,2015,2917,43.08045500000002,-78.42096799999996,25,12085.34845 -2919,2019,2919,44.711203,-74.703751,0,0 -2920,2017,2920,42.735086,-77.022898,0,0 -2921,2016,2921,43.58672299999995,-74.67447099999994,45,24886.437361 -2922,2016,2922,44.59281899999998,-74.89203499999995,56,20267.607059 -2924,2015,2924,44.54769699999998,-74.60226400000008,44,6685.892332 -2925,2013,2925,44.313952999999934,-73.8769410000002,101,11290.857628999996 -2926,2019,2926,43.91073,-75.885161,6,11805.469575000001 -2927,2019,2927,43.00304400000002,-73.88532299999999,18,12607.911988999998 -2929,2013,2929,43.248988,-78.397753,0,0 -2930,2018,2930,42.25520200000003,-75.47379099999995,31,32841.252047 -2931,2014,2931,41.75065,-74.390132,10,0 -2932,2014,2932,42.856039,-76.457078,0,0 -2935,2014,2935,41.268599,-73.89952700000002,33,37685.918303 -2936,2016,2936,43.69441000000001,-73.85797300000003,47,6856.525226 -2938,2013,2938,42.36081199999997,-75.34454400000003,38,0 -2939,2014,2939,43.93487800000002,-74.31155600000007,49,25422.124869000003 -2940,2014,2940,41.77739900000004,-73.59837500000002,29,16529.623002 -2941,2014,2941,43.120312000000006,-74.12034899999998,35,12393.313236999998 -2943,2019,2943,42.29078499999998,-78.06474100000003,40,0 -2944,2015,2944,43.19676000000002,-74.56183300000002,32,19243.698715000002 -2945,2016,2945,42.685361999999955,-77.56473599999994,44,32230.584424 -2946,2016,2946,44.01020999999996,-74.95293200000006,64,12670.574579999997 -2947,2014,2947,41.317576,-74.423478,0,0 -2949,2018,2949,43.043307,-77.33582,0,0 -2950,2019,2950,42.09798099999997,-78.769049,35,44490.88317700001 -2951,2018,2951,44.29501299999998,-73.77107500000007,37,16239.764417000002 -2952,2014,2952,44.32509199999999,-75.52207999999999,10,1712.8516550000002 -2954,2017,2954,44.508392999999984,-75.348561,21,11067.242776 -2956,2018,2956,43.567947,-75.5432,0,0 -2957,2013,2957,42.433525,-75.34338,2,0 -2958,2019,2958,42.833211,-74.911069,0,0 -2960,2019,2960,42.675023,-74.687703,0,0 
-2961,2018,2961,41.726686,-73.848235,0,0 -2962,2014,2962,43.103534,-76.070806,0,0 -2964,2016,2964,42.758633,-77.907711,0,0 -2965,2016,2965,43.077473000000026,-78.57333399999997,25,10995.757646999999 -2967,2017,2967,43.158028,-73.833875,0,0 -2969,2013,2969,43.085617,-74.921939,0,0 -2970,2017,2970,43.262789,-77.210079,0,0 -2972,2013,2972,41.761072000000006,-74.53825200000004,25,17859.220351 -2974,2015,2974,42.151249,-74.035172,14,17613.652052999998 -2976,2016,2976,42.325904000000016,-77.4536,31,7998.151083000001 -2979,2015,2979,42.123398999999985,-77.61838200000004,27,4205.483356999999 -2981,2016,2981,44.16469700000002,-73.84448100000006,60,26152.622785999993 -2982,2015,2982,42.069956999999995,-75.183875,6,584.1415870000001 -2983,2015,2983,42.838007,-77.560751,0,0 -2984,2016,2984,44.85288300000001,-74.96455599999994,34,7793.3918570000005 -2985,2018,2985,40.656847,-73.874645,0,0 -2987,2015,2987,42.03001300000003,-78.519028,31,1275.8535430000002 -2988,2015,2988,42.555583,-75.607915,0,0 -2989,2014,2989,42.674580999999996,-73.31790499999993,43,25609.745622 -2991,2018,2991,42.20034800000003,-77.83909600000005,48,6958.031945 -2992,2016,2992,43.13775899999999,-78.38837800000005,22,23444.307486999995 -2993,2014,2993,42.088177,-79.378235,0,0 -2995,2013,2995,43.241207000000024,-74.06701199999999,39,0 -2997,2017,2997,43.48471700000003,-74.86813200000005,44,21307.73702 -2998,2018,2998,42.76175600000004,-77.48542499999998,40,9086.187597000002 -3002,2017,3002,43.90344,-75.353656,0,0 -3004,2017,3004,42.75360600000001,-75.36240899999994,50,39904.571958 -3008,2014,3008,43.031789,-73.641827,0,0 -3009,2013,3009,43.176851,-76.109337,0,0 -3010,2018,3010,42.153205999999976,-78.38485099999991,49,18120.894490000002 -3011,2019,3011,42.28863900000003,-73.87571900000009,53,0 -3012,2014,3012,43.291314,-73.288454,0,0 -3014,2013,3014,42.232832,-75.659867,4,573.112658 -3015,2016,3015,41.921203999999996,-75.077117,32,26541.253451 -3016,2018,3016,42.608868,-78.846872,0,0 
-3017,2019,3017,44.429871,-74.411564,0,0 -3018,2014,3018,41.761391,-74.33145599999999,18,16929.906339999998 -3019,2016,3019,42.044835,-76.742686,0,0 -3020,2019,3020,42.007254999999994,-79.25992399999997,53,20245.395668 -3021,2018,3021,42.352421,-75.990629,0,0 -3022,2014,3022,41.028611,-73.729885,0,0 -3023,2016,3023,43.072562,-73.377143,0,0 -3028,2014,3028,44.617837,-75.474073,0,0 -3030,2013,3030,41.103626,-73.968898,0,0 -3032,2013,3032,44.530108,-75.593833,0,0 -3033,2015,3033,43.295904,-77.794799,0,0 -3034,2013,3034,42.959629,-77.845224,0,0 -3036,2014,3036,41.790999000000006,-73.78330600000001,14,4957.002933 -3038,2014,3038,42.780044,-77.757606,0,0 -3040,2016,3040,42.324971,-74.98717599999999,13,6642.179278999999 -3042,2019,3042,44.41749499999998,-73.49942700000004,38,7755.328888000002 -3044,2016,3044,44.179646999999996,-75.19673299999997,32,17747.828035 -3046,2017,3046,43.426567,-73.266203,0,0 -3047,2018,3047,44.341734,-75.591163,0,0 -3050,2014,3050,43.26629000000002,-74.90435399999996,67,13370.688872999997 -3052,2017,3052,42.376998,-79.496777,17,0 -3053,2013,3053,43.021101,-78.523259,1,131.182395 -3054,2016,3054,44.76797600000005,-74.24593000000003,63,16734.869826999995 -3055,2013,3055,42.34300800000004,-75.215275,49,9400.398912 -3056,2013,3056,43.798535,-76.076833,0,0 -3057,2017,3057,42.947744,-75.802681,0,0 -3058,2019,3058,42.839166,-78.674763,0,0 -3059,2016,3059,44.430768,-75.599134,0,0 -3060,2018,3060,44.50452099999997,-74.52244299999992,68,4566.936968999999 -3062,2016,3062,43.29105900000001,-73.61232500000004,24,20890.518597000002 -3063,2016,3063,43.87161599999996,-74.15465700000003,42,10638.820512 -3064,2013,3064,41.09671200000002,-74.08360700000001,36,39127.15198299999 -3065,2014,3065,42.634058,-75.345396,0,0 -3067,2018,3067,42.179563000000016,-78.17073099999999,19,6015.239613 -3068,2013,3068,42.894377,-74.311428,0,0 -3069,2019,3069,42.64459699999998,-77.34928899999994,36,25092.858754999997 -3070,2019,3070,43.318563,-77.845031,0,0 
-3071,2019,3071,42.379646,-76.647479,0,0 -3073,2013,3073,44.491685,-75.697246,0,0 -3074,2019,3074,41.065981,-73.978046,0,0 -3075,2018,3075,44.34536000000002,-73.92046600000002,47,25699.521821000002 -3076,2017,3076,43.61606600000003,-73.53801999999997,43,25405.789728999996 -3077,2016,3077,42.25687599999997,-76.46995000000005,48,21386.210302 -3081,2019,3081,42.02576199999999,-79.66470299999995,47,21021.806976 -3082,2018,3082,42.342570000000016,-76.84466199999999,25,2389.388442 -3084,2019,3084,42.846579,-77.375454,1,0 -3085,2015,3085,41.51292400000006,-74.62138899999994,53,20755.161552999998 -3087,2013,3087,43.98047200000004,-73.574705,66,21735.162603 -3088,2018,3088,42.949251,-74.71937,0,0 -3089,2016,3089,43.001106,-78.248734,0,0 -3090,2013,3090,43.465200999999986,-74.83274900000005,55,34328.893427 -3091,2014,3091,42.026392,-76.382806,0,0 -3092,2017,3092,42.90206,-75.453038,0,0 -3093,2013,3093,41.566203,-74.131628,0,0 -3094,2016,3094,42.58036300000002,-74.95352700000002,52,14290.477862000002 -3095,2018,3095,42.82491600000002,-73.84288299999992,52,42979.994504999995 -3097,2014,3097,44.38025200000001,-73.58865399999996,22,9497.854634 -3098,2017,3098,42.017315,-78.64099,5,1686.54427 -3099,2017,3099,42.007822,-77.104118,0,0 -3101,2015,3101,41.093047,-73.844723,0,0 -3102,2019,3102,43.086881,-77.502354,0,0 -3103,2015,3103,43.121329,-77.020079,0,0 -3105,2013,3105,42.624996,-77.1805,0,0 -3106,2014,3106,44.79069000000002,-74.61598899999993,47,10768.609772000002 -3107,2019,3107,43.28291199999999,-78.40100400000003,28,23771.999471999996 -3108,2016,3108,43.095053,-75.929528,0,0 -3109,2014,3109,41.671236,-74.199186,0,0 -3110,2016,3110,42.21272599999999,-76.9914440000001,52,16815.446413 -3111,2017,3111,41.873954000000005,-74.86001300000005,63,25755.420392 -3112,2015,3112,42.320811,-79.403959,6,0 -3114,2014,3114,41.52151700000001,-74.79887899999996,25,15232.276906000001 -3115,2016,3115,42.82174999999999,-76.649504,16,26079.951599 -3116,2019,3116,43.272191,-78.916052,0,0 
-3117,2014,3117,42.64363000000002,-74.11814799999992,69,39111.417189 -3118,2019,3118,43.80863600000001,-74.81163299999992,52,17563.508821000003 -3119,2016,3119,42.341794,-74.01534500000001,13,8583.863236 -3120,2014,3120,42.125195,-77.90368,0,0 -3122,2015,3122,42.818524,-75.550138,0,0 -3124,2018,3124,44.998768999999946,-73.81973500000002,70,8264.717877000001 -3125,2016,3125,43.230413,-73.29058500000002,35,10745.216226 -3127,2013,3127,42.41209400000001,-77.30542199999994,51,14440.498411999999 -3130,2013,3130,41.863817000000004,-74.97160600000002,26,9630.090997 -3131,2015,3131,42.774913999999995,-76.11007999999998,22,6315.91446 -3132,2015,3132,41.94239100000002,-75.270249,35,26096.919475000006 -3135,2016,3135,43.463641,-75.867649,0,0 -3136,2014,3136,42.736262,-75.082218,14,13216.619083999998 -3137,2014,3137,42.341948,-76.619866,34,19320.338524 -3139,2016,3139,43.59757599999995,-74.26429400000005,48,24546.270084 -3140,2018,3140,44.774327000000035,-73.97400100000003,47,0 -3141,2014,3141,43.27069500000001,-76.75025400000001,16,4156.927442 -3142,2013,3142,44.391344,-74.75978600000003,24,8506.844448 -3146,2014,3146,43.49682000000001,-73.317628,7,10978.573194 -3147,2016,3147,42.765410999999986,-77.20919400000001,18,6491.732759 -3148,2019,3148,42.673918,-78.417269,0,0 -3150,2015,3150,44.399475,-74.117275,4,1401.420914 -3151,2017,3151,42.05640500000003,-73.72456000000007,41,31017.168908000007 -3152,2014,3152,42.07288799999998,-75.01593899999993,40,0 -3154,2019,3154,42.345844,-79.504163,0,0 -3155,2017,3155,42.455755,-79.275204,0,0 -3156,2017,3156,42.871185,-76.895048,0,0 -3157,2015,3157,42.18245,-78.439947,0,0 -3158,2014,3158,42.942064,-78.217554,0,0 -3159,2018,3159,40.589737,-74.093883,0,0 -3162,2015,3162,43.93346100000003,-75.38173999999994,44,8527.891936 -3163,2017,3163,44.406109999999956,-73.87521299999995,43,17139.098287999997 -3164,2014,3164,44.05085099999998,-74.98067600000002,42,23140.717335999998 -3165,2015,3165,42.53704,-73.773525,0,0 
-3168,2013,3168,42.42395699999998,-73.528503,43,28269.088181 -3169,2014,3169,43.614551,-75.64588299999997,23,5661.186347 -3170,2015,3170,41.913172999999986,-74.96081599999997,35,18366.068554999998 -3171,2015,3171,41.740287,-74.112284,9,7181.153625000001 -3172,2016,3172,42.997186000000035,-76.90785899999996,28,13544.653120999998 -3173,2015,3173,43.023679,-77.744528,0,0 -3174,2013,3174,42.57349,-76.316593,0,0 -3175,2017,3175,43.200886,-77.262177,0,0 -3176,2017,3176,42.966777999999984,-73.96860399999998,31,8809.277041000001 -3177,2014,3177,41.548986000000006,-73.71087400000005,24,24845.436652 -3178,2017,3178,44.70858099999996,-74.98881199999988,85,19479.954590999998 -3179,2014,3179,43.76465800000004,-73.52104999999999,39,12151.971132 -3181,2019,3181,41.545065,-74.352081,8,6736.592174 -3182,2017,3182,44.100358,-75.572847,0,0 -3183,2016,3183,42.216186000000036,-75.756669,40,24255.887149000002 -3184,2014,3184,41.565337,-73.843066,0,0 -3186,2019,3186,42.62960100000004,-75.25570800000004,51,19494.556184000005 -3187,2017,3187,43.974942999999996,-74.38457199999998,38,11280.620393 -3189,2013,3189,43.436264000000016,-75.21827800000003,15,2197.509834 -3190,2014,3190,42.4184,-76.910842,0,0 -3191,2018,3191,42.982886,-74.074611,7,676.008115 -3192,2019,3192,41.889674,-74.485556,0,0 -3193,2019,3193,43.443616999999996,-74.49522699999996,27,29063.148702000002 -3197,2017,3197,42.060293,-73.66578399999996,38,6788.397143000001 -3198,2018,3198,42.21094300000002,-78.344402,33,27087.754832 -3199,2015,3199,42.667120999999966,-75.68158899999992,53,25461.139604 -3200,2016,3200,42.499862,-73.76400199999996,25,0 -3201,2018,3201,42.551501000000016,-78.99816299999999,19,0 -3202,2016,3202,41.974517999999996,-74.66877400000003,31,31166.214883999997 -3203,2013,3203,43.520461999999974,-73.97691100000009,50,9153.056427000003 -3205,2015,3205,42.643206,-78.212988,0,0 -3209,2018,3209,43.654383999999965,-73.82764800000012,88,8670.94553 -3210,2017,3210,42.563437,-75.703411,0,0 
-3212,2019,3212,43.546543,-73.327032,5,0 -3214,2017,3214,41.374343,-74.305131,7,0 -3215,2013,3215,43.08825,-78.314039,0,0 -3216,2016,3216,44.92490399999998,-73.700918,46,19607.325681000002 -3217,2014,3217,44.03496300000002,-75.13229099999992,54,17284.146311 -3218,2016,3218,42.064726,-76.62016,0,0 -3220,2016,3220,40.76968,-73.503876,0,0 -3221,2014,3221,43.24268699999999,-76.40819999999998,34,15879.920058000003 -3223,2013,3223,42.711555,-78.706857,0,0 -3224,2015,3224,42.367471000000016,-75.50790800000003,26,15407.505626000002 -3225,2014,3225,42.115108,-78.491344,0,0 -3226,2018,3226,43.136125,-74.90459700000001,31,18757.815534 -3227,2017,3227,42.613563,-77.953453,0,0 -3228,2013,3228,40.619459,-74.023764,0,0 -3229,2014,3229,42.34959200000002,-76.40782799999992,42,29805.479634999996 -3230,2018,3230,42.068388,-74.41950700000002,30,0 -3231,2015,3231,43.60190400000004,-73.90652600000007,41,23812.512447999998 -3232,2016,3232,44.68662700000003,-74.07850400000004,43,15724.607374 -3233,2018,3233,43.32723400000002,-74.27152700000002,37,10376.608616000001 -3234,2013,3234,42.2201,-76.707273,56,30110.982398999997 -3235,2016,3235,43.795014,-73.40856499999998,15,161.90116200000006 -3236,2014,3236,44.389960999999985,-74.35507700000007,65,16883.634457000004 -3237,2018,3237,42.214889,-73.897722,0,0 -3238,2016,3238,43.145455999999996,-74.99226600000009,51,19005.474491999998 -3239,2016,3239,42.356505,-74.590712,0,0 -3240,2014,3240,42.45364199999999,-74.38876400000001,17,824.3391140000001 -3242,2019,3242,44.746931,-74.590335,0,0 -3243,2017,3243,43.373949000000025,-74.45807999999992,40,36682.445901 -3244,2014,3244,43.825525,-73.440409,1,282.33103 -3245,2018,3245,42.434993,-78.542,0,0 -3246,2017,3246,44.296812000000024,-74.51554400000002,33,4421.450914000001 -3247,2016,3247,42.172732,-75.086773,55,19965.06552 -3248,2013,3248,42.491551000000015,-76.17491800000002,35,22546.89619 -3249,2019,3249,43.94591300000001,-75.79779599999995,57,18420.924866999994 
-3250,2016,3250,43.566716000000014,-74.04316400000006,51,12072.360153 -3251,2018,3251,44.66597900000003,-74.6419810000001,52,11115.810714 -3252,2018,3252,41.287025,-74.535533,0,0 -3255,2019,3255,42.57848,-77.59669,0,0 -3256,2013,3256,44.108860000000035,-75.32460799999993,39,0 -3257,2017,3257,42.974979,-78.482113,0,0 -3258,2016,3258,42.19566200000001,-75.59288500000001,36,25690.468710999998 -3259,2014,3259,40.949203000000004,-72.36090800000002,21,10612.879992000002 -3260,2016,3260,43.78504999999998,-75.25742900000009,47,15673.837015999998 -3261,2018,3261,40.629223,-73.891389,0,0 -3264,2016,3264,43.70712099999997,-76.09229899999994,51,13902.681144000004 -3266,2013,3266,41.966014000000044,-74.38781000000007,68,13840.714511999999 -3270,2013,3270,42.47608799999999,-74.04931600000005,28,11504.200164 -3271,2015,3271,42.00448299999999,-78.38422799999992,53,25522.135428 -3272,2015,3272,42.21129100000003,-75.93544999999996,46,18714.946809 -3274,2016,3274,41.83183099999999,-74.59921299999998,35,39565.709683 -3276,2017,3276,40.805136,-73.453413,0,0 -3277,2015,3277,40.893933,-72.884183,0,0 -3279,2018,3279,40.741115,-73.824019,0,0 -3281,2015,3281,44.22276000000004,-75.240996,35,11273.840200999999 -3282,2013,3282,42.251793000000006,-75.982986,17,1020.8320139999998 -3283,2017,3283,44.144092999999984,-75.02708800000003,28,8641.734034000001 -3284,2016,3284,42.641582,-78.843881,0,0 -3285,2017,3285,41.78655800000001,-74.75835599999996,41,23443.79292 -3286,2014,3286,42.853621,-76.633125,0,0 -3287,2014,3287,42.04713999999999,-75.92372400000002,30,8896.296041 -3288,2018,3288,42.614415,-75.06935399999999,23,10900.719267999999 -3289,2015,3289,41.82248299999997,-73.78901899999995,44,12745.779888000003 -3290,2015,3290,42.461345,-75.111671,0,0 -3291,2014,3291,41.551555,-73.566156,0,0 -3292,2017,3292,42.312386,-78.288032,0,0 -3293,2014,3293,42.95868,-75.848468,0,0 -3296,2015,3296,44.449843000000044,-74.83437400000001,43,9612.921588999998 -3297,2018,3297,43.039781,-77.857686,0,0 
-3298,2016,3298,42.20601699999998,-73.54955900000003,40,23299.859384 -3300,2014,3300,43.113671999999966,-75.73222900000009,66,4580.874513999999 -3301,2019,3301,42.785212,-76.394966,0,0 -3303,2018,3303,42.523175000000016,-73.53456000000004,38,35316.628206999994 -3304,2018,3304,42.880749,-76.690906,0,0 -3305,2019,3305,42.845638,-77.08509899999996,25,28456.237631 -3307,2018,3307,43.56178200000004,-74.10675600000003,58,28214.115403000003 -3308,2018,3308,42.38397899999998,-73.99273900000001,27,31430.037214999997 -3309,2017,3309,43.25016000000003,-74.68882399999998,35,26281.646106 -3310,2015,3310,43.271406999999975,-74.72089400000003,36,24832.338019 -3311,2016,3311,42.999238999999996,-73.829663,8,7654.110362000001 -3312,2013,3312,44.03785,-76.11606,0,0 -3314,2017,3314,42.18495399999999,-77.32976299999999,16,10180.612021 -3315,2018,3315,44.497106000000024,-74.70625399999999,33,16205.768635999999 -3317,2018,3317,42.993383,-75.01910199999999,21,13951.370448999998 -3319,2017,3319,44.974406999999985,-73.89375199999996,38,1579.80874 -3320,2018,3320,41.448849,-73.555067,0,0 -3321,2015,3321,43.30132499999998,-73.81838400000001,37,20519.915317 -3322,2019,3322,42.854432,-78.272233,0,0 -3323,2017,3323,41.696512,-73.571879,0,0 -3324,2014,3324,42.76552900000001,-78.826863,26,4481.073684999999 -3326,2016,3326,42.78286099999999,-75.82124999999998,31,33850.784863 -3327,2019,3327,43.581985999999965,-74.96479499999997,47,26483.518114 -3329,2019,3329,41.619097,-74.980986,0,0 -3330,2014,3330,42.15059799999997,-79.16246499999993,30,0 -3331,2019,3331,42.913351999999996,-74.13709200000001,7,2707.754636 -3332,2018,3332,41.559704,-74.03740400000001,13,5505.345936 -3335,2019,3335,42.00807200000002,-73.53670300000006,39,25372.362239000002 -3336,2016,3336,43.70482599999999,-75.1523820000001,55,14278.542932 -3337,2017,3337,43.192522000000025,-78.87623400000003,24,10882.613774000001 -3338,2017,3338,42.599322,-77.86210800000002,6,2802.405149 -3339,2013,3339,44.072091,-73.68433999999989,63,13268.434368 
-3341,2014,3341,42.063567,-76.444683,1,685.546346 -3342,2016,3342,42.08479899999998,-74.57090100000003,22,15475.421221 -3344,2017,3344,43.04296,-78.40572,0,0 -3345,2018,3345,43.37764599999998,-75.35303599999999,38,0 -3346,2018,3346,42.44774299999999,-76.05255899999999,16,3844.870536000001 -3347,2014,3347,41.457733,-74.498863,0,0 -3349,2017,3349,44.45792400000003,-74.24882799999995,36,1732.1206860000002 -3351,2018,3351,42.234122,-75.13829200000005,28,11032.523206999998 -3352,2014,3352,40.836302,-72.791561,0,0 -3353,2017,3353,42.64789100000002,-75.10232600000003,43,32146.154465 -3354,2015,3354,42.186193,-78.052544,0,0 -3355,2016,3355,43.117561,-77.694628,4,2565.245351 -3356,2018,3356,43.82543600000002,-74.07809200000001,31,18007.786741 -3357,2016,3357,42.053088,-75.85937399999999,15,742.464238 -3358,2016,3358,42.12250999999997,-76.56426700000007,47,23862.945995 -3362,2016,3362,41.77234399999998,-74.2526039999999,54,11854.006657 -3363,2019,3363,42.708356000000016,-74.10938100000003,33,16989.96459 -3364,2013,3364,41.722539,-74.678728,0,0 -3365,2014,3365,42.726265,-78.560696,0,0 -3367,2015,3367,43.509790999999986,-75.321253,17,3631.9297810000003 -3368,2015,3368,44.864511,-74.167107,4,268.143799 -3369,2013,3369,43.220154,-77.694056,0,0 -3370,2017,3370,43.188533,-73.28696600000006,30,13110.936764999999 -3372,2015,3372,42.860593,-78.733704,8,1984.248805 -3373,2019,3373,44.294308,-75.52513300000005,28,0 -3374,2015,3374,42.483667,-75.415135,0,0 -3375,2013,3375,43.43533099999997,-76.298851,51,0 -3376,2014,3376,43.16660000000001,-77.85675300000001,26,7107.487458 -3378,2018,3378,43.30285,-73.337249,0,0 -3380,2013,3380,44.44305499999998,-73.603161,30,25112.231303 -3381,2016,3381,42.76090399999999,-74.58867099999999,14,2986.375392 -3382,2018,3382,44.729193,-75.331313,0,0 -3383,2018,3383,42.01292300000005,-74.6228970000001,62,26801.942093 -3384,2017,3384,42.946352,-73.81045,0,0 -3385,2016,3385,42.65052500000001,-76.38977500000006,36,23024.484386 
-3386,2014,3386,42.761702999999954,-77.56901400000004,43,12925.751119999999 -3388,2019,3388,40.785456,-73.214515,0,0 -3389,2018,3389,44.973406999999966,-74.76002400000004,44,19020.639021 -3390,2014,3390,42.113204,-74.79456899999998,33,22933.533513999995 -3391,2014,3391,44.149646999999995,-73.59776899999999,52,16896.985086 -3392,2016,3392,43.83832999999999,-75.76438499999999,10,1505.297308 -3394,2014,3394,42.282344,-75.944938,0,0 -3395,2015,3395,42.662291,-78.528552,0,0 -3396,2019,3396,44.123211,-75.553402,0,0 -3397,2016,3397,43.03013499999999,-75.43277600000003,21,10434.123353 -3398,2013,3398,42.141746000000005,-75.51263599999996,46,32352.228214 -3399,2017,3399,43.441785999999986,-74.190969,24,9587.046973 -3400,2017,3400,41.01392599999999,-72.23756000000002,27,41504.61844 -3401,2015,3401,43.04611699999996,-74.14882000000007,89,15619.965150999993 -3402,2019,3402,44.54288099999998,-73.90248299999993,63,31690.200372999996 -3404,2016,3404,43.11506,-75.181714,8,4275.585389 -3405,2014,3405,43.93886300000002,-75.23356,38,25098.181994000006 -3408,2016,3408,40.621105,-73.962831,0,0 -3410,2017,3410,41.919913,-74.650113,0,0 -3411,2013,3411,42.76458599999995,-74.11360999999995,62,25286.752277 -3412,2013,3412,41.914215000000006,-73.89045499999999,28,9212.759720000002 -3413,2015,3413,42.82744200000001,-78.463507,16,3923.8816800000004 -3414,2015,3414,42.637594,-79.041063,0,0 -3415,2017,3415,41.296203,-73.670087,0,0 -3416,2017,3416,44.372564000000004,-75.570342,38,15573.268695999997 -3417,2018,3417,42.573197,-74.392023,0,0 -3418,2019,3418,42.02867700000001,-73.82867200000004,22,24028.111170999997 -3419,2018,3419,40.671901,-73.35195,4,64.93867499999999 -3420,2015,3420,42.67103,-77.798167,0,0 -3421,2018,3421,42.19419900000002,-74.94341700000001,61,23135.341663 -3423,2018,3423,43.0326,-76.250016,6,1696.2801840000002 -3424,2016,3424,40.883622,-73.597384,0,0 -3425,2015,3425,43.44764600000004,-74.59919099999992,59,12778.164643999999 -3426,2015,3426,42.637643,-76.602744,0,0 
-3427,2015,3427,44.354583,-74.75706799999992,57,17910.641271999997 -3428,2019,3428,42.10283200000002,-77.12069299999997,39,13736.639310999999 -3429,2013,3429,44.687028,-73.82126800000006,55,16800.32872 -3432,2014,3432,42.155403000000014,-74.39621600000007,57,25495.314968 -3433,2019,3433,44.247652,-75.965759,0,0 -3434,2014,3434,42.007098,-78.055309,0,0 -3435,2019,3435,44.17480900000003,-73.98163800000005,39,26845.305707 -3436,2018,3436,43.238382,-78.123028,0,0 -3437,2018,3437,42.920817,-73.461323,0,0 -3439,2013,3439,43.57003899999997,-73.29960699999997,38,8378.913567 -3442,2014,3442,42.085871,-79.567682,5,1204.335597 -3445,2014,3445,42.22858599999997,-75.444113,32,41202.573846000014 -3446,2013,3446,42.092077,-75.915152,0,0 -3448,2013,3448,42.59286100000002,-75.250497,40,18130.808434 -3451,2017,3451,43.35523899999999,-75.16885700000006,29,20928.802419 -3453,2015,3453,43.13689,-73.87998500000002,34,8625.387195 -3454,2017,3454,42.978031,-74.316905,0,0 -3455,2017,3455,42.646693,-78.318794,0,0 -3456,2015,3456,44.174920999999976,-74.08091500000005,61,20708.431197000005 -3457,2016,3457,43.295355,-76.711898,0,0 -3458,2015,3458,43.098484,-78.765478,2,3.683642 -3460,2017,3460,43.871998999999974,-74.78814899999999,53,0 -3462,2017,3462,43.55620199999999,-73.92335200000001,13,8065.230215 -3464,2019,3464,42.984006,-76.11514,0,0 -3465,2017,3465,44.25015200000002,-74.56108200000006,49,16537.984813 -3466,2019,3466,43.68509,-75.419054,0,0 -3468,2017,3468,42.221162000000035,-77.662245,40,31505.962293999997 -3470,2016,3470,42.230899,-79.121816,0,0 -3471,2013,3471,42.82993800000002,-75.27617000000005,38,26527.988452999998 -3472,2015,3472,41.616608,-74.216219,0,0 -3473,2015,3473,42.85173099999999,-77.895448,21,11416.269532999999 -3474,2016,3474,42.98769199999999,-75.99458300000002,12,16159.129262999999 -3476,2015,3476,44.697179,-74.839402,3,1497.3054060000002 -3477,2019,3477,43.293881,-75.502666,0,0 -3478,2016,3478,40.869576,-73.421798,0,0 
-3479,2017,3479,43.38104100000006,-73.68974999999989,76,25167.016322000003 -3480,2016,3480,41.55575999999999,-74.24540500000003,22,29323.197708 -3484,2019,3484,42.818222,-74.60784,0,0 -3487,2013,3487,43.065714,-75.640293,0,0 -3488,2016,3488,41.183335,-73.73998800000001,12,10105.883309 -3489,2018,3489,42.61245099999998,-75.45215000000005,39,43608.797755 -3490,2018,3490,42.541445999999965,-74.97003600000004,44,30413.665662000007 -3492,2019,3492,43.214400000000026,-76.13738999999997,45,15267.267706000004 -3493,2015,3493,44.18694099999999,-75.033997,38,18750.929742 -3495,2019,3495,43.72616300000003,-73.79451100000004,46,13550.823086 -3497,2016,3497,42.52051800000002,-74.69869400000002,36,0 -3500,2014,3500,43.79582800000002,-75.70831200000005,37,22805.41104 -3501,2014,3501,42.597389000000014,-77.00104400000004,22,3955.976796 -3502,2018,3502,41.503831999999996,-74.20381300000001,18,6612.338981000001 -3503,2018,3503,41.84087799999998,-73.984279,23,0 -3504,2014,3504,44.003649,-75.16019400000002,21,0 -3506,2014,3506,42.365574,-75.157321,0,0 -3508,2016,3508,41.983565,-74.859834,17,9600.920912 -3509,2013,3509,43.291997,-77.856913,0,0 -3511,2017,3511,43.246037,-76.683834,0,0 -3512,2015,3512,42.007428,-73.61692300000001,18,16174.483327 -3513,2017,3513,41.473201,-74.048412,0,0 -3514,2016,3514,42.271926,-73.745603,0,0 -3515,2017,3515,41.728573,-74.103084,0,0 -3517,2019,3517,42.63606100000002,-78.94639399999993,69,23973.131957999998 -3518,2015,3518,43.243909,-78.701319,0,0 -3519,2015,3519,44.04868800000003,-74.532017,44,11386.141273000001 -3520,2015,3520,41.67709,-73.95624,0,0 -3521,2017,3521,43.74639200000001,-73.99438999999998,37,0 -3522,2015,3522,41.031909,-73.704739,0,0 -3523,2015,3523,43.113455,-76.631898,0,0 -3524,2019,3524,42.935476,-78.747735,0,0 -3525,2013,3525,42.32615699999998,-74.30777900000007,40,27292.688905999996 -3526,2014,3526,42.61346099999997,-78.69736000000007,50,31156.624158000002 -3527,2015,3527,43.86089399999997,-73.56694000000005,44,15156.968689000001 
-3528,2017,3528,42.05178999999999,-75.11002999999995,44,27071.182661 -3529,2013,3529,44.021687,-75.913657,4,72.81624500000001 -3532,2016,3532,43.68756200000004,-74.82625800000004,74,14032.690176999997 -3533,2017,3533,43.46324200000002,-74.389138,47,19191.214270000004 -3534,2018,3534,41.265774,-73.783304,1,451.795976 -3535,2014,3535,43.377168,-75.22895,4,832.508202 -3537,2015,3537,43.065534,-78.837446,0,0 -3539,2018,3539,42.166673,-77.499412,0,0 -3542,2019,3542,42.761547999999976,-74.81440900000001,22,11575.172193999999 -3543,2016,3543,42.479669,-74.360395,0,0 -3544,2014,3544,42.202391,-78.627644,0,0 -3545,2018,3545,43.46245,-74.16076399999999,27,15514.509433 -3546,2015,3546,42.739299,-76.692422,0,0 -3547,2016,3547,42.086906999999975,-77.45367100000001,29,15880.901204000005 -3548,2013,3548,42.13937099999999,-76.76096400000004,30,25752.908204000003 -3549,2016,3549,41.713060999999975,-75.01709299999996,48,20530.400186 -3550,2019,3550,42.034723,-78.10865400000009,52,47820.157672999994 -3551,2019,3551,40.751578,-73.281556,0,0 -3552,2014,3552,41.386427,-74.53724,0,0 -3553,2017,3553,42.22014600000006,-79.4053439999999,46,23665.710511999998 -3554,2014,3554,44.42594099999999,-75.110005,47,6682.182012000001 -3556,2016,3556,43.400696,-73.521671,0,0 -3557,2013,3557,42.362705,-79.284688,0,0 -3558,2015,3558,43.23157700000001,-73.50588800000001,13,11438.105794 -3559,2016,3559,44.382703,-75.44499500000003,15,1313.1648209999998 -3560,2015,3560,42.988889,-73.449644,0,0 -3561,2013,3561,40.911329,-73.833361,0,0 -3562,2017,3562,43.71414699999997,-75.86101199999995,49,0 -3563,2019,3563,43.587125999999955,-74.38578199999996,45,14057.926452 -3564,2019,3564,42.71375799999999,-76.30175399999999,11,8520.14898 -3565,2013,3565,44.96085399999997,-73.77448000000003,53,11392.775068000004 -3567,2015,3567,44.735200999999975,-73.90506899999995,57,12311.090205999999 -3568,2013,3568,43.269461,-74.23918699999994,28,6870.605011 -3569,2019,3569,43.338989999999995,-74.389547,24,16689.160534 
-3571,2016,3571,42.845074,-76.047356,0,0 -3572,2017,3572,42.417887,-76.130789,0,0 -3573,2015,3573,44.36638899999999,-75.657585,7,7976.302174 -3574,2013,3574,43.723396999999984,-75.040159,39,9740.421704 -3577,2018,3577,42.817569,-77.670667,0,0 -3578,2013,3578,42.441429,-79.372721,0,0 -3580,2015,3580,42.806004999999985,-77.530718,35,10414.945749999999 -3581,2016,3581,42.25227599999999,-73.93063900000006,52,25666.167943 -3583,2019,3583,43.23245999999997,-73.38284800000002,36,24268.318309000002 -3584,2015,3584,41.650017,-74.520349,0,0 -3585,2017,3585,43.68155000000001,-74.36757500000003,31,33956.140289 -3586,2017,3586,42.525855,-78.909676,0,0 -3588,2013,3588,42.615258999999995,-74.687362,13,0 -3589,2017,3589,43.413737999999995,-74.96414099999996,50,33233.408514999996 -3591,2018,3591,42.72067600000001,-78.94027800000002,19,6034.146793 -3593,2015,3593,44.984176000000005,-74.66482700000005,28,3270.546593 -3594,2018,3594,43.207386,-77.202035,0,0 -3595,2016,3595,42.82216100000002,-76.98648099999997,22,26879.292290999998 -3597,2013,3597,41.667245999999984,-74.23567599999996,34,29634.863929 -3598,2014,3598,44.27015899999999,-73.8694409999999,112,16568.627013 -3599,2019,3599,43.22869599999998,-74.85782300000001,32,2157.0597589999998 -3600,2019,3600,43.089479,-79.028329,0,0 -3601,2019,3601,42.714207,-77.889729,0,0 -3602,2017,3602,44.74441,-75.143462,0,0 -3603,2015,3603,44.79318700000012,-73.44143600000005,101,13113.167399000004 -3605,2019,3605,42.31219499999998,-76.453703,34,8364.26772 -3606,2019,3606,44.89301500000006,-73.45054299999995,69,26624.666438000004 -3607,2015,3607,43.748696,-76.04900599999999,13,3394.775297 -3608,2015,3608,40.73045,-73.356792,0,0 -3609,2015,3609,43.26357000000003,-74.36221900000007,46,33686.591603 -3610,2014,3610,42.22605199999999,-78.97597200000003,34,13122.885414999999 -3611,2017,3611,41.928472999999954,-74.48544799999993,40,28368.855794000003 -3612,2017,3612,42.63723800000001,-78.48110100000001,21,14491.015073 
-3614,2018,3614,43.274838000000024,-73.911444,32,25538.909087999997 -3615,2015,3615,42.502245,-77.862717,0,0 -3617,2016,3617,43.784118000000014,-73.83061500000002,29,11233.565 -3618,2017,3618,43.67630400000003,-75.14285800000006,51,23864.194345 -3619,2017,3619,43.09372299999998,-78.20684600000003,24,10197.20084 -3620,2017,3620,42.715036,-78.107116,0,0 -3621,2015,3621,43.969702,-75.707021,0,0 -3622,2015,3622,42.674478,-73.828588,0,0 -3623,2014,3623,43.561449,-73.816783,0,0 -3624,2014,3624,41.87202900000006,-74.76529100000008,55,20844.053684000002 -3625,2015,3625,41.959597000000024,-73.80045999999997,24,18304.962214 -3628,2019,3628,43.195616,-77.00781,0,0 -3629,2015,3629,43.081011,-78.093372,0,0 -3632,2018,3632,44.76016000000001,-74.74660199999995,25,0 -3634,2016,3634,44.057483999999974,-73.82432100000004,57,16266.362796000001 -3636,2018,3636,44.83276999999999,-74.37615900000002,67,16785.223409999995 -3637,2013,3637,42.800556,-78.653692,0,0 -3638,2015,3638,42.97664899999997,-74.94673600000004,23,5174.400052999999 -3640,2015,3640,44.46542300000005,-73.88944200000003,58,17552.812971 -3643,2016,3643,43.958666,-74.236359,42,17723.776426999997 -3644,2013,3644,44.26412299999998,-74.28884900000001,41,17693.505243 -3645,2013,3645,41.60601500000001,-74.64131200000004,38,0 -3647,2019,3647,43.08580800000002,-74.4173300000001,49,31890.875342999996 -3648,2018,3648,43.394871000000016,-76.18345999999998,11,8730.95855 -3652,2017,3652,41.56490299999997,-74.91883999999993,52,27296.144214 -3654,2019,3654,43.15621200000003,-73.27662700000005,41,21701.325238999998 -3655,2017,3655,43.298651,-77.72345399999999,10,5377.639469 -3656,2015,3656,42.25672599999999,-78.71985599999996,21,29401.151897999996 -3657,2013,3657,42.624278,-74.86679000000002,50,21332.080337 -3658,2016,3658,42.26836,-78.027575,6,5271.306807999999 -3660,2018,3660,42.181720999999996,-79.39371200000004,31,16099.290464000002 -3661,2014,3661,44.88714400000001,-74.76932600000006,47,8443.531228 
-3662,2018,3662,43.145582999999995,-74.39687299999997,24,8233.838354000001 -3664,2013,3664,43.903825,-75.47186,0,0 -3665,2019,3665,42.297827,-74.74854200000003,27,15459.763561 -3666,2013,3666,42.968318,-78.594668,0,0 -3667,2017,3667,43.120659999999994,-75.30382799999998,11,10293.502658000001 -3668,2017,3668,42.903936000000016,-76.54893999999996,22,14480.232927000001 -3669,2013,3669,42.191027,-76.20350099999997,30,0 -3670,2016,3670,44.669515999999994,-73.73800200000004,33,12149.101845 -3671,2018,3671,42.962495,-73.725234,24,2145.7167329999997 -3673,2013,3673,43.63503799999997,-74.91446700000004,33,14661.041033000001 -3676,2017,3676,42.07021200000001,-75.43995099999998,34,34498.741316 -3677,2017,3677,42.42041300000004,-77.36811600000007,43,30867.497390999997 -3679,2016,3679,43.997798000000024,-74.18215400000004,31,39624.430381 -3680,2015,3680,43.31417800000001,-75.96717499999995,35,0 -3681,2013,3681,44.12960399999998,-74.365481,31,8994.542072999999 -3682,2014,3682,42.768425999999984,-76.19293599999997,22,20949.915816 -3683,2013,3683,43.627653,-75.61360999999994,34,5947.288603999999 -3684,2017,3684,43.11217,-73.47942600000005,24,11965.573704999999 -3686,2019,3686,42.65583400000001,-78.95080600000003,17,3664.8112220000003 -3687,2018,3687,44.359383,-75.778192,6,0 -3689,2014,3689,44.076282,-75.14391800000008,45,19502.062346 -3690,2015,3690,44.65590799999999,-73.56927399999998,17,7144.5724949999985 -3691,2014,3691,42.15920000000004,-78.10143999999994,42,22533.439373999998 -3692,2018,3692,43.06613500000002,-75.19098499999998,16,644.060815 -3693,2017,3693,43.243564,-77.916316,0,0 -3694,2013,3694,44.76458499999994,-73.75694699999994,63,15032.238859000001 -3695,2013,3695,42.35033,-78.93425,0,0 -3696,2014,3696,42.632102000000025,-74.28201200000004,55,18037.257519000003 -3698,2017,3698,43.379639,-73.49627700000005,41,19677.254355999998 -3699,2019,3699,43.169616000000005,-74.66558600000005,65,33811.975297000005 -3700,2018,3700,44.876225,-74.298918,0,0 
-3701,2018,3701,42.974412,-75.560916,0,0 -3703,2017,3703,42.812957,-76.082569,0,0 -3704,2016,3704,43.333243000000024,-75.207183,56,21188.014593999997 -3705,2013,3705,44.31025100000002,-74.64148500000006,30,17385.812167 -3707,2013,3707,42.024518,-75.057775,0,0 -3709,2017,3709,44.393677,-75.768803,0,0 -3710,2016,3710,44.966556,-73.923748,0,0 -3711,2017,3711,42.45468000000003,-74.67683500000003,33,30360.994267 -3712,2019,3712,43.07450500000003,-74.570284,67,25941.267863 -3713,2016,3713,43.034483,-77.518598,0,0 -3714,2015,3714,41.40230999999998,-73.81684300000003,19,19281.793075 -3715,2015,3715,43.43991499999997,-74.05421699999991,52,15273.514328000001 -3716,2019,3716,42.107763,-73.58847000000002,6,7983.641761000001 -3717,2014,3717,43.72531399999999,-74.0701049999999,55,11280.099443 -3718,2015,3718,43.56454699999996,-75.973344,41,25298.52458 -3719,2014,3719,42.61034700000002,-76.06226399999997,23,6473.210507999999 -3720,2015,3720,42.998010999999984,-74.48636500000009,56,21813.002774 -3721,2019,3721,42.077904,-76.801919,0,0 -3722,2019,3722,42.21416399999999,-77.77780499999999,30,10560.30585 -3723,2018,3723,44.29481400000001,-74.68554399999995,32,5467.96563 -3724,2014,3724,42.00225300000003,-74.33368000000002,36,51767.70761500001 -3725,2014,3725,43.934551999999954,-74.66631000000004,47,17535.080833 -3728,2014,3728,42.08870299999995,-78.32570600000005,51,25881.517534999995 -3730,2016,3730,42.158894,-75.998772,0,0 -3731,2013,3731,42.604636999999975,-76.50109000000005,38,26850.574045999994 -3733,2019,3733,44.168835999999985,-75.85231300000001,63,20029.040678999998 -3734,2017,3734,42.013307,-73.891722,4,4569.369382 -3735,2016,3735,43.108281999999974,-76.45196100000007,43,31642.481211000002 -3736,2014,3736,42.344969,-78.169065,0,0 -3737,2016,3737,44.031715000000034,-74.07834599999991,46,20427.960827 -3738,2016,3738,43.220944,-73.807399,0,0 -3739,2019,3739,41.84246699999996,-74.43093099999999,42,28151.615673000004 -3740,2015,3740,43.207422,-78.000589,4,1045.088532 
-3741,2013,3741,42.515775,-79.181777,0,0 -3742,2018,3742,43.88047800000002,-74.24183999999998,32,13525.221856999997 -3743,2016,3743,44.908865999999996,-73.78090399999998,27,6496.6682200000005 -3745,2013,3745,42.20326799999999,-76.54946400000001,35,11673.376355999997 -3746,2019,3746,43.668290000000034,-73.59492200000012,68,37146.368043999995 -3747,2019,3747,42.636143,-76.072662,8,0 -3748,2018,3748,42.74916699999998,-73.927825,20,18213.267756 -3749,2015,3749,42.710927,-78.798325,0,0 -3750,2016,3750,42.27352900000003,-79.39570199999997,32,13068.679274000002 -3752,2014,3752,43.213421,-75.164626,5,2911.0153880000003 -3754,2019,3754,42.51457600000002,-75.65277100000002,39,23428.871740000002 -3756,2016,3756,44.25693999999999,-74.05342800000004,44,22958.026903 -3757,2017,3757,43.35949000000002,-76.10430599999998,20,0 -3759,2019,3759,43.587226000000044,-73.88627099999997,43,13592.710791000001 -3760,2016,3760,41.394843,-73.952572,0,0 -3761,2017,3761,41.780385,-74.610649,0,0 -3762,2015,3762,43.37208099999999,-74.86787400000001,43,36265.69557899999 -3764,2014,3764,42.783287,-73.769255,9,15449.097177 -3765,2017,3765,42.02849300000002,-74.95913199999997,45,43337.912146 -3766,2017,3766,41.43913700000001,-74.13676199999999,13,4099.237023 -3768,2016,3768,42.897793,-78.520382,0,0 -3769,2013,3769,42.804651,-78.845916,0,0 -3770,2019,3770,43.857871,-75.824561,0,0 -3771,2013,3771,42.787368,-75.492361,0,0 -3772,2015,3772,44.873976999999996,-73.677468,0,0 -3774,2013,3774,42.40337500000003,-75.00590000000007,44,29530.619581 -3776,2015,3776,42.634875,-74.790701,0,0 -3778,2015,3778,42.72515499999999,-77.08563000000001,14,17470.218597 -3780,2017,3780,43.97861400000004,-73.47378399999994,41,19722.641962 -3781,2014,3781,43.104362,-78.252708,0,0 -3782,2018,3782,42.039132000000045,-79.33552800000004,42,14649.027244 -3783,2013,3783,42.386868999999976,-73.78213700000005,53,46054.60554900001 -3784,2017,3784,43.558207000000024,-73.74555200000002,68,22675.676394000002 
-3785,2014,3785,42.694735,-77.654556,0,0 -3786,2014,3786,42.23914,-74.71547400000003,27,26961.938882000002 -3787,2016,3787,42.210305000000005,-73.96679399999995,35,25523.632302000005 -3788,2015,3788,42.323649999999994,-75.57832299999998,10,4582.956676 -3789,2019,3789,44.82051099999996,-75.23770599999993,38,10170.452646999998 -3790,2019,3790,42.904203,-76.863485,0,0 -3791,2019,3791,43.189424,-73.598042,0,0 -3793,2018,3793,43.080766000000025,-74.36107399999999,62,38841.338825000006 -3795,2017,3795,41.547393000000035,-74.72510600000003,31,14601.749006000004 -3796,2018,3796,42.099857,-77.871153,0,0 -3797,2016,3797,43.45847899999996,-76.03935099999993,40,33455.98873800001 -3800,2016,3800,42.558741,-75.356096,0,0 -3801,2014,3801,42.268511999999994,-78.39588000000002,12,2459.1654140000005 -3803,2019,3803,42.591871,-75.417131,0,0 -3804,2014,3804,42.898092,-75.170042,0,0 -3805,2015,3805,42.054071,-76.051258,0,0 -3806,2018,3806,42.27530900000002,-76.87137699999995,31,15908.390069000001 -3807,2019,3807,44.11763500000001,-75.13439400000001,37,18000.114918 -3808,2016,3808,43.45795599999999,-75.11089700000001,29,0 -3809,2015,3809,44.19107000000002,-73.50453299999992,53,26399.154867999998 -3810,2015,3810,42.68688599999997,-75.46680899999996,62,17017.216391 -3811,2019,3811,42.073439,-74.208119,0,0 -3812,2018,3812,43.71646099999999,-75.08006000000006,36,22384.321745999998 -3813,2015,3813,41.44656,-74.557769,0,0 -3814,2017,3814,42.991365,-73.591069,0,0 -3816,2014,3816,43.502062999999964,-75.48076200000004,49,27587.53230700001 -3817,2016,3817,42.044069000000015,-74.624812,51,20577.742897 -3818,2014,3818,42.49705999999999,-78.28060599999999,31,12431.259888 -3820,2015,3820,41.522491,-74.07086200000002,8,0 -3821,2014,3821,42.534294,-73.630368,0,0 -3822,2018,3822,43.39406399999999,-73.280644,3,46.098113 -3824,2013,3824,42.99221999999999,-73.69592800000002,21,18184.444771000002 -3825,2016,3825,42.46264699999994,-78.450339,85,26860.717864000002 
-3826,2013,3826,42.80696699999999,-77.19689900000002,11,6715.444563 -3831,2015,3831,44.074291,-76.229247,0,0 -3832,2014,3832,43.902076000000015,-74.10507000000004,54,36700.765006 -3833,2015,3833,40.82512699999999,-73.66479699999998,19,6200.807567000002 -3834,2017,3834,42.429458999999966,-79.13171700000011,53,25258.144291 -3835,2013,3835,42.859253000000024,-75.96841899999997,34,28814.2169 -3836,2014,3836,42.160235,-77.915955,0,0 -3837,2013,3837,44.30645499999996,-75.84968599999998,60,17251.800489 -3839,2016,3839,43.029451,-76.81761200000001,10,7646.813914 -3840,2015,3840,43.011137,-78.649266,2,148.92643800000002 -3841,2019,3841,42.305044,-75.96391,0,0 -3842,2018,3842,42.29527099999998,-76.74626600000006,33,0 -3843,2017,3843,42.165583,-75.59454,0,0 -3845,2013,3845,42.549082,-77.04382399999999,35,0 -3846,2018,3846,43.555362999999964,-74.29938399999996,34,5989.320983 -3847,2017,3847,42.68227000000001,-77.73399900000003,33,25884.806169999996 -3848,2014,3848,43.32678699999999,-75.24391399999995,28,15678.222432000002 -3849,2018,3849,42.47936699999996,-74.28496399999996,54,22788.535064000003 -3850,2019,3850,42.065879,-78.04798400000003,26,4015.8621709999998 -3851,2018,3851,42.59968600000001,-78.47003700000003,51,24412.995240000004 -3852,2015,3852,42.19907899999999,-76.48850099999996,37,26607.286832 -3853,2013,3853,41.923717,-73.817191,0,0 -3855,2016,3855,41.953532,-74.097016,36,22771.463419 -3857,2014,3857,42.13763399999998,-78.80390200000008,50,17815.391964000002 -3858,2017,3858,41.194,-73.549114,0,0 -3860,2015,3860,42.76996100000001,-78.243239,13,0 -3861,2014,3861,41.899012999999975,-74.21070700000004,40,25013.611679 -3862,2015,3862,42.85895,-74.091616,0,0 -3863,2013,3863,44.838809,-74.427282,0,0 -3864,2014,3864,42.365196,-76.063528,0,0 -3866,2016,3866,42.332738999999975,-76.063128,23,10647.129298 -3867,2013,3867,44.447357000000004,-74.36218800000005,41,20708.526255 -3868,2015,3868,44.54357100000001,-74.0421070000001,56,22455.310650000007 
-3869,2016,3869,41.501167,-74.446858,0,0 -3871,2014,3871,42.66648,-78.341408,0,0 -3873,2018,3873,42.36837300000001,-78.44179000000001,30,18009.055074999997 -3875,2015,3875,40.964351,-72.707336,0,0 -3876,2018,3876,43.16174199999997,-75.26521000000001,55,0 -3877,2013,3877,43.27729,-76.23846499999995,34,15532.899415000002 -3878,2016,3878,44.20672800000001,-74.63014900000003,28,17313.35639 -3879,2018,3879,42.509584,-75.523775,0,0 -3882,2015,3882,40.820386,-73.16654,0,0 -3883,2017,3883,41.270932,-74.210322,0,0 -3885,2013,3885,43.033888000000026,-76.20844899999999,23,21574.213971999998 -3886,2019,3886,42.56931800000001,-73.659486,35,0 -3887,2016,3887,41.957971999999984,-74.50288300000008,54,14743.167618999998 -3889,2017,3889,43.146185999999965,-75.45437399999994,50,7205.193933 -3890,2013,3890,41.932581,-74.043417,0,0 -3893,2016,3893,44.48795999999998,-74.98502300000006,44,6595.463395 -3894,2018,3894,42.14588800000001,-77.14833,10,4433.894623 -3895,2016,3895,42.835011,-73.90371900000001,14,14636.445587 -3896,2015,3896,42.63732499999997,-73.52093400000003,48,25700.518845999995 -3897,2018,3897,41.052241,-73.788808,0,0 -3898,2019,3898,42.923044,-77.575465,4,1338.873494 -3899,2017,3899,43.075902,-74.799663,0,0 -3901,2013,3901,42.47243499999997,-78.19013600000002,38,15701.744567999998 -3903,2019,3903,43.952308,-75.315932,0,0 -3906,2013,3906,42.002732,-77.031093,0,0 -3907,2014,3907,42.65042700000002,-75.72156899999996,41,32753.636278999995 -3909,2016,3909,44.431342000000015,-75.21646800000005,53,17517.181968999997 -3910,2016,3910,42.43147299999999,-77.56604300000001,15,7640.675620999999 -3911,2016,3911,42.608715,-74.831802,0,0 -3912,2018,3912,43.05130400000002,-73.32214699999996,48,13394.09589 -3913,2014,3913,42.12836599999997,-78.64151499999993,44,27039.207842 -3914,2014,3914,42.49575199999997,-78.45852899999988,68,12567.621546999999 -3915,2015,3915,44.569868000000014,-74.98532900000002,19,4943.807754 -3916,2016,3916,42.95322499999998,-74.907111,30,16776.203351 
-3917,2017,3917,42.846054,-76.764778,0,0 -3918,2019,3918,42.505493,-79.031224,0,0 -3919,2015,3919,42.007265,-76.647988,0,0 -3920,2014,3920,44.48976199999998,-74.17501099999993,49,34041.207133 -3923,2016,3923,43.17842299999998,-73.48672000000003,20,8739.050335 -3924,2013,3924,43.341005,-75.13134499999997,32,17717.166612 -3926,2013,3926,44.079383,-74.631896,39,11197.863432000002 -3927,2014,3927,42.864669,-75.561232,0,0 -3928,2019,3928,42.464553000000016,-79.049977,35,0 -3929,2014,3929,43.92590900000003,-73.57532099999999,70,18196.467899999996 -3930,2015,3930,42.72213,-77.594523,0,0 -3931,2019,3931,42.480588,-77.078495,0,0 -3932,2013,3932,42.208837,-77.934747,0,0 -3933,2018,3933,40.851352,-72.708927,0,0 -3934,2017,3934,42.51304,-78.741557,0,0 -3936,2015,3936,42.355188,-73.68894,0,0 -3938,2013,3938,42.40188600000006,-77.15653099999999,64,15540.806194999997 -3939,2017,3939,42.560398,-74.088253,6,1857.589919 -3940,2019,3940,41.59472400000002,-74.99979600000005,33,28898.996223 -3942,2014,3942,41.48013399999999,-73.583165,30,50379.738704 -3943,2019,3943,42.735066,-76.226497,0,0 -3944,2013,3944,44.523989000000014,-75.67789200000001,14,724.635119 -3945,2019,3945,42.035576,-77.74467300000003,40,18478.076162 -3946,2019,3946,42.67190099999999,-77.62224999999992,50,36607.74817199999 -3948,2017,3948,40.876928,-73.073826,0,0 -3950,2013,3950,41.298976,-73.768322,0,0 -3954,2019,3954,42.364305000000044,-76.99958500000001,42,0 -3955,2013,3955,42.997306,-78.207405,0,0 -3957,2015,3957,43.55935399999999,-74.57554100000006,56,17933.789051000007 -3959,2018,3959,42.965573,-77.736564,0,0 -3960,2013,3960,43.13915900000002,-75.59782099999995,46,14367.663389000001 -3961,2013,3961,43.450758,-75.577874,0,0 -3962,2017,3962,44.08756,-73.71513699999998,32,29077.065406 -3964,2015,3964,43.86207100000001,-74.69680800000003,45,22134.958875 -3965,2013,3965,43.30961499999996,-74.76336499999991,51,18620.048803 -3966,2014,3966,43.89696899999998,-74.90002299999999,33,24689.823331000003 
-3967,2017,3967,44.065080999999985,-73.55551999999993,38,16708.525975999997 -3968,2015,3968,41.593166000000046,-74.73064400000007,41,28390.905156 -3969,2015,3969,43.51590800000003,-73.68211500000004,43,27168.571767999994 -3970,2019,3970,43.46815400000003,-73.919791,73,32516.392061 -3971,2016,3971,44.63058999999999,-73.53133399999999,30,15177.922365999999 -3974,2013,3974,41.717379,-74.193879,0,0 -3976,2016,3976,43.389966,-75.899889,0,0 -3977,2017,3977,42.694862000000036,-74.66717699999992,49,6651.378575999999 -3978,2017,3978,43.14332599999997,-74.25175099999997,31,9260.817601 -3979,2018,3979,40.710356,-73.661768,0,0 -3981,2018,3981,41.16697699999999,-74.23574400000001,20,22113.090276 -3982,2014,3982,41.551712,-73.885308,0,0 -3984,2015,3984,41.678389,-74.854477,0,0 -3986,2016,3986,41.717592999999994,-73.57859200000003,26,12251.336615 -3987,2016,3987,42.485897,-77.725264,5,9918.378333 -3988,2015,3988,42.322183,-76.786929,12,7944.278406 -3989,2016,3989,42.404217,-76.355091,0,0 -3990,2018,3990,42.06472299999999,-77.02171500000003,25,10922.269233 -3991,2018,3991,42.547043,-76.221066,0,0 -3992,2017,3992,42.220182999999956,-74.30163200000005,48,18841.337575 -3993,2015,3993,42.26946899999999,-74.37369600000002,64,31038.330894 -3994,2017,3994,43.20817,-75.101138,0,0 -3995,2019,3995,40.883635,-73.343764,0,0 -3996,2015,3996,43.91986999999996,-74.51339400000005,39,21156.265843 -3997,2016,3997,43.32380799999999,-73.363926,9,9483.9724959999985 -3998,2013,3998,42.57889,-73.541029,40,18553.491718999998 -3999,2015,3999,42.535665,-73.94098299999997,25,25520.142195 -4000,2013,4000,43.29111700000007,-76.17541199999991,75,25751.709236 -4001,2013,4001,42.39624099999998,-78.37683100000002,38,9406.840531000002 -4002,2016,4002,42.576688,-78.362396,0,0 -4004,2014,4004,42.38374599999998,-78.52682899999995,32,15711.617096000002 -4005,2014,4005,42.76330800000004,-76.00877399999993,47,14638.956209 -4006,2013,4006,42.755491,-75.51023699999999,10,4456.21836 -4010,2016,4010,43.213831,-77.764885,0,0 
-4011,2013,4011,42.445641999999985,-77.82430400000003,32,7602.803748999999 -4012,2017,4012,44.78565000000001,-75.34214100000004,34,13869.273760000002 -4013,2018,4013,42.47334099999994,-74.4687439999999,52,40343.660022 -4014,2014,4014,42.23172500000004,-74.35565600000007,50,2373.523112 -4015,2015,4015,42.315169999999995,-78.51458000000001,20,16985.986839999998 -4016,2016,4016,44.22154600000001,-75.14362799999992,51,21407.415296000006 -4017,2017,4017,40.866961,-72.519767,0,0 -4020,2019,4020,40.988178,-73.787374,0,0 -4021,2013,4021,40.926059,-73.119269,0,0 -4022,2017,4022,44.34229099999997,-75.03520599999995,64,33440.189868 -4023,2014,4023,43.244105999999995,-73.65277799999996,22,15060.826755 -4024,2014,4024,44.16415800000003,-73.74082699999997,68,14773.215414999999 -4025,2017,4025,43.16986300000003,-76.91252799999995,42,29526.423800999997 -4026,2019,4026,42.531581,-78.316853,0,0 -4027,2013,4027,40.883983,-73.49967,0,0 -4028,2013,4028,42.39862100000001,-76.783142,19,17464.895341 -4029,2015,4029,42.096223,-76.025328,2,9154.459429 -4030,2016,4030,42.687910000000016,-76.01530000000001,29,9512.224052 -4031,2014,4031,42.977192,-74.233818,0,0 -4032,2013,4032,43.093271,-78.48428400000009,55,48175.403572999996 -4034,2013,4034,42.997245,-74.788824,0,0 -4035,2016,4035,43.634487000000014,-75.023768,32,19010.612673 -4036,2019,4036,44.576873,-75.199001,0,0 -4037,2015,4037,44.333866,-75.703305,0,0 -4038,2015,4038,41.880315,-73.643756,3,11925.893361 -4039,2019,4039,44.01937700000004,-74.48322999999992,66,25042.195902 -4040,2016,4040,43.49525900000001,-75.97828000000001,28,10347.712891000001 -4041,2013,4041,42.115875,-78.891616,0,0 -4043,2017,4043,43.316099,-75.74839700000001,20,0 -4044,2017,4044,42.307454,-75.667074,0,0 -4045,2017,4045,43.214444,-73.611853,0,0 -4046,2013,4046,42.934706,-77.34202100000002,10,1518.7424290000001 -4047,2015,4047,42.208613999999976,-77.74668199999998,35,9633.23414 -4048,2017,4048,42.891096,-78.68708100000002,31,3040.3174730000005 
-4049,2019,4049,44.77625899999999,-74.43484600000005,30,8801.647769 -4050,2014,4050,44.282835,-75.889837,0,0 -4052,2016,4052,44.390697999999986,-74.70786399999999,50,23977.363993 -4053,2019,4053,44.310383,-75.74598999999994,43,17101.629651 -4055,2018,4055,44.235627000000015,-73.403722,16,10837.797438 -4056,2016,4056,44.06667000000001,-76.051907,3,493.664422 -4057,2015,4057,44.759990999999935,-74.03634599999994,84,18380.566218999997 -4058,2015,4058,42.21821,-79.631274,30,26065.988578000004 -4059,2019,4059,42.94714100000001,-77.93687099999995,28,20381.146905 -4060,2013,4060,42.32635900000002,-77.18375599999996,24,30650.592021999997 -4061,2014,4061,42.703046,-74.396157,0,0 -4062,2014,4062,43.114295,-75.927991,0,0 -4063,2019,4063,43.22916,-76.987218,0,0 -4064,2018,4064,44.306707,-75.950288,0,0 -4065,2013,4065,42.221938000000016,-79.34591300000005,22,7772.913504 -4066,2019,4066,44.83201699999997,-74.68937199999999,34,14365.507204000001 -4067,2017,4067,41.949059,-74.833394,0,0 -4068,2013,4068,43.23434900000002,-75.83103399999995,47,32817.707927999996 -4070,2018,4070,43.425099000000024,-73.80659699999997,32,14078.82067 -4071,2014,4071,41.100928,-74.032747,0,0 -4072,2019,4072,44.845167999999944,-74.09856800000006,83,44328.938608 -4073,2018,4073,42.406674999999964,-77.9110220000001,48,18791.634821000003 -4074,2015,4074,42.118339,-79.187183,0,0 -4075,2017,4075,42.777897000000024,-78.39856000000002,19,19978.298388 -4076,2017,4076,42.518545,-77.687238,4,3008.262106 -4077,2016,4077,41.07086,-72.43825,0,0 -4078,2016,4078,42.25733,-76.291741,1,1.822086 -4079,2018,4079,44.644041999999935,-74.2845189999999,70,4644.344344 -4080,2015,4080,42.805502,-74.653233,0,0 -4083,2018,4083,43.03375,-78.602618,0,0 -4084,2015,4084,42.489344,-74.589299,0,0 -4085,2015,4085,43.061655,-75.95246,0,0 -4086,2019,4086,44.496369,-73.621728,0,0 -4087,2016,4087,41.86326699999998,-74.223706,26,34845.433181 -4088,2015,4088,42.777307,-77.027983,0,0 -4089,2017,4089,43.258867,-78.645129,0,0 
-4091,2017,4091,43.909226,-73.51164999999996,35,16706.894364 -4092,2016,4092,42.397112000000064,-76.02747199999995,100,25306.260329 -4094,2018,4094,43.11307399999999,-75.811456,14,0 -4095,2014,4095,43.14003200000001,-77.68312799999995,27,26541.195893 -4096,2016,4096,43.037259,-74.524374,0,0 -4097,2019,4097,42.79499500000001,-74.30093600000006,55,0 -4098,2013,4098,43.060420000000015,-73.65642299999995,31,28170.067110999997 -4099,2018,4099,42.683075,-75.209335,9,8826.914821999999 -4100,2013,4100,42.09215500000003,-74.12992600000004,50,36146.675670000004 -4101,2015,4101,42.927774,-78.01446,10,705.1249660000001 -4102,2013,4102,43.140885,-77.919448,6,0 -4103,2018,4103,42.404039000000004,-75.77441,35,27557.312664999998 -4106,2019,4106,43.77033499999998,-73.54644800000004,42,21692.798841999997 -4107,2017,4107,44.145542,-75.552364,0,0 -4108,2016,4108,44.37095499999999,-75.792004,13,10102.914431000001 -4109,2016,4109,43.1356,-75.879156,0,0 -4110,2014,4110,44.35705,-73.839247,0,0 -4111,2015,4111,42.385239,-74.967104,0,0 -4112,2019,4112,41.590762,-74.276896,17,2936.532564 -4113,2016,4113,42.829885,-74.429601,0,0 -4114,2014,4114,42.568904,-76.063308,5,0 -4115,2016,4115,42.908987999999965,-78.19828199999996,44,24721.560826 -4116,2015,4116,43.26594599999999,-74.87005699999999,12,5179.7065059999995 -4119,2018,4119,40.98765500000001,-72.234099,24,0 -4120,2016,4120,44.86574599999996,-75.08691300000007,80,28756.993407 -4122,2018,4122,42.871306999999945,-78.35244700000008,46,24824.154586000004 -4123,2019,4123,43.19791499999998,-76.69838900000003,43,32464.304798999998 -4124,2019,4124,44.324434000000075,-74.28827100000007,82,3713.0650639999994 -4125,2017,4125,42.806455,-73.51226799999996,29,5546.919029000001 -4126,2018,4126,42.996644,-75.902483,0,0 -4127,2019,4127,43.40305800000003,-75.454874,36,0 -4129,2015,4129,43.87467699999998,-74.35590599999989,56,12744.581689 -4131,2013,4131,41.453406,-74.713188,9,14199.429327 -4133,2013,4133,41.330125,-74.444009,0,0 
-4135,2014,4135,43.23013799999997,-73.30571499999998,34,18201.820692 -4138,2014,4138,42.016527,-77.68997399999994,42,30974.417738 -4139,2013,4139,43.154496,-76.56317,2,1677.439514 -4140,2015,4140,44.736809,-73.66128699999993,50,12766.204200999999 -4141,2016,4141,42.697888999999996,-73.958231,7,981.4455230000001 -4145,2016,4145,42.072263000000014,-77.10463200000002,45,19044.590368999998 -4146,2014,4146,43.31656399999998,-74.53981599999999,32,17970.120032 -4147,2013,4147,42.41228999999999,-78.700216,7,5121.751478 -4148,2014,4148,42.317269,-74.465651,0,0 -4151,2017,4151,42.582945,-74.232649,0,0 -4152,2014,4152,43.175986,-78.064039,0,0 -4155,2015,4155,42.070456,-74.787337,0,0 -4156,2018,4156,42.17472,-74.85408,0,0 -4158,2018,4158,43.938989,-75.523776,0,0 -4159,2018,4159,44.69908899999997,-74.961147,39,19273.92476 -4160,2014,4160,42.618525,-76.421583,0,0 -4161,2015,4161,42.361774999999966,-75.84844100000007,74,49098.052553999994 -4162,2015,4162,44.019687,-76.056223,6,303.695856 -4163,2013,4163,41.147645,-73.680373,0,0 -4164,2017,4164,44.36447299999998,-75.243248,18,14326.261106999998 -4165,2014,4165,41.486532,-74.86273,0,0 -4166,2013,4166,41.476981,-74.313563,0,0 -4167,2018,4167,42.862127,-77.803103,0,0 -4168,2016,4168,41.74703700000004,-74.82840399999996,56,12331.449860000002 -4169,2016,4169,44.744879999999995,-74.03287300000004,27,1318.5076270000002 -4171,2017,4171,42.889981,-73.305617,0,0 -4172,2019,4172,42.936337,-75.003291,0,0 -4174,2013,4174,42.774718,-77.870405,0,0 -4175,2014,4175,42.435875,-78.826912,0,0 -4176,2018,4176,44.669107,-75.391118,0,0 -4179,2018,4179,42.323382,-73.459472,0,0 -4180,2019,4180,43.65871799999999,-74.59100399999991,57,18622.439620999998 -4182,2017,4182,43.20034800000003,-78.51733400000008,43,6910.6993760000005 -4183,2017,4183,42.837849,-73.84411,0,0 -4184,2017,4184,44.47866599999998,-73.644011,35,12476.727841000002 -4186,2017,4186,41.748655,-73.53604,0,0 -4187,2017,4187,41.194303999999995,-73.638696,8,2893.228231 
-4188,2013,4188,42.23230600000003,-76.88949899999994,36,7264.3652630000015 -4189,2015,4189,44.951016,-74.888278,0,0 -4190,2014,4190,43.09728999999998,-75.57867100000007,40,19054.699015000002 -4195,2014,4195,40.94393300000001,-72.58008299999997,40,37357.697003999994 -4197,2014,4197,44.26663099999998,-75.56836300000002,31,22536.479142 -4198,2017,4198,44.51509200000006,-73.9640390000001,78,28196.904162000003 -4199,2017,4199,42.595156,-73.680362,4,2018.9726630000002 -4200,2019,4200,42.91717699999999,-75.96399099999998,12,11762.833141 -4201,2014,4201,43.02486499999999,-75.67009699999996,26,0 -4203,2013,4203,44.38934700000001,-75.685248,15,2673.132428 -4207,2019,4207,40.623529,-73.721892,0,0 -4208,2019,4208,43.881666,-76.031613,12,0 -4209,2016,4209,43.07101800000002,-76.671232,33,20562.718453999998 -4210,2014,4210,43.89309099999994,-73.96055900000009,109,20932.140975000002 -4211,2013,4211,42.08049200000003,-77.306564,49,20668.11134 -4212,2013,4212,43.750268,-73.56761899999997,32,17417.246492 -4213,2018,4213,42.793546,-74.393992,0,0 -4215,2018,4215,41.682624,-74.94694,0,0 -4216,2013,4216,44.918039,-73.637582,0,0 -4217,2015,4217,42.31936900000002,-78.874558,25,24045.226421 -4218,2016,4218,42.247978999999994,-78.89854800000009,43,20597.103468 -4219,2015,4219,43.88419599999998,-73.81209600000005,33,37648.866476 -4220,2017,4220,42.421473999999996,-75.03817399999991,50,20153.457352999998 -4221,2018,4221,42.33509499999998,-75.62387200000006,35,25312.392312 -4222,2019,4222,42.266274,-77.77727900000001,8,14077.199058000002 -4223,2016,4223,41.671553,-74.686024,0,0 -4224,2017,4224,42.481675,-73.41280800000003,31,27852.688038999997 -4227,2018,4227,42.068543,-76.554616,0,0 -4228,2015,4228,44.05737899999998,-73.62102099999997,26,14344.690945999999 -4229,2013,4229,42.38524299999996,-75.72228700000004,46,23109.520145000006 -4230,2018,4230,42.01195900000001,-77.950607,26,5205.971995000001 -4231,2017,4231,44.27140400000003,-74.90141899999992,51,6732.24433 
-4233,2016,4233,42.778557,-78.025209,0,0 -4234,2017,4234,44.103573,-73.90095,0,0 -4236,2017,4236,44.215037,-74.20488500000006,32,31122.020950000002 -4237,2013,4237,44.303036999999996,-73.70012000000003,61,20096.122573999997 -4238,2018,4238,43.810571,-75.924232,0,0 -4239,2017,4239,42.821329999999996,-75.49797800000005,22,8705.024673 -4241,2015,4241,43.776679000000044,-76.02672600000004,54,23056.915485 -4242,2015,4242,42.68625900000001,-75.168309,35,20930.836024999997 -4243,2016,4243,44.10685199999992,-74.13861900000008,91,25102.066988000002 -4245,2015,4245,42.30489699999999,-76.95687399999998,11,2408.593751 -4246,2017,4246,42.80187700000004,-75.183091,40,19704.743104 -4247,2017,4247,43.19059599999996,-74.02513400000008,68,28457.814603000006 -4248,2014,4248,43.072273,-75.282649,0,0 -4249,2017,4249,42.050947,-76.185965,0,0 -4250,2013,4250,41.88250100000003,-75.16554300000004,54,26524.746641000005 -4253,2013,4253,44.512909000000015,-74.45573199999993,47,22931.190231 -4254,2013,4254,42.284462,-74.8527,3,664.549089 -4255,2013,4255,42.070244,-79.60026299999993,48,17093.077005000003 -4256,2018,4256,43.203091,-76.32385,0,0 -4257,2017,4257,41.61561499999999,-74.511486,38,19613.613801 -4258,2019,4258,42.143235,-76.907355,0,0 -4260,2013,4260,43.771165000000046,-73.88778199999993,41,17581.851542999997 -4261,2015,4261,43.36748600000004,-75.3926499999999,49,0 -4262,2013,4262,44.782227,-75.047026,1,951.184671 -4264,2014,4264,42.300709999999995,-79.06347100000002,14,14670.666744 -4266,2016,4266,44.933669,-74.09074299999999,6,3842.06544 -4267,2018,4267,42.199749,-75.68246,0,0 -4268,2018,4268,43.17160800000005,-73.45822,43,15467.679675999996 -4273,2013,4273,42.38055599999999,-78.05299199999996,27,34803.884914999995 -4274,2019,4274,44.47427500000002,-74.21905400000004,53,19071.130703000003 -4275,2019,4275,44.350496000000014,-73.68411699999996,36,15504.438159000001 -4276,2014,4276,44.134694,-76.190426,0,0 -4277,2017,4277,42.173227,-79.61530799999994,44,0 
-4278,2019,4278,41.61871599999999,-73.581986,17,0 -4279,2014,4279,42.525158,-77.377731,4,1085.5568309999999 -4281,2013,4281,42.02461300000001,-77.69449899999998,19,10358.099298999998 -4282,2016,4282,42.12497400000001,-76.49138499999995,38,4806.200772000001 -4283,2016,4283,42.164217,-76.320316,0,0 -4284,2017,4284,43.87987699999999,-74.08198999999996,25,8717.671776 -4285,2013,4285,41.85173600000002,-73.92927400000004,36,37890.691509 -4286,2018,4286,42.53220899999998,-77.084163,47,31323.172941 -4287,2013,4287,43.03082800000001,-78.72155399999998,17,9874.181009 -4288,2014,4288,43.668998000000045,-74.28396199999999,42,18170.274212 -4289,2015,4289,42.62386100000002,-74.47432899999994,74,23272.67871600001 -4291,2015,4291,42.917691,-74.751845,0,0 -4292,2013,4292,42.700448999999985,-76.82575200000008,49,19341.627379999998 -4293,2018,4293,42.322703,-74.718118,0,0 -4294,2015,4294,44.127664999999986,-74.34231300000008,63,14975.262314000001 -4295,2015,4295,43.450286,-75.02962800000005,34,22830.921603 -4296,2013,4296,42.933753,-74.83068,0,0 -4297,2014,4297,42.66549199999998,-77.11087900000003,23,20215.147792000003 -4298,2014,4298,42.628993,-76.79853000000001,17,0 -4299,2017,4299,41.939548999999964,-75.002121,34,25365.907097 -4301,2019,4301,43.3902,-75.975465,0,0 -4302,2015,4302,42.11798900000001,-76.34531500000001,10,1998.7407289999999 -4303,2015,4303,44.37385799999997,-75.48179299999995,56,16872.977349999997 -4305,2016,4305,42.796430000000036,-73.58913799999993,50,16188.708611999999 -4306,2019,4306,42.09238,-76.299813,0,0 -4307,2018,4307,41.740493,-74.738191,0,0 -4309,2017,4309,42.363791000000006,-75.45044899999996,52,25020.244571999992 -4310,2015,4310,42.50447899999998,-79.14979999999998,21,0 -4311,2015,4311,42.343134,-79.23336,0,0 -4313,2014,4313,43.22165399999998,-74.01467099999998,21,15269.048837 -4314,2018,4314,44.32214999999998,-75.04518599999997,41,18636.863526 -4315,2014,4315,42.01441900000002,-74.48700700000002,37,21858.276483 -4316,2014,4316,43.725267,-75.881892,0,0 
-4318,2019,4318,42.444866,-78.400137,0,0 -4319,2016,4319,42.51754400000002,-75.18487999999995,37,22681.198594 -4321,2014,4321,43.81718900000002,-75.11214900000003,56,17846.460539999996 -4322,2017,4322,42.381214,-73.653363,0,0 -4324,2014,4324,42.981171,-76.391519,0,0 -4325,2018,4325,42.711315,-78.742399,0,0 -4326,2013,4326,42.640015,-75.18466,13,5482.23614 -4328,2018,4328,42.087456999999986,-78.927219,17,22205.88759 -4330,2016,4330,42.601255,-78.210233,0,0 -4331,2019,4331,44.903578,-74.826031,0,0 -4332,2017,4332,42.75815,-75.579934,0,0 -4333,2014,4333,43.06804999999999,-76.339567,11,6218.004414999999 -4334,2013,4334,44.92465700000001,-74.739975,8,897.115391 -4337,2017,4337,42.60846800000001,-76.518018,16,3089.1682480000004 -4338,2015,4338,43.70728899999996,-73.72151200000005,54,16029.795469 -4339,2014,4339,41.63747,-74.761681,0,0 -4340,2014,4340,42.21006,-75.259927,4,230.639481 -4341,2019,4341,42.04365200000002,-76.84539200000005,45,0 -4342,2014,4342,43.2228,-78.732111,29,305.02019300000006 -4343,2017,4343,41.453183,-74.283125,0,0 -4345,2014,4345,44.451798000000025,-74.33418899999997,27,13059.910725999998 -4347,2018,4347,42.14536500000002,-78.218621,38,19468.943903 -4348,2016,4348,42.303815,-78.34523,0,0 -4352,2018,4352,43.745515999999974,-73.60168800000001,33,10362.220820999999 -4354,2015,4354,43.21619699999999,-77.12112299999994,34,28055.411348999998 -4355,2014,4355,43.237648,-77.64093,0,0 -4356,2014,4356,42.586039999999976,-75.90659000000004,52,21798.421469999997 -4357,2019,4357,42.331576,-75.269763,0,0 -4358,2017,4358,42.19494199999998,-74.11872900000002,19,6448.286518000001 -4359,2013,4359,43.25065199999999,-77.310869,15,10190.026052 -4363,2013,4363,42.075288999999934,-76.17497599999993,57,0 -4364,2014,4364,42.393127,-77.628327,0,0 -4365,2013,4365,42.351918999999974,-73.41469000000002,28,15219.711254 -4366,2016,4366,42.469487,-75.250603,0,0 -4367,2013,4367,42.699991,-78.212133,0,0 -4369,2013,4369,42.812449,-77.595122,0,0 
-4370,2016,4370,43.895616999999966,-75.41704600000001,41,14406.787446 -4371,2019,4371,44.029446999999934,-75.0162879999999,60,28272.831508999996 -4373,2014,4373,43.97493200000003,-73.83916899999996,47,19173.301711000004 -4374,2018,4374,43.30312599999998,-75.05268699999998,57,16820.639927 -4375,2019,4375,43.79010999999996,-74.11203699999996,55,14934.229392 -4377,2019,4377,43.181062,-75.425484,0,0 -4378,2014,4378,44.769191,-75.21162400000001,56,10672.333675 -4382,2016,4382,41.82227800000004,-75.09356500000003,47,22336.681436 -4383,2015,4383,42.27614100000003,-76.631403,32,26008.907609 -4386,2019,4386,43.10819300000002,-73.61079500000004,22,10806.456215999999 -4387,2015,4387,42.365817,-79.531792,0,0 -4388,2014,4388,42.014843,-73.799104,0,0 -4389,2015,4389,42.177776,-73.885455,10,5509.16975 -4390,2017,4390,42.49955999999999,-74.25834399999995,52,33542.254607999996 -4391,2017,4391,42.914671,-74.210018,0,0 -4392,2016,4392,42.137604,-77.769203,31,17698.568375 -4393,2013,4393,43.69356199999998,-73.77738200000009,48,24562.243538 -4394,2016,4394,41.48084500000003,-74.12483999999996,44,22119.025833 -4395,2017,4395,44.586778000000024,-74.15881200000003,49,10750.269244000001 -4396,2019,4396,43.33820200000001,-75.69098800000005,25,0 -4398,2013,4398,44.59307,-75.371794,0,0 -4399,2017,4399,43.18762900000001,-78.69078099999993,43,19061.193122000004 -4400,2013,4400,42.116622,-79.207389,0,0 -4401,2015,4401,44.34963399999999,-73.43116000000005,33,27977.884309 -4403,2013,4403,43.917065000000015,-73.78766399999996,32,20316.374886999998 -4405,2015,4405,43.66187099999995,-75.51550700000004,68,26472.194472 -4406,2016,4406,44.071916,-74.73184399999997,25,5063.461508 -4407,2018,4407,42.456461,-75.17916999999993,50,0 -4409,2016,4409,42.01469500000001,-76.97368099999998,12,0 -4410,2015,4410,41.764849,-74.848442,0,0 -4411,2016,4411,43.971343,-74.58559,0,0 -4412,2018,4412,42.957734,-74.688201,0,0 -4414,2015,4414,42.554092999999995,-74.89687299999999,21,4628.888633999999 
-4416,2017,4416,43.333804,-78.599796,0,0 -4417,2017,4417,43.83915099999997,-74.47423399999997,35,23088.837315999997 -4419,2013,4419,41.86958099999998,-74.82357199999996,35,24259.090182000004 -4420,2018,4420,43.111687,-76.723806,0,0 -4421,2017,4421,40.830277,-72.603988,4,1055.038147 -4422,2016,4422,42.24341499999996,-74.31148299999997,48,45168.908645999996 -4423,2014,4423,44.626418000000044,-73.42357700000001,45,15521.256704000001 -4427,2015,4427,42.331365,-77.650257,0,0 -4428,2014,4428,44.69656599999999,-74.51208100000001,18,11231.256073 -4429,2016,4429,42.604344,-73.919143,0,0 -4431,2019,4431,44.77612100000002,-73.7788220000001,55,13244.9038 -4433,2018,4433,44.00673600000003,-74.31580699999996,44,22451.060607 -4435,2017,4435,42.808343,-74.11264,0,0 -4438,2016,4438,44.987288000000014,-74.828481,36,8574.710229 -4439,2013,4439,43.488233999999956,-74.08924099999999,65,9538.032192 -4440,2018,4440,44.90050100000003,-74.64920500000004,47,0 -4441,2014,4441,42.12114599999997,-74.25393100000008,55,18763.011710000002 -4442,2013,4442,43.06860099999997,-76.54772299999996,26,15229.173474 -4443,2018,4443,43.114162,-75.454117,15,6369.761791 -4445,2019,4445,42.80262300000002,-76.45311099999999,31,14275.247506999998 -4446,2016,4446,43.61349799999999,-73.65047099999997,37,4095.2809669999997 -4447,2015,4447,43.033655,-75.085552,3,300.207655 -4449,2019,4449,42.723108,-76.145708,5,2124.31002 -4452,2019,4452,43.059916,-75.015648,0,0 -4453,2019,4453,44.07572899999999,-75.28849399999994,32,7373.430043999999 -4454,2017,4454,44.200565,-75.894504,0,0 -4455,2017,4455,44.81035299999998,-75.11767300000004,63,25091.42092 -4456,2015,4456,42.943751,-74.199167,0,0 -4457,2019,4457,40.578334,-73.880532,0,0 -4459,2019,4459,42.863772,-77.721314,0,0 -4461,2014,4461,43.04750400000004,-74.91469000000002,36,9297.703523 -4462,2014,4462,40.893128,-73.258977,0,0 -4463,2013,4463,43.608197999999966,-74.56025700000006,41,18492.526512999997 
-4465,2018,4465,42.33134599999998,-76.49786899999992,50,26569.479436999995 -4466,2019,4466,43.49891299999996,-74.70794900000007,46,27941.56177299999 -4467,2019,4467,42.126038,-77.004237,0,0 -4469,2015,4469,42.92825000000001,-76.99304000000002,27,12833.083537 -4471,2014,4471,42.900185,-77.142524,0,0 -4472,2013,4472,42.66166900000003,-76.25577900000005,50,38513.84441399999 -4473,2014,4473,41.94954900000001,-74.541246,27,14715.387771000002 -4474,2016,4474,44.52984700000001,-75.00719299999994,40,20245.629564999996 -4475,2015,4475,42.250294,-75.36834899999998,9,2457.805062 -4476,2018,4476,42.26546099999997,-78.32214599999999,39,16173.576531 -4478,2016,4478,42.982125,-75.629518,1,0 -4480,2015,4480,42.71980199999998,-75.10440100000004,23,7248.248967 -4481,2018,4481,42.44182300000008,-74.35283999999987,78,35002.230621999995 -4483,2019,4483,43.147615,-77.742597,0,0 -4484,2019,4484,41.202062,-73.859217,0,0 -4485,2019,4485,43.748837999999985,-75.965465,23,0 -4487,2018,4487,42.22047199999998,-78.83457100000007,73,21037.692704 -4488,2017,4488,42.331324,-78.49604300000007,44,23152.896433 -4489,2018,4489,43.015376,-76.602466,0,0 -4490,2016,4490,44.41657,-75.515131,0,0 -4493,2016,4493,44.460047999999986,-74.61009800000005,34,0 -4494,2018,4494,42.01435199999997,-76.69800299999999,49,23858.899408 -4495,2013,4495,44.040037000000005,-75.19087799999993,50,10277.912125999997 -4497,2019,4497,42.560978,-73.819458,0,0 -4498,2015,4498,42.02622199999998,-77.44735099999997,34,29622.266095000003 -4499,2016,4499,42.91527799999999,-75.856885,17,2086.413956 -4500,2017,4500,40.787104,-73.27586900000004,34,11524.062098999999 -4501,2016,4501,42.251107,-77.336272,0,0 -4502,2017,4502,42.222446,-74.661204,10,3135.361572 -4503,2013,4503,42.511148,-76.361863,2,7612.2708840000005 -4504,2017,4504,44.03658500000004,-73.95180799999997,34,28997.24989 -4505,2019,4505,42.489233999999975,-73.55164299999996,34,26296.337784000003 -4506,2014,4506,42.568715,-74.688701,0,0 -4507,2017,4507,43.75341,-76.180152,0,0 
-4508,2015,4508,43.865812,-75.288828,3,0 -4509,2014,4509,43.66779400000001,-76.113861,9,26672.407669 -4510,2014,4510,43.185434,-76.772919,0,0 -4511,2017,4511,44.346139,-75.81438900000008,42,16426.763593 -4512,2013,4512,42.226244,-73.855514,0,0 -4513,2019,4513,42.214218,-79.516137,0,0 -4515,2016,4515,42.25003399999997,-75.20804600000007,44,0 -4521,2016,4521,42.131321,-77.038011,0,0 -4524,2019,4524,42.032398999999955,-74.89531799999997,46,21579.066427 -4525,2013,4525,42.604017,-78.06913,0,0 -4526,2016,4526,42.908319,-78.023643,0,0 -4527,2013,4527,43.923131000000005,-75.628561,17,17493.551019 -4528,2016,4528,44.492269000000015,-75.20232699999998,52,11308.894994999999 -4529,2016,4529,40.771073,-73.168018,15,0 -4530,2015,4530,42.928167,-77.255382,0,0 -4531,2016,4531,43.73930999999998,-75.51774999999999,39,8771.992923 -4532,2018,4532,41.559347,-73.605888,0,0 -4533,2016,4533,43.242223,-77.26733,0,0 -4534,2013,4534,42.499710000000036,-74.40578700000005,57,13773.734988 -4535,2014,4535,43.33782999999996,-73.79979199999991,51,12203.83115 -4536,2013,4536,42.087152,-73.947213,0,0 -4538,2019,4538,42.265527,-77.39575,0,0 -4540,2019,4540,41.91569499999997,-75.16157900000005,42,27067.789833999996 -4541,2013,4541,44.21022800000001,-73.59494999999997,28,10590.036623 -4542,2017,4542,44.063057999999984,-75.280976,26,26871.369151 -4543,2016,4543,42.677671000000004,-75.446424,22,11746.56203 -4544,2014,4544,41.35164199999998,-74.05331099999995,22,37918.053565999995 -4545,2016,4545,42.629276,-74.986715,20,16345.884263999998 -4546,2014,4546,43.656141,-74.863738,33,11709.978826 -4547,2013,4547,44.418244999999985,-74.18328500000004,60,3055.132923 -4548,2016,4548,42.917045,-76.911048,0,0 -4550,2017,4550,43.072795,-78.049366,0,0 -4551,2015,4551,43.91540899999998,-74.67441899999996,22,0 -4552,2014,4552,42.673245999999985,-77.26477,17,1186.964371 -4553,2014,4553,42.86260599999998,-78.21457599999997,39,8742.563961999998 -4555,2013,4555,42.51869899999998,-77.94820200000002,23,7098.374951999999 
-4556,2017,4556,42.628153,-74.915914,0,0 -4557,2016,4557,42.852681,-78.209495,0,0 -4559,2018,4559,42.422967,-73.950865,0,0 -4560,2016,4560,42.812392000000024,-77.90740099999995,33,29689.826531 -4561,2014,4561,42.684727,-75.33576,0,0 -4562,2018,4562,42.083769,-79.670178,0,0 -4563,2016,4563,44.264224000000006,-75.139849,37,0 -4564,2013,4564,42.256479,-78.647949,0,0 -4565,2013,4565,42.90359899999999,-77.89952700000002,34,39711.102682000004 -4568,2019,4568,42.835035,-76.681129,0,0 -4570,2015,4570,43.883289999999995,-76.14488699999997,29,3659.9956770000003 -4571,2019,4571,44.907185999999946,-73.87733500000006,70,9799.842424000002 -4572,2018,4572,44.41223799999994,-74.63353300000006,55,9886.749614999999 -4574,2018,4574,43.305133000000005,-78.57697800000001,11,0 -4575,2018,4575,42.02888,-74.56802799999997,30,27771.736428 -4576,2018,4576,43.836131000000016,-73.55312499999995,28,10861.898728 -4578,2014,4578,41.63087900000003,-74.409713,28,36067.746696 -4581,2014,4581,42.365006,-76.945112,10,8334.462808999999 -4584,2015,4584,43.042152,-78.118385,0,0 -4586,2018,4586,44.382712999999974,-73.881762,110,17344.676296999995 -4589,2015,4589,42.45542500000004,-76.58830200000007,41,17859.481625 -4590,2018,4590,44.253211,-73.581002,0,0 -4591,2015,4591,42.746786000000014,-74.86464700000005,25,21133.272487999995 -4592,2017,4592,43.070841,-77.6766,0,0 -4596,2015,4596,42.38728100000002,-73.55492400000001,31,8701.063441999999 -4597,2013,4597,42.657404,-75.031713,0,0 -4598,2018,4598,42.609316999999976,-74.17681700000001,39,9922.289876 -4599,2015,4599,42.95540499999999,-74.54176000000004,54,29333.888884999997 -4601,2017,4601,42.314165,-76.881413,0,0 -4602,2016,4602,42.55309400000002,-74.130473,36,19303.363377 -4604,2016,4604,41.64112100000003,-74.347855,33,30607.254247 -4606,2017,4606,42.998297,-73.759526,0,0 -4607,2018,4607,42.70467499999997,-76.10990399999996,31,9714.371464 -4608,2013,4608,43.14134599999997,-74.50752300000003,32,38047.714963000006 
-4609,2014,4609,42.812342,-78.625649,1,1218.846426 -4610,2016,4610,42.51385200000003,-76.79162200000009,53,17760.756453 -4611,2018,4611,43.161289000000004,-75.63433000000002,19,5150.184845 -4612,2013,4612,43.288356000000014,-75.67135800000004,50,33786.231625 -4613,2013,4613,43.136839999999985,-76.67231799999996,27,25553.144474999997 -4615,2013,4615,42.605970999999975,-74.52479600000001,37,25046.210494999996 -4618,2017,4618,43.199161,-77.076548,37,10315.05528 -4619,2019,4619,43.73202399999998,-74.265377,35,0 -4620,2013,4620,41.069979999999994,-72.3259,9,9501.39951 -4621,2019,4621,44.715159,-74.165292,2,0 -4622,2019,4622,42.690486,-73.988573,0,0 -4623,2016,4623,42.128163999999984,-73.83581499999994,51,38080.046507 -4624,2013,4624,43.264676,-76.06453400000005,32,15729.56855 -4626,2017,4626,41.27304200000001,-74.38735399999997,28,18082.747498 -4627,2016,4627,42.967807,-76.524317,0,0 -4630,2013,4630,43.80871900000005,-74.98311699999991,46,7974.513829000001 -4632,2015,4632,43.37759,-76.635719,4,151.68697200000003 -4633,2015,4633,42.77777,-73.95274,0,0 -4634,2018,4634,42.781046999999965,-76.2311330000001,48,11906.462378 -4635,2019,4635,43.68651500000003,-73.44257999999995,37,14917.531630000003 -4636,2014,4636,42.702308,-76.576314,0,0 -4637,2017,4637,43.058118,-73.56659,0,0 -4638,2016,4638,42.780784,-78.707777,0,0 -4640,2017,4640,42.47923999999998,-74.09445799999999,60,17596.963879 -4641,2013,4641,43.285258,-75.327728,0,0 -4643,2019,4643,43.058179,-78.043358,0,0 -4644,2018,4644,42.72016099999998,-75.35547399999997,38,27896.544979000006 -4645,2015,4645,42.47375899999998,-74.16459,35,6656.7594770000005 -4646,2014,4646,43.213676000000014,-78.95022100000004,31,8321.040375 -4647,2013,4647,43.463641000000024,-75.35764999999998,24,5553.2249520000005 -4648,2013,4648,42.864202,-78.451288,0,0 -4649,2017,4649,43.74473099999994,-73.89783500000004,66,0 -4650,2015,4650,44.59652099999998,-74.42782900000009,54,2301.5959479999997 
-4651,2013,4651,44.21171499999997,-75.443945,43,13630.282947000003 -4652,2013,4652,42.671992,-74.47349,0,0 -4653,2016,4653,44.09614400000005,-73.58676299999995,63,11525.072273999998 -4654,2013,4654,42.187621999999976,-75.31332299999998,39,12592.062276 -4657,2013,4657,43.030218999999995,-75.13029400000003,41,11934.919156 -4658,2016,4658,43.872358000000006,-75.06756799999998,10,3026.50239 -4659,2016,4659,42.43337699999999,-75.96426199999998,30,0 -4660,2017,4660,42.392187,-76.845449,1,0 -4661,2013,4661,43.715345,-76.1145,0,0 -4662,2014,4662,44.434104,-75.64117799999998,6,323.38441 -4663,2016,4663,42.711868,-78.631837,6,3963.8221439999998 -4664,2019,4664,44.172083,-76.242915,0,0 -4665,2015,4665,42.171067,-76.17850299999999,12,0 -4666,2019,4666,43.111248999999994,-78.15861700000002,10,10345.772606999999 -4667,2019,4667,43.249798000000034,-75.172231,38,25982.526835999994 -4668,2017,4668,40.855504,-73.226226,22,23850.637826 -4669,2013,4669,43.299598000000046,-74.59411700000007,44,23248.261231 -4670,2016,4670,42.94192300000002,-74.94144300000002,37,25395.254630000003 -4671,2019,4671,41.960599,-73.715672,0,0 -4672,2013,4672,43.565407999999984,-75.13385299999996,40,26627.117384999998 -4673,2013,4673,43.368058,-76.377475,0,0 -4674,2016,4674,43.236265999999915,-75.57321099999992,95,26156.536118000004 -4675,2013,4675,43.92723,-74.68696700000005,36,11987.447951999999 -4676,2019,4676,42.800757,-77.973636,0,0 -4677,2013,4677,42.04859899999999,-79.09191199999995,50,13239.327814999999 -4679,2014,4679,44.80771699999996,-74.06494900000004,45,18284.375394 -4680,2014,4680,42.01094199999997,-73.82149499999998,33,19183.100972 -4681,2016,4681,44.90897,-74.519252,0,0 -4683,2019,4683,42.332802,-77.585522,0,0 -4684,2013,4684,41.50840399999999,-73.58182599999999,10,1910.0690659999996 -4685,2019,4685,44.06718099999993,-73.93148099999988,93,4623.021440999999 -4687,2017,4687,43.76316999999997,-73.44807399999995,28,19320.252257 
-4688,2013,4688,42.94506299999999,-74.40490700000004,26,7897.874494000001 -4689,2016,4689,41.88944799999998,-74.32691999999999,34,33119.659674 -4690,2019,4690,42.826706,-73.624653,1,195.584106 -4691,2019,4691,44.84161700000003,-74.65144000000006,44,25153.268189 -4693,2018,4693,44.18251199999998,-74.21305399999989,101,10796.241753000002 -4694,2014,4694,42.622073,-73.44887100000004,54,27920.693634 -4695,2017,4695,43.137368,-78.895697,0,0 -4696,2019,4696,42.76988200000004,-74.74993799999996,40,17383.474121 -4697,2018,4697,42.119899,-76.250051,0,0 -4698,2018,4698,42.274004999999995,-75.83540500000001,14,0 -4700,2018,4700,42.97587,-76.821068,0,0 -4701,2014,4701,42.68586000000003,-77.435469,33,19315.218985 -4702,2014,4702,42.430528,-76.729407,0,0 -4704,2017,4704,43.62032399999999,-73.68358200000003,63,27146.829918000003 -4705,2019,4705,42.82991600000002,-78.32410099999997,24,6456.732258000001 -4706,2014,4706,44.76582699999997,-74.77536800000004,48,0 -4708,2013,4708,42.75283,-73.684055,0,0 -4709,2015,4709,42.983683,-75.827692,2,56.202576 -4710,2015,4710,43.55792300000001,-75.70508999999996,53,25235.846705000007 -4712,2017,4712,42.42656499999999,-78.25084599999995,28,0 -4713,2016,4713,43.37751400000001,-74.63333899999999,47,20496.757325000002 -4714,2013,4714,44.53839100000003,-73.74692300000004,42,8035.177965999999 -4715,2015,4715,43.33239099999999,-73.27684300000006,35,35999.018801 -4716,2017,4716,44.04024200000001,-74.14109400000002,28,0 -4717,2017,4717,44.178361,-76.008573,0,0 -4718,2014,4718,44.78592100000001,-74.230593,25,14450.791036999999 -4719,2015,4719,42.939009,-75.119272,0,0 -4720,2018,4720,44.33468300000002,-74.494232,36,14672.531969000003 -4722,2015,4722,41.83617499999997,-74.76968399999998,46,19774.226097 -4723,2016,4723,42.931706,-76.174851,0,0 -4724,2013,4724,44.267,-76.14554299999999,8,9079.247793999999 -4725,2018,4725,42.835158,-74.369512,0,0 -4726,2017,4726,42.70511800000002,-75.74503800000001,50,23519.878848 
-4729,2013,4729,41.255671,-74.15847000000002,23,13072.143935 -4730,2013,4730,42.695338,-78.48531099999995,30,24312.626277 -4731,2015,4731,42.849298,-73.319506,0,0 -4733,2014,4733,41.00295400000003,-72.153588,40,13229.263407 -4737,2014,4737,44.36007500000003,-74.00881399999997,35,22869.749678 -4738,2018,4738,42.36597799999999,-78.650561,7,1145.2030300000001 -4739,2013,4739,42.400771,-76.800073,0,0 -4740,2017,4740,42.311364,-77.972305,0,0 -4741,2018,4741,41.502717,-73.814812,0,0 -4742,2016,4742,42.396388,-77.260096,0,0 -4743,2017,4743,44.15913,-75.731874,0,0 -4744,2019,4744,43.93626500000003,-74.897837,49,14050.232328 -4745,2015,4745,42.29855599999999,-78.570156,21,6336.599249999999 -4746,2013,4746,43.007646,-78.356715,0,0 -4747,2014,4747,44.392416000000004,-75.284034,12,67.834069 -4748,2018,4748,42.844391,-76.411073,0,0 -4749,2015,4749,44.833634999999994,-74.73967700000001,17,3416.9119920000003 -4751,2014,4751,43.70039999999996,-74.87058299999993,43,18851.984746000002 -4753,2014,4753,44.566196,-74.37411100000006,40,27966.94012 -4755,2014,4755,41.703516,-74.855964,0,0 -4756,2018,4756,42.172023999999986,-75.37547500000001,32,14656.808304999999 -4757,2018,4757,42.037753999999964,-77.58979700000006,41,26143.537982 -4758,2017,4758,43.91949500000005,-75.1727340000001,52,0 -4759,2016,4759,42.639779,-74.194691,0,0 -4760,2018,4760,43.37830299999998,-76.57736800000004,35,22141.007846 -4761,2019,4761,42.01859199999999,-74.40211600000012,115,10808.62841 -4762,2018,4762,42.378454,-77.320719,0,0 -4763,2015,4763,43.69228000000005,-74.97192899999999,67,12276.621303000002 -4764,2014,4764,43.13075000000001,-75.02155899999998,32,5930.871023 -4765,2016,4765,43.003846,-74.708507,0,0 -4766,2019,4766,42.039838,-75.2324,0,0 -4767,2013,4767,42.48244,-78.828385,0,0 -4768,2016,4768,43.20622000000002,-78.42688100000004,26,8504.738774 -4769,2017,4769,42.63865,-78.115647,0,0 -4770,2013,4770,43.35717599999999,-74.37127400000004,31,37304.082457 -4772,2015,4772,43.207739,-78.394068,0,0 
-4773,2013,4773,40.714191,-73.26615,3,2123.849603 -4774,2018,4774,42.456251,-74.82877699999999,33,16118.505817000001 -4775,2016,4775,44.280353000000034,-75.33788999999996,36,6611.092719999999 -4776,2016,4776,43.171949,-77.502934,0,0 -4777,2017,4777,42.595261,-76.395984,0,0 -4778,2013,4778,43.31072600000001,-78.507206,26,6850.086241999999 -4779,2014,4779,43.683241,-75.22842600000003,56,16910.723476999996 -4780,2014,4780,42.49007000000004,-74.89611399999995,30,25698.596186 -4781,2018,4781,41.599633999999995,-74.14991999999998,13,0 -4782,2019,4782,42.20955499999998,-75.486895,37,16992.090847000003 -4783,2013,4783,42.829035,-78.128344,0,0 -4784,2018,4784,43.63421199999998,-75.32460399999995,58,16365.760773 -4785,2013,4785,43.19844899999999,-74.127569,19,3807.06004 -4787,2015,4787,41.77671299999998,-74.95813399999999,22,4929.178858 -4788,2018,4788,43.130647,-73.815264,0,0 -4790,2018,4790,41.331775,-74.489879,0,0 -4792,2013,4792,42.288530999999985,-75.78217700000003,34,20602.042679999995 -4793,2016,4793,43.418059999999976,-75.68599799999996,44,33005.32403999999 -4795,2018,4795,43.90629,-75.662627,0,0 -4796,2018,4796,40.688314,-73.714252,0,0 -4798,2014,4798,44.231365000000046,-75.36768600000003,54,16354.403335 -4799,2017,4799,42.842673,-74.484513,0,0 -4800,2015,4800,42.874685,-77.815446,0,0 -4801,2018,4801,40.831966,-72.718899,0,0 -4802,2013,4802,43.25074999999997,-74.98435500000004,47,19227.009695999997 -4803,2017,4803,42.17259800000001,-78.04049800000003,27,10463.130758 -4805,2017,4805,43.765022999999985,-74.33690399999996,34,15874.227745999999 -4806,2018,4806,42.865354,-74.913588,0,0 -4807,2017,4807,43.72571599999997,-76.007607,40,13713.798626 -4808,2015,4808,42.800611,-73.419824,8,6239.877049000001 -4809,2013,4809,42.46781700000002,-73.85272500000005,61,22128.310516999994 -4810,2015,4810,42.095158000000005,-78.26343399999998,33,29834.469731999998 -4811,2014,4811,43.72376000000003,-74.659533,44,31339.880955 -4813,2016,4813,44.004799,-75.887322,0,0 
-4814,2015,4814,42.46092600000007,-77.25282200000011,84,34350.099176 -4815,2013,4815,41.819266999999996,-73.60520599999997,27,22945.720612000005 -4817,2016,4817,42.936482,-77.080107,0,0 -4818,2018,4818,43.46855299999998,-76.334558,41,18969.207039 -4819,2015,4819,41.6263,-74.218211,2,141.11985099999998 -4820,2015,4820,41.867201999999956,-74.61704099999999,48,27323.887033999996 -4821,2013,4821,43.597324000000015,-75.46869199999996,34,27856.082112999997 -4823,2013,4823,42.762614,-77.987028,0,0 -4824,2015,4824,44.237339,-76.04132800000001,6,1631.3731010000001 -4825,2016,4825,42.086177,-76.71356,0,0 -4826,2016,4826,43.35718100000003,-74.32125499999994,47,28300.267353999996 -4827,2014,4827,44.030420999999976,-74.78867600000002,50,14245.234466000002 -4828,2014,4828,43.973627,-75.90872,0,0 -4829,2015,4829,43.039172,-75.59101299999999,10,9066.454533 -4830,2018,4830,43.96286400000004,-74.87872400000003,45,18270.135735 -4834,2014,4834,44.938014,-74.16562799999998,10,2277.142416 -4835,2015,4835,42.482265,-78.79087,0,0 -4836,2019,4836,43.286757,-76.811242,26,30604.945402 -4837,2015,4837,42.14081399999999,-74.639372,14,1703.0475649999998 -4841,2015,4841,44.24818999999996,-75.882107,64,12776.305315999998 -4842,2019,4842,43.88274900000005,-74.19395799999995,44,23071.264476999997 -4843,2014,4843,42.305936,-75.384015,0,0 -4844,2019,4844,42.77975700000001,-76.01731300000003,27,7984.018916999999 -4845,2014,4845,41.220329,-73.60015700000002,24,18333.752495 -4846,2018,4846,42.530210000000004,-75.852572,23,24767.461025 -4847,2015,4847,40.709186,-73.518339,0,0 -4850,2018,4850,44.54263100000002,-75.04986999999998,37,10204.474476 -4851,2013,4851,44.406876000000004,-74.01297900000006,40,35751.06678000001 -4852,2018,4852,42.929587,-76.894964,0,0 -4853,2017,4853,42.39726599999996,-78.42003999999999,59,31084.316187 -4854,2016,4854,43.761320999999974,-74.58794500000002,30,27167.739489 -4855,2018,4855,43.975305999999954,-75.6434499999999,74,19238.56927 -4856,2017,4856,41.767164,-73.733804,0,0 
-4859,2018,4859,41.600169999999984,-74.84692200000006,51,15284.937731 -4860,2013,4860,44.634874,-74.969686,0,0 -4861,2019,4861,41.32728899999999,-73.65484999999998,16,22929.620605000004 -4863,2014,4863,42.91001000000001,-73.35363900000002,13,12237.661434999998 -4864,2018,4864,42.742582,-77.179275,0,0 -4865,2017,4865,43.53730300000002,-73.61265700000003,54,45483.064941000004 -4866,2016,4866,42.567332,-76.289387,0,0 -4867,2017,4867,42.996068,-77.540902,0,0 -4868,2015,4868,43.026923,-73.392857,0,0 -4869,2016,4869,42.83348,-75.078677,2,69.71000599999999 -4870,2015,4870,44.185161000000065,-73.89449799999984,125,10639.165787000004 -4871,2016,4871,42.78359900000004,-78.76456499999996,48,12250.960695000005 -4874,2019,4874,40.985364,-73.844186,0,0 -4877,2019,4877,42.966508,-76.399425,2,496.317109 -4878,2014,4878,43.276569,-78.180382,0,0 -4881,2016,4881,42.385703,-74.191226,0,0 -4882,2018,4882,42.031686,-73.652167,0,0 -4883,2018,4883,44.09891100000002,-76.31982500000002,34,7735.510909 -4884,2019,4884,41.41365,-74.394986,2,348.10458800000004 -4885,2018,4885,43.100911,-75.135,0,0 -4886,2019,4886,43.66405099999999,-73.99143700000003,50,20470.132405000004 -4887,2016,4887,42.18311899999999,-77.915839,12,12913.708159 -4888,2014,4888,42.04163200000001,-74.863658,28,0 -4890,2013,4890,41.53934699999996,-75.00052599999992,54,21371.649037000003 -4891,2018,4891,42.810018,-74.770483,0,0 -4893,2018,4893,43.17298400000003,-77.78855400000006,42,15149.234662 -4894,2014,4894,43.170825,-76.62273899999997,27,0 -4895,2015,4895,43.177122,-75.529265,2,412.44006 -4896,2017,4896,42.894061,-77.420459,0,0 -4897,2017,4897,42.20144200000001,-76.11271899999998,18,4020.0083790000003 -4899,2018,4899,42.898662,-74.547033,0,0 -4900,2015,4900,41.92936000000002,-74.36125399999996,35,29138.684229000002 -4902,2014,4902,41.87743799999999,-73.879529,31,16533.698024999998 -4903,2016,4903,42.677905,-76.73181999999998,42,31003.718305 -4904,2016,4904,42.746386,-76.819086,1,0 
-4905,2013,4905,43.27436,-73.48513200000001,10,4907.312197 -4906,2015,4906,42.19025700000002,-74.20311899999989,60,18367.895621 -4909,2019,4909,43.742402,-75.53334,0,0 -4912,2019,4912,43.523963999999985,-75.9212,11,0 -4913,2018,4913,43.25384200000001,-74.487092,33,21939.456128 -4915,2019,4915,44.24774,-75.629327,5,4401.869635 -4916,2013,4916,42.737235,-78.906312,0,0 -4917,2018,4917,42.36324599999994,-75.93626200000007,57,24864.320066000004 -4918,2014,4918,42.09309699999996,-76.58704500000012,60,19482.161480000002 -4919,2016,4919,42.080334,-78.31892100000003,40,34452.718754 -4920,2013,4920,43.32820899999998,-75.99263500000005,54,0 -4921,2017,4921,43.658875999999985,-75.7066800000001,51,15023.958591999999 -4922,2015,4922,43.074612,-77.910684,0,0 -4923,2018,4923,43.865935999999984,-73.502473,11,0 -4925,2013,4925,44.16186599999997,-74.74237700000002,54,18781.296075000002 -4929,2015,4929,42.311332,-74.64092299999997,31,12965.839556999998 -4930,2017,4930,42.15104500000001,-75.40021900000006,37,20950.61015 -4933,2014,4933,43.80062700000003,-73.68449599999998,41,47485.012339 -4934,2019,4934,43.159536,-76.337494,0,0 -4935,2014,4935,41.808025999999984,-74.10298600000002,37,22767.131737 -4936,2017,4936,44.73639,-73.46558,0,0 -4937,2016,4937,44.031687000000026,-74.93077600000008,72,10479.12206 -4938,2017,4938,41.643269,-73.991655,0,0 -4939,2014,4939,44.58833899999997,-74.97746999999995,41,6423.503836999998 -4940,2015,4940,42.65381499999996,-75.30848699999991,45,26360.266041 -4941,2015,4941,41.143278,-74.133618,8,7669.795441 -4942,2018,4942,43.66845400000004,-74.73698,38,24732.921557000005 -4943,2016,4943,41.92716900000003,-74.186398,34,38401.204698 -4944,2015,4944,43.52084,-76.060538,0,0 -4945,2017,4945,41.917009,-73.75418399999992,50,9946.874908 -4946,2017,4946,42.361398,-78.833206,2,284.508383 -4948,2018,4948,43.334682,-73.647726,0,0 -4949,2015,4949,42.713074,-76.36097000000004,38,12053.344769 -4950,2016,4950,42.542984,-76.972147,0,0 
-4953,2015,4953,43.622658,-74.30657900000004,42,25057.331354999995 -4954,2019,4954,42.52317300000002,-76.0011330000001,49,0 -4955,2017,4955,43.69860799999998,-75.46706700000003,51,54228.843107 -4956,2014,4956,42.214378,-77.38881599999996,26,11180.547797000001 -4959,2016,4959,42.791658,-75.895242,0,0 -4960,2017,4960,42.710071,-76.500337,0,0 -4961,2014,4961,44.939494999999994,-74.324875,20,8248.921355 -4962,2013,4962,43.447664999999994,-73.54146200000004,56,29837.289871 -4964,2016,4964,43.421268,-76.427426,0,0 -4967,2019,4967,44.34985099999996,-74.23338199999993,45,36547.554235 -4971,2014,4971,42.820518,-74.87041899999998,38,21136.655301000003 -4972,2018,4972,44.31111499999998,-74.84699899999998,46,7009.213354 -4974,2018,4974,43.144591,-77.2341,0,0 -4975,2014,4975,44.239701,-74.45181,0,0 -4976,2016,4976,44.60140900000001,-74.50902700000005,57,11119.233782000001 -4978,2013,4978,42.316023,-78.416472,0,0 -4979,2017,4979,43.64938400000002,-75.90732699999997,21,0 -4980,2014,4980,43.10071800000001,-78.44872399999998,10,1899.877944 -4981,2016,4981,43.27880900000002,-76.27993799999997,18,22017.429501 -4983,2018,4983,42.894522,-76.174554,0,0 -4986,2014,4986,43.17118899999999,-73.89532800000005,32,14220.864222 -4987,2018,4987,44.08623599999999,-73.539489,11,0 -4988,2014,4988,42.790564,-76.862171,0,0 -4989,2019,4989,43.815156,-75.38622299999999,20,0 -4990,2017,4990,42.24642,-79.740412,0,0 -4991,2019,4991,43.64849499999996,-73.62181999999996,58,20343.239841999995 -4992,2015,4992,42.426926999999985,-76.01054100000007,47,22652.166707999993 -4993,2018,4993,43.146176,-77.842228,0,0 -4995,2014,4995,40.638306,-73.63255,0,0 -4996,2015,4996,43.35501800000003,-76.50394700000007,62,45234.586446 -4998,2015,4998,43.18288700000002,-74.60170999999998,20,8884.248785 -4999,2015,4999,42.79406799999997,-74.70259699999998,47,28452.788784 -5000,2019,5000,44.960462,-74.15832499999999,44,12906.357492 -5002,2017,5002,41.23012900000001,-73.71787300000004,34,0 
-5003,2019,5003,43.379906,-75.62989100000001,18,0 -5005,2013,5005,43.052993,-77.802873,5,2331.562694 -5006,2017,5006,40.76508,-73.835803,0,0 -5007,2019,5007,44.435428,-75.29501399999998,34,4886.991631 -5008,2019,5008,42.09069499999999,-78.38866499999996,22,25871.063166 -5009,2016,5009,42.926108,-78.887278,0,0 -5010,2019,5010,43.384455,-73.781063,0,0 -5011,2015,5011,43.141477,-76.995912,0,0 -5012,2014,5012,42.26640100000001,-74.15374599999997,58,22592.091935999997 -5013,2014,5013,44.06486500000004,-74.19184099999995,46,29168.512636 -5014,2014,5014,42.449487,-79.356179,0,0 -5017,2014,5017,43.178431000000025,-75.04487700000001,30,26392.024011 -5018,2018,5018,42.445621,-78.885011,0,0 -5019,2016,5019,41.69126899999998,-74.61458799999994,39,29919.528837 -5020,2013,5020,43.819212,-75.501709,10,3166.5689669999997 -5021,2018,5021,44.22013199999995,-73.97348500000015,85,18657.279739999998 -5023,2019,5023,41.547314000000014,-74.839818,15,13380.341592 -5025,2014,5025,41.97026200000002,-74.02014699999997,22,13874.557175000002 -5026,2019,5026,41.98597100000001,-74.94777999999998,40,32324.990700000002 -5027,2013,5027,41.79071199999999,-74.15079100000001,11,2761.029191 -5028,2019,5028,42.891975,-75.605987,0,0 -5029,2014,5029,43.613090999999976,-74.15397500000006,53,14247.016409999998 -5031,2013,5031,40.804741,-72.987743,0,0 -5032,2018,5032,42.111654,-75.935998,0,0 -5033,2017,5033,43.193446,-76.18523400000002,43,6039.153127 -5034,2018,5034,42.273083,-78.693235,0,0 -5035,2019,5035,42.466046,-75.36088900000003,29,18589.106906 -5036,2016,5036,42.079719999999966,-75.13456700000005,41,16616.236276000003 -5037,2014,5037,42.46873699999999,-77.42775700000001,14,12135.377193 -5038,2017,5038,44.28444799999992,-74.00207099999992,75,9151.439835000001 -5039,2016,5039,44.88248,-74.372179,0,0 -5040,2014,5040,42.070366,-77.61173,0,0 -5041,2018,5041,43.321028,-78.076451,0,0 -5042,2013,5042,42.40819699999998,-75.29759800000006,46,24640.209769999998 
-5044,2013,5044,43.20608999999997,-74.295037,40,7274.357662 -5045,2017,5045,43.27933299999998,-74.28824800000002,26,25940.063211999994 -5046,2014,5046,44.167498,-75.599832,0,0 -5048,2018,5048,42.89979799999995,-75.78725300000016,96,15140.45826 -5050,2013,5050,43.24401,-73.89554500000001,18,9212.253866000001 -5051,2017,5051,43.113965,-74.07459400000003,41,11778.604914 -5052,2019,5052,40.703706,-73.459974,0,0 -5053,2017,5053,42.317871999999994,-78.142163,21,0 -5054,2018,5054,42.78492099999996,-75.31829400000001,34,19605.028853000003 -5055,2018,5055,42.91992099999999,-74.902744,38,4964.130939 -5057,2014,5057,42.52271100000002,-74.3238460000001,49,16334.901525000001 -5058,2016,5058,42.454557000000015,-78.298945,33,938.469312 -5059,2018,5059,42.237951,-76.778564,0,0 -5060,2019,5060,42.646548,-75.606338,0,0 -5064,2018,5064,41.747283,-74.58658199999995,30,26581.966014999998 -5065,2013,5065,42.541647999999945,-75.7972759999999,53,0 -5067,2014,5067,43.291082999999986,-78.35727399999998,22,2557.4358780000002 -5069,2018,5069,42.416799,-74.519588,0,0 -5071,2017,5071,42.208064,-74.470607,0,0 -5072,2017,5072,44.025179,-75.641601,4,3.916496 -5075,2018,5075,42.62055500000001,-73.60420799999999,12,2332.536599 -5076,2014,5076,42.465147,-75.303447,18,4671.892924 -5078,2018,5078,42.1471,-79.084612,0,0 -5081,2019,5081,44.01799899999998,-73.60878000000001,41,16841.318123 -5082,2013,5082,42.65801599999999,-76.442274,14,0 -5083,2017,5083,44.612461,-75.394061,0,0 -5084,2013,5084,42.222899,-74.396965,0,0 -5085,2013,5085,43.890066999999995,-73.45145000000001,11,4516.43398 -5086,2014,5086,44.829322,-74.23566,0,0 -5087,2013,5087,42.41452500000002,-76.57149200000002,22,16378.419570000002 -5089,2014,5089,42.912188,-74.247043,0,0 -5090,2013,5090,44.362296999999955,-75.33723599999996,45,15875.536100999998 -5093,2016,5093,41.289604,-73.907473,0,0 -5095,2017,5095,42.933779,-75.646256,0,0 -5097,2016,5097,43.30946,-78.299193,0,0 -5098,2017,5098,42.480188,-78.072881,0,0 
-5099,2019,5099,44.20684600000003,-74.347984,39,28135.111376 -5101,2019,5101,42.44176300000002,-74.12112799999991,45,16655.951906000002 -5102,2019,5102,41.71705099999999,-73.69864499999996,28,45563.088998 -5103,2015,5103,43.323314,-74.100944,0,0 -5104,2014,5104,42.953612,-76.929237,0,0 -5105,2019,5105,42.391273,-74.832918,0,0 -5106,2018,5106,42.04772499999998,-74.007752,35,21028.934232 -5107,2014,5107,43.937678,-75.559234,0,0 -5109,2015,5109,42.210788999999984,-76.85873199999997,22,19843.893893 -5110,2015,5110,40.65819,-73.760885,0,0 -5111,2015,5111,44.65390299999994,-74.52119799999997,60,0 -5112,2013,5112,43.42243599999999,-76.14861299999998,14,1905.0338939999997 -5113,2013,5113,41.241581,-73.980252,0,0 -5114,2018,5114,42.678564999999985,-78.58716700000004,48,19105.961658999997 -5115,2013,5115,44.54619099999996,-74.65657000000007,59,0 -5118,2017,5118,43.368031999999985,-74.32083700000011,63,35831.957737 -5120,2016,5120,42.199317000000015,-79.13113100000007,40,35216.053394 -5122,2014,5122,45.000977000000006,-73.87265100000002,59,19981.435088000002 -5123,2014,5123,43.452041,-74.28647199999999,30,21410.173733 -5124,2015,5124,43.09412199999998,-73.53248800000004,28,14518.444255000002 -5126,2019,5126,43.26145199999999,-74.65884800000002,51,25461.916892 -5127,2015,5127,44.126610999999976,-74.15502199999995,50,25419.270654 -5128,2017,5128,42.153894,-77.892627,0,0 -5129,2014,5129,42.72125899999996,-74.935571,62,62809.157342000006 -5131,2018,5131,44.51222600000003,-75.66425600000001,23,3910.3032949999997 -5132,2019,5132,42.01012100000002,-75.28400300000006,39,40858.85320799999 -5133,2014,5133,42.44276300000001,-74.930663,11,14156.734345 -5134,2017,5134,43.185432,-78.163071,0,0 -5135,2013,5135,42.65630400000003,-73.40864700000006,41,14144.904962000002 -5136,2016,5136,43.199939,-75.654604,1,571.362959 -5137,2016,5137,44.36362299999998,-73.63107499999995,34,23568.762365000002 -5138,2017,5138,43.152045,-77.654342,0,0 -5140,2017,5140,43.60186599999999,-75.178087,19,12972.165796 
-5142,2014,5142,44.83872800000007,-73.81653000000004,88,11344.586157000003 -5144,2017,5144,41.632163,-73.61749800000003,27,34558.143511 -5145,2013,5145,42.512584,-74.18296199999999,11,8861.338122 -5146,2016,5146,42.49734999999999,-75.045099,6,2466.768317 -5147,2015,5147,43.72271,-74.41715000000006,38,13812.698789000002 -5148,2013,5148,43.57098300000002,-76.061682,48,17644.16145 -5149,2019,5149,42.30838500000002,-76.34619900000003,32,14490.386878 -5150,2017,5150,42.13420300000001,-77.383057,31,4834.303285 -5151,2016,5151,42.63389600000002,-74.22904599999997,25,7955.680894 -5153,2018,5153,43.137059,-77.064143,0,0 -5154,2015,5154,44.725435,-73.658512,0,0 -5155,2017,5155,44.13403500000002,-74.23987799999999,38,11497.283209999998 -5156,2018,5156,42.668345,-76.986196,0,0 -5158,2017,5158,42.25580000000001,-77.280308,13,10265.113533 -5160,2019,5160,42.198094000000026,-79.17714299999992,46,6574.390956 -5161,2015,5161,42.500069000000025,-74.01721300000003,25,11270.339417000001 -5162,2014,5162,43.489183,-75.68575,0,0 -5163,2015,5163,42.788336,-75.608882,0,0 -5164,2016,5164,42.462558,-77.019367,0,0 -5165,2017,5165,42.46305600000004,-75.55338399999995,46,39216.528958 -5166,2014,5166,42.264120000000034,-78.08778699999996,46,23916.984411000005 -5167,2013,5167,42.60662899999996,-76.63663499999998,34,29698.831965999998 -5168,2016,5168,41.60807,-74.915708,24,24691.150493 -5169,2014,5169,43.024309,-77.06317,0,0 -5170,2013,5170,42.78835,-77.077311,0,0 -5172,2013,5172,42.879107,-77.557387,0,0 -5173,2014,5173,44.57925900000004,-73.68635099999996,58,24354.222814999997 -5175,2013,5175,41.929709,-74.59805899999999,20,13604.356713 -5176,2014,5176,42.74085999999995,-78.49986200000002,43,23713.856401 -5177,2014,5177,42.16698700000002,-79.33523999999991,47,23421.120498999997 -5178,2016,5178,42.441629000000006,-74.53006000000002,19,0 -5179,2019,5179,42.409251999999974,-76.08030300000006,43,4555.394986 -5182,2017,5182,40.94590999999997,-72.29496400000005,49,11109.72097 
-5183,2013,5183,42.04351300000002,-76.30318600000005,38,6964.808927 -5184,2018,5184,41.505631,-73.67562199999999,11,6198.313468 -5187,2014,5187,44.548263,-75.52187099999995,57,6725.682348999998 -5188,2013,5188,42.561072,-75.973788,0,0 -5190,2019,5190,43.30418699999998,-75.91366999999995,29,16543.503982 -5191,2013,5191,42.20080200000002,-77.60559800000007,41,6221.2417080000005 -5192,2013,5192,42.952068,-78.616198,0,0 -5194,2013,5194,42.22629600000003,-78.13588100000004,46,14001.34195 -5195,2018,5195,40.849266,-73.55481,5,0 -5196,2016,5196,43.643728999999965,-73.55049099999998,37,25178.732101999998 -5199,2017,5199,42.218677999999976,-78.73072599999998,31,27321.548045999996 -5200,2017,5200,42.285725,-73.552976,0,0 -5201,2019,5201,41.407363000000004,-73.554302,21,10109.569456000001 -5203,2016,5203,42.391305,-74.718767,0,0 -5208,2017,5208,42.809469000000014,-73.35981999999998,37,31873.24287 -5209,2016,5209,43.30302699999998,-75.77761599999997,42,0 -5210,2013,5210,41.37239799999999,-73.881893,17,15922.242064 -5212,2013,5212,44.61557900000002,-74.61058099999994,43,1525.928556 -5213,2017,5213,44.50008699999994,-73.83417499999997,60,9724.383262 -5214,2017,5214,43.75293199999999,-73.61068800000002,70,22517.640828999996 -5217,2013,5217,43.747114,-74.46959999999994,34,29941.369399999996 -5218,2015,5218,44.016916000000016,-73.68624899999996,51,18575.078018 -5219,2015,5219,43.832872,-76.159205,0,0 -5221,2014,5221,42.989571,-78.518978,0,0 -5222,2014,5222,42.771531999999986,-74.37439399999997,20,2490.2072740000003 -5223,2015,5223,42.56235900000001,-73.41389400000001,13,2529.769033 -5224,2014,5224,44.21457600000003,-74.12085800000007,39,24853.054829 -5226,2018,5226,42.26812999999997,-77.919081,52,12450.948109 -5227,2017,5227,42.467559,-75.421976,0,0 -5229,2018,5229,43.22696,-78.803005,0,0 -5230,2017,5230,42.23620899999997,-78.20071099999997,48,26841.071250999998 -5232,2015,5232,42.22606900000003,-75.10372700000002,37,18736.236194000005 
-5233,2014,5233,42.70552799999999,-77.78916699999996,24,0 -5237,2014,5237,42.22642999999999,-76.472448,34,11016.652245999998 -5238,2019,5238,42.849221,-77.435678,0,0 -5240,2016,5240,43.144635,-78.019986,0,0 -5241,2019,5241,40.764899,-73.610795,0,0 -5242,2014,5242,43.204488,-75.704004,2,0 -5244,2013,5244,43.637406,-75.991901,0,0 -5245,2013,5245,44.196597000000025,-75.073189,33,10144.78053 -5246,2015,5246,41.42790200000001,-74.50244199999999,20,20209.925336 -5250,2016,5250,42.215165000000006,-74.553205,6,14247.892004000001 -5251,2019,5251,42.62949099999998,-75.74418100000001,41,18784.964645000004 -5252,2014,5252,40.6938,-73.795762,0,0 -5253,2018,5253,42.256487000000064,-74.10165300000003,60,14436.434299 -5254,2018,5254,42.094301,-77.71229,0,0 -5255,2018,5255,42.781182,-76.58839,0,0 -5256,2019,5256,42.56746599999998,-75.068449,39,32321.522404000003 -5257,2014,5257,41.523052,-74.303569,9,3341.3583790000002 -5259,2013,5259,40.983016,-72.179645,0,0 -5260,2015,5260,43.218259,-76.752054,0,0 -5261,2013,5261,43.37539599999996,-75.46134399999994,55,36492.45049799999 -5262,2015,5262,42.430174000000044,-74.41848899999998,68,45818.360807000005 -5264,2013,5264,42.550987,-78.270126,0,0 -5265,2018,5265,44.956839,-74.420242,0,0 -5266,2013,5266,43.227396,-73.728435,36,21915.205842000003 -5268,2017,5268,42.944103,-75.041787,0,0 -5269,2013,5269,42.147289,-77.978257,0,0 -5270,2017,5270,42.515355,-77.29924699999995,44,22888.340034000004 -5271,2013,5271,44.557354000000004,-74.07329699999997,24,4714.326033 -5272,2018,5272,42.82826100000007,-78.14660300000007,62,36815.754252 -5273,2014,5273,44.809976,-73.69996300000001,13,1322.142694 -5274,2013,5274,43.044883,-75.307856,3,0 -5276,2013,5276,42.85052399999997,-74.12684900000005,36,18724.520065 -5277,2014,5277,42.814446000000004,-76.166428,30,29398.209443999996 -5278,2013,5278,44.853117,-73.882863,0,0 -5279,2016,5279,43.46760399999998,-73.80254000000005,55,22551.238232 -5280,2019,5280,43.066724000000015,-75.46752199999996,35,9788.926674 
-5281,2017,5281,43.262741,-77.055475,2,493.418708 -5282,2015,5282,43.07152999999999,-74.50589799999993,42,30803.154029999998 -5284,2015,5284,42.229862,-76.347443,0,0 -5287,2015,5287,42.16923100000003,-74.35804800000004,44,25926.585601999996 -5289,2017,5289,41.670612,-74.270203,0,0 -5294,2019,5294,41.353927,-73.736667,5,2157.4479140000003 -5295,2015,5295,42.59558799999996,-74.61898800000004,48,29917.568713000004 -5298,2014,5298,43.226445000000005,-76.23985199999998,29,0 -5299,2017,5299,42.536465,-75.349227,0,0 -5303,2013,5303,43.264925,-78.740654,0,0 -5304,2014,5304,44.563918999999956,-74.79214499999993,40,14295.911768999998 -5305,2019,5305,42.766398,-76.025265,0,0 -5307,2013,5307,44.50005800000001,-75.244636,13,3752.055871 -5308,2016,5308,44.078743000000024,-74.67225700000006,49,13404.320724 -5310,2019,5310,43.26253799999998,-73.70714,25,15742.327208 -5312,2014,5312,42.167191,-73.68001000000001,14,2788.5302570000003 -5313,2019,5313,43.237556,-78.214576,0,0 -5314,2014,5314,43.14480600000001,-74.24881699999997,24,14372.014121 -5316,2013,5316,43.040806,-77.470655,0,0 -5317,2016,5317,44.40211399999999,-74.31462300000008,45,20139.280360999997 -5318,2016,5318,44.83410400000002,-74.5627760000001,53,8551.332103 -5319,2018,5319,44.25086400000004,-75.24775499999998,59,14586.994444999997 -5320,2018,5320,43.721917,-75.461437,0,0 -5321,2014,5321,42.832599000000016,-76.27257499999997,36,6623.037772000001 -5323,2013,5323,44.983456,-74.53223,0,0 -5324,2017,5324,42.51247800000005,-79.22764499999995,57,17707.503306000002 -5325,2015,5325,42.527969,-78.98071,0,0 -5326,2018,5326,43.222127,-74.31748599999996,30,38392.596247 -5327,2016,5327,42.37916700000003,-77.41093700000006,32,27181.732572999998 -5328,2018,5328,42.365603,-78.759448,0,0 -5329,2013,5329,42.466777999999984,-75.85508599999996,62,26373.786957 -5330,2013,5330,43.58747300000003,-75.29873700000005,42,23903.291131 -5331,2014,5331,42.429188999999965,-75.836452,45,34336.059436999996 
-5333,2017,5333,44.864242999999995,-74.71577699999993,47,24088.544501999993 -5334,2017,5334,43.09223899999998,-76.789937,18,12094.467950999999 -5336,2019,5336,43.94245700000003,-74.35995099999995,35,24562.902518000003 -5337,2018,5337,43.28152799999999,-76.63996799999997,47,24156.281135999994 -5339,2017,5339,40.920698,-73.888941,0,0 -5340,2013,5340,44.03748099999998,-75.998247,23,1745.300709 -5342,2016,5342,44.906753999999985,-74.41722400000002,20,7228.609496000001 -5343,2016,5343,41.891092000000015,-74.4061660000001,54,34538.943706 -5345,2018,5345,43.13019100000002,-77.11560300000008,56,15048.873211000002 -5347,2016,5347,42.003381999999995,-78.86858099999996,26,35090.935591999994 -5348,2019,5348,42.09812200000003,-75.034408,52,33988.213713 -5349,2014,5349,42.549497,-76.660079,0,0 -5350,2016,5350,42.07666199999997,-78.67864400000003,24,34862.878715000006 -5351,2017,5351,44.25955700000001,-74.766049,32,7716.880696 -5352,2016,5352,44.227723999999995,-75.69908400000001,21,7016.593656000001 -5353,2019,5353,42.158543,-73.91440999999999,14,2515.6802369999996 -5354,2014,5354,42.556513,-73.945313,0,0 -5358,2017,5358,44.672070000000026,-74.44368900000005,44,22050.582560000003 -5359,2013,5359,41.99631,-73.99867799999997,34,14893.378826 -5360,2018,5360,41.176861,-73.698061,0,0 -5361,2017,5361,42.983013,-77.401924,0,0 -5362,2013,5362,44.27082000000001,-73.35052499999995,57,19744.111008 -5364,2016,5364,42.34281099999999,-74.4194530000001,74,31682.536706999992 -5365,2014,5365,42.005799,-77.847232,0,0 -5366,2015,5366,43.768329,-74.204303,54,13915.510897 -5367,2016,5367,44.11439700000004,-74.50352299999997,43,12436.202362 -5369,2014,5369,43.592767999999985,-75.09601100000006,43,27910.044738 -5370,2018,5370,42.62171600000001,-78.80759800000003,30,15549.11043 -5371,2019,5371,44.961312,-74.599284,0,0 -5373,2013,5373,41.155347,-73.853183,0,0 -5375,2015,5375,42.871048,-76.126119,0,0 -5379,2019,5379,44.760189000000025,-74.910891,38,3467.006093 -5381,2019,5381,42.00581,-78.259396,0,0 
-5382,2015,5382,43.455973999999976,-74.965819,42,29162.910038 -5383,2017,5383,43.445218999999994,-74.57741399999996,54,13317.438323999997 -5384,2019,5384,43.406299,-74.186958,25,35631.462945 -5385,2017,5385,42.597987999999994,-74.37681999999997,36,22390.425551 -5386,2017,5386,44.03607400000003,-74.31885300000005,34,0 -5387,2014,5387,44.743923,-75.021818,0,0 -5388,2013,5388,40.675958,-73.812263,0,0 -5390,2017,5390,43.52898499999996,-74.15145399999994,57,20594.162786 -5391,2019,5391,42.474522,-76.568114,0,0 -5392,2015,5392,42.956166,-76.72740899999998,6,0 -5394,2017,5394,42.954836,-76.159018,0,0 -5396,2017,5396,44.113914000000015,-75.76194600000005,44,0 -5398,2016,5398,44.229547,-75.54384600000002,29,2790.333287 -5399,2016,5399,42.236457,-76.077481,0,0 -5402,2017,5402,41.78806499999999,-74.03461500000002,26,14703.185554000002 -5405,2018,5405,44.396641,-74.091142,0,0 -5406,2015,5406,41.43464,-74.830759,0,0 -5408,2013,5408,43.48563199999995,-75.15924200000005,46,0 -5410,2017,5410,42.122508999999994,-77.53671399999999,14,2240.2835769999997 -5413,2016,5413,42.262559,-76.93615700000007,42,9706.905795 -5414,2019,5414,42.73331099999998,-76.60669499999996,32,25242.563352999998 -5415,2016,5415,44.233001999999956,-74.20945599999992,76,19752.4621 -5416,2016,5416,42.997413999999985,-74.82348199999994,39,0 -5419,2018,5419,42.843182,-77.623624,0,0 -5421,2017,5421,42.763351000000036,-73.56889900000002,36,0 -5422,2019,5422,43.20434700000002,-74.47194799999994,32,13959.237031 -5423,2016,5423,42.423522999999975,-76.68930700000007,51,14668.002986000001 -5424,2013,5424,43.76507700000001,-74.63180099999988,76,7852.475289 -5425,2014,5425,43.454633,-76.23334,0,0 -5427,2019,5427,42.45931000000001,-76.75410899999999,12,6649.019069 -5428,2019,5428,42.31611099999999,-74.20099000000002,18,2729.91255 -5429,2018,5429,43.684164999999965,-76.03784899999997,37,16867.798907 -5430,2013,5430,43.86841099999998,-74.93538900000004,36,8002.698667000001 
-5431,2017,5431,42.141080000000024,-79.26219300000001,35,25083.418426 -5432,2017,5432,42.490783,-76.077914,0,0 -5434,2019,5434,44.99963699999994,-73.57174299999998,52,7963.696505999999 -5436,2018,5436,42.804155,-76.904795,0,0 -5437,2019,5437,42.346464,-74.541623,0,0 -5438,2014,5438,44.875069,-74.392421,0,0 -5441,2015,5441,42.566393,-77.993375,0,0 -5444,2015,5444,43.85187799999996,-73.954428,42,23714.213277 -5446,2013,5446,43.014136,-77.124512,0,0 -5447,2014,5447,43.37558200000001,-75.03074199999995,71,16987.552053 -5448,2015,5448,44.060891,-75.478997,0,0 -5449,2019,5449,42.80803999999994,-78.51537599999997,59,31686.825386999997 -5450,2013,5450,44.43812299999995,-74.877023,62,37679.25010499999 -5451,2017,5451,41.45403600000001,-74.62063099999997,32,45930.400605 -5452,2017,5452,43.275351,-78.797563,0,0 -5453,2018,5453,42.058784000000045,-74.90419000000004,39,33699.99395499999 -5454,2015,5454,42.613292,-73.537748,0,0 -5455,2019,5455,42.536406,-76.868485,0,0 -5456,2014,5456,42.12264200000003,-74.93893699999992,43,18476.256190999997 -5457,2013,5457,42.585674,-77.891768,0,0 -5458,2019,5458,42.26511099999999,-78.09957600000004,54,21219.189847 -5459,2016,5459,44.15884000000002,-75.79595399999997,20,3537.6494620000008 -5460,2015,5460,44.304881000000016,-73.61321399999999,49,18354.1797 -5461,2018,5461,43.15286599999997,-75.38677700000002,40,17758.306343999997 -5462,2019,5462,40.582631,-74.20008400000002,11,5562.669304 -5463,2017,5463,42.822007,-78.559076,0,0 -5465,2016,5465,42.608032,-74.062302,6,1377.181169 -5466,2019,5466,42.24163700000002,-77.05308800000007,41,20066.774914 -5467,2019,5467,44.161025,-73.449962,3,0 -5468,2017,5468,42.146886,-74.506512,0,0 -5469,2017,5469,42.59215199999996,-77.6229239999999,51,25696.451744 -5470,2019,5470,43.394244,-73.646149,0,0 -5472,2013,5472,42.46125200000004,-74.78657700000001,33,21915.872428 -5474,2015,5474,44.05617099999997,-73.87878499999994,31,8062.638505 -5475,2018,5475,44.158952,-76.078582,0,0 
-5476,2013,5476,43.652952000000006,-73.42256599999999,11,1937.631323 -5477,2015,5477,43.97080600000002,-75.72767599999999,38,25228.278214 -5478,2014,5478,43.663223999999964,-74.10676700000006,38,16367.290217999998 -5479,2013,5479,43.79093,-75.73512700000002,6,5056.44179 -5480,2013,5480,42.99453,-74.613814,0,0 -5481,2018,5481,42.75331,-77.144706,0,0 -5482,2019,5482,43.25143300000001,-76.904091,14,5482.986766 -5483,2018,5483,43.773232,-75.219998,0,0 -5484,2016,5484,42.33075000000001,-77.13226799999998,12,2558.146224 -5485,2014,5485,40.807217,-73.359718,6,0 -5486,2016,5486,43.019728,-77.224605,0,0 -5487,2017,5487,42.35112399999997,-73.498611,65,18143.444801 -5488,2019,5488,43.092333000000025,-77.824124,20,3068.560162 -5489,2018,5489,42.470779,-77.34299400000006,45,25311.808340000003 -5490,2019,5490,42.413878000000004,-75.16945299999999,31,16193.855247000001 -5491,2018,5491,44.50402899999996,-74.34545900000008,46,11243.042468 -5492,2019,5492,43.317011,-74.962007,0,0 -5493,2017,5493,41.73913700000002,-74.44935600000005,50,16350.835135999996 -5495,2017,5495,43.995559,-74.569356,0,0 -5496,2018,5496,43.051391,-75.678079,0,0 -5497,2014,5497,44.100971,-75.833244,0,0 -5498,2016,5498,42.711857,-75.25785499999998,40,27322.406581000007 -5500,2016,5500,43.150678,-76.790483,0,0 -5501,2016,5501,42.351188999999984,-77.9820330000001,48,18651.038925 -5502,2017,5502,42.08895,-74.74613900000003,29,34070.308353 -5503,2015,5503,42.219266,-75.06171700000003,33,15535.692674000002 -5505,2013,5505,44.35072300000001,-74.90596100000009,79,10782.106051000002 -5507,2017,5507,41.71965699999999,-75.00327800000001,13,7780.799861999999 -5508,2015,5508,44.329570999999994,-74.40766399999995,45,25914.443170000002 -5510,2014,5510,43.955037,-73.710716,7,149.986029 -5512,2016,5512,42.08224700000004,-79.02999499999999,40,25650.657369 -5513,2017,5513,42.44944600000002,-76.27522700000006,40,24026.818248000003 -5514,2018,5514,42.918095,-78.308261,0,0 -5515,2013,5515,42.059197,-79.43811,0,0 
-5516,2014,5516,43.034159,-78.317,0,0 -5517,2013,5517,42.978065,-75.523685,0,0 -5518,2016,5518,42.632317,-75.637499,4,2328.220211 -5519,2018,5519,42.414461,-79.240611,3,1310.002548 -5520,2018,5520,44.63818199999997,-75.565963,49,4270.485000000001 -5521,2015,5521,43.756181,-74.251919,0,0 -5522,2014,5522,43.403070000000014,-73.73280999999999,34,38927.918715 -5523,2015,5523,43.10933800000003,-74.28782899999997,58,14423.845413000001 -5524,2018,5524,44.15115799999997,-73.63913299999999,34,22254.836431999996 -5525,2019,5525,43.074845,-77.990826,0,0 -5526,2013,5526,43.36791800000002,-76.537329,14,11782.374893 -5529,2014,5529,42.83521,-75.228548,0,0 -5530,2018,5530,43.21394899999999,-76.83537,15,8334.258652 -5531,2016,5531,43.994480000000024,-74.80554600000004,47,25204.859949 -5532,2016,5532,42.018278,-74.782904,0,0 -5534,2014,5534,42.51636899999996,-78.77888100000001,33,21546.369279 -5535,2014,5535,43.476767,-76.440402,0,0 -5536,2019,5536,42.07546700000004,-77.22484799999998,34,0 -5538,2014,5538,43.205015999999944,-74.77964100000005,51,25692.579456 -5541,2017,5541,42.09607400000002,-73.99406799999998,38,26532.477754 -5542,2018,5542,42.52703599999997,-76.66671099999996,43,19228.635027999997 -5543,2016,5543,41.70419,-74.131433,0,0 -5544,2013,5544,42.476592,-76.914386,5,5524.910293 -5545,2017,5545,42.53527,-74.650977,0,0 -5546,2017,5546,42.877936,-75.84695,0,0 -5547,2018,5547,43.015668,-74.46188900000006,53,22352.920511999997 -5548,2017,5548,42.768141999999976,-78.21535499999999,31,44231.534826 -5549,2019,5549,43.315777000000004,-73.99863899999994,50,0 -5550,2017,5550,42.680129,-74.505604,0,0 -5551,2016,5551,44.247707,-73.477068,2,1443.421458 -5552,2017,5552,40.91308800000001,-73.00065600000006,50,15653.310669 -5553,2016,5553,43.90948400000001,-74.41136999999995,41,3232.045326 -5554,2015,5554,42.024292000000045,-75.737605,76,31887.752772999997 -5555,2017,5555,42.02926300000001,-79.43791000000004,23,6536.893076 
-5557,2019,5557,42.77040900000002,-73.34849799999996,59,15590.882162 -5558,2014,5558,43.276599,-75.848589,0,0 -5559,2018,5559,43.53535200000001,-74.04681299999996,62,15622.073186000001 -5560,2019,5560,40.835165,-72.504724,0,0 -5561,2015,5561,43.381677,-73.436782,0,0 -5563,2018,5563,41.843163,-75.041955,0,0 -5565,2013,5565,42.958426,-73.356076,0,0 -5566,2019,5566,43.461967000000016,-75.74686800000006,59,28014.714912000003 -5567,2015,5567,42.161507,-79.677823,0,0 -5568,2019,5568,42.066135000000024,-76.890543,48,35160.37084999999 -5569,2015,5569,43.022420000000004,-75.23739399999992,42,23479.114954 -5570,2017,5570,41.98386800000002,-74.44698199999996,42,11154.397207 -5571,2016,5571,43.282761,-78.465079,0,0 -5572,2013,5572,40.641277,-73.311266,0,0 -5573,2013,5573,43.85905899999994,-74.76330099999997,52,29954.612713 -5574,2015,5574,44.08468399999997,-74.87683999999993,57,19742.880013 -5576,2017,5576,42.284548,-79.190491,0,0 -5577,2019,5577,42.50264900000001,-73.49537899999997,64,38536.618485 -5578,2017,5578,42.87918200000001,-74.03238699999999,15,20960.728834 -5579,2016,5579,43.80009900000003,-73.98199000000007,39,30917.24007 -5580,2013,5580,41.275463,-74.462331,0,0 -5582,2018,5582,42.545256000000016,-75.28216300000003,30,15272.926815 -5584,2019,5584,44.09613499999995,-75.17177899999993,61,7680.3749179999995 -5586,2016,5586,42.037914,-79.5753,0,0 -5590,2016,5590,43.24443599999998,-78.53284499999997,24,14941.85441 -5591,2016,5591,41.74467000000002,-73.762528,30,29556.505703 -5592,2019,5592,43.219855,-78.612863,0,0 -5594,2013,5594,43.67610400000005,-74.52069699999994,119,23823.072232000006 -5595,2014,5595,42.449071000000004,-76.36357399999999,46,42444.136728000005 -5597,2017,5597,44.900846,-74.2731,0,0 -5598,2015,5598,41.99761999999999,-75.01344700000003,24,17947.784388000004 -5599,2019,5599,42.68668499999996,-77.51215300000001,38,28849.003688 -5600,2017,5600,42.75380099999999,-75.200564,26,0 -5601,2018,5601,43.07028399999996,-76.05501000000002,42,30012.799141999996 
-5602,2019,5602,42.76031700000001,-78.54635899999995,50,33912.637102 -5603,2018,5603,42.69002700000004,-77.41163700000004,46,20779.771043999997 -5604,2018,5604,42.397034000000005,-75.41734900000002,10,9793.906524 -5605,2018,5605,43.279547999999984,-78.269553,15,551.913579 -5606,2017,5606,43.786792999999996,-74.53907799999993,41,19920.941956 -5607,2014,5607,40.90042,-73.446814,0,0 -5608,2018,5608,42.001545,-74.21550299999994,29,18912.021105 -5609,2019,5609,43.051856,-77.639483,0,0 -5610,2018,5610,42.69921300000004,-73.38381900000002,44,37357.549516 -5614,2019,5614,43.112378,-77.418583,0,0 -5617,2015,5617,42.218405,-79.493714,0,0 -5618,2015,5618,43.796172999999996,-75.83518000000002,54,27837.754731 -5621,2018,5621,41.18384500000001,-74.09927100000003,29,29293.123668 -5623,2019,5623,42.46153499999995,-76.22454500000002,85,39235.456701 -5624,2017,5624,43.61628599999997,-75.95396399999998,31,19320.130397999998 -5625,2015,5625,43.158799999999985,-75.15369999999999,36,19898.569931000002 -5627,2013,5627,43.303936000000014,-78.02866799999997,32,8760.968436 -5628,2019,5628,43.17127600000001,-78.42928200000004,51,30820.088308000002 -5629,2015,5629,41.96474400000003,-74.32278399999997,34,22422.038977000004 -5630,2016,5630,43.427632,-75.36758,0,0 -5631,2018,5631,43.923396,-76.058225,2,1044.461526 -5632,2015,5632,44.32230299999996,-75.29044000000009,45,9286.522529 -5634,2019,5634,44.363669,-74.139282,0,0 -5636,2016,5636,43.582636999999984,-74.34630299999993,44,20163.063666 -5637,2017,5637,42.776533,-76.631164,0,0 -5639,2013,5639,44.78217899999999,-73.95075300000005,54,11330.033468000001 -5641,2017,5641,42.63076999999999,-74.73662299999997,31,0 -5642,2013,5642,42.734528,-74.235765,0,0 -5643,2019,5643,43.50127100000002,-74.27250599999996,36,22315.979733 -5644,2017,5644,44.285244,-73.393148,0,0 -5645,2014,5645,42.440897,-74.012163,0,0 -5647,2015,5647,42.76588900000001,-77.36135999999998,43,0 -5648,2018,5648,43.525183999999996,-74.33307199999997,21,5716.086708999999 
-5650,2019,5650,42.18567499999998,-76.78408799999998,18,40276.674192 -5653,2014,5653,40.97550900000004,-72.32051899999998,52,11236.813753999997 -5654,2013,5654,42.27663200000002,-75.26369100000007,51,18578.199110999998 -5656,2018,5656,42.470236,-76.406948,0,0 -5658,2018,5658,43.99670200000002,-74.703442,24,0 -5659,2019,5659,42.729919,-74.401127,0,0 -5660,2013,5660,41.581994,-74.35642499999997,19,9921.715372 -5661,2014,5661,42.120977,-76.033386,0,0 -5662,2014,5662,43.26762,-73.989147,0,0 -5663,2014,5663,42.481461999999986,-75.61845300000003,33,17622.1394 -5666,2018,5666,43.74269899999995,-74.65794499999996,57,22431.483813 -5667,2016,5667,42.02141900000001,-76.06004800000002,18,8699.42683 -5669,2019,5669,41.05424399999999,-72.45337300000001,10,8787.640949 -5670,2014,5670,41.62009,-73.526522,0,0 -5671,2018,5671,42.48285500000003,-78.39785399999991,65,42018.000083000006 -5672,2014,5672,42.496094,-78.087751,0,0 -5673,2013,5673,43.661045999999956,-76.15783199999997,43,24108.608185 -5674,2016,5674,43.516665999999994,-74.56473999999996,37,13124.868024000003 -5675,2016,5675,42.45590199999998,-74.87708900000003,38,25911.445746999998 -5676,2018,5676,43.95764,-74.56241,0,0 -5679,2018,5679,41.61706000000001,-74.79777599999996,26,45156.350586 -5680,2014,5680,44.24890499999997,-74.867037,40,23831.924392 -5681,2013,5681,43.227391000000004,-74.64146400000001,15,7746.0085070000005 -5682,2016,5682,41.717128,-73.799266,10,21145.046734000003 -5683,2017,5683,43.455065999999995,-75.48527000000003,51,0 -5684,2016,5684,43.796338999999996,-74.36312400000003,61,17495.018283 -5686,2013,5686,44.01062000000001,-74.84274400000002,35,4830.889182000001 -5687,2015,5687,44.733455000000035,-74.09290700000011,71,8368.942418999997 -5688,2013,5688,44.540034999999996,-74.82445499999996,37,6686.0358479999995 -5689,2015,5689,42.386535999999985,-74.244369,42,15858.193522999998 -5690,2018,5690,42.926919,-78.14899299999999,29,7353.231795000001 
-5691,2018,5691,43.575976000000026,-73.68518200000005,50,18777.655397 -5692,2013,5692,42.43078100000002,-77.65692799999992,40,24832.236559999998 -5694,2019,5694,42.454004,-73.717756,11,0 -5695,2015,5695,42.34889500000001,-77.54652600000004,36,16412.868413999997 -5696,2016,5696,43.811821,-74.69615000000006,38,40518.889106999995 -5697,2018,5697,44.124759000000026,-74.77054200000008,47,23506.034689 -5698,2015,5698,42.091676,-73.740998,0,0 -5699,2018,5699,44.251076000000005,-74.37807100000006,61,26089.974933999994 -5700,2018,5700,43.38576899999999,-74.37216700000015,83,16133.442945999997 -5701,2019,5701,42.37849000000004,-77.45167499999995,39,16709.515268 -5702,2015,5702,43.191538000000016,-73.67334600000002,49,21907.075745000002 -5703,2018,5703,44.051231000000016,-74.69053400000003,25,15060.170434 -5704,2015,5704,41.856606999999975,-75.09792100000001,20,7218.99104 -5705,2015,5705,44.544304000000004,-74.0841819999999,84,27820.803216 -5706,2015,5706,41.32983199999999,-73.88964000000004,30,34810.666642000004 -5707,2013,5707,42.293271,-77.91536799999996,35,6114.330792999998 -5708,2015,5708,43.10629100000002,-77.17230599999999,18,11743.024592 -5709,2014,5709,42.03539099999998,-78.347387,30,12820.223101 -5710,2015,5710,42.148754,-78.643524,6,9225.037643 -5711,2013,5711,44.15272600000002,-75.663718,24,2844.5061689999998 -5712,2016,5712,43.26453300000002,-73.45157599999996,44,44939.954115 -5713,2019,5713,42.834362,-74.557711,0,0 -5714,2019,5714,43.105044,-76.179107,0,0 -5716,2017,5716,43.070692,-76.455834,0,0 -5718,2014,5718,42.00323,-76.813138,0,0 -5719,2013,5719,42.231663,-74.905075,0,0 -5720,2014,5720,43.591531000000025,-75.99555500000002,59,22495.87472 -5723,2016,5723,42.968971,-73.39789700000004,27,6377.411868 -5724,2017,5724,42.074804,-75.80469,0,0 -5725,2013,5725,42.430079,-77.465462,0,0 -5726,2019,5726,44.157243000000044,-74.389592,41,6320.671458 -5729,2018,5729,43.97106,-75.833497,17,13856.483198 -5730,2016,5730,42.900905,-76.329254,0,0 
-5731,2018,5731,44.201747,-73.473096,0,0 -5732,2015,5732,42.90603700000003,-74.44629999999998,64,22600.155122000004 -5733,2018,5733,44.71857899999999,-73.45692900000002,7,455.48693799999995 -5735,2014,5735,42.983939,-78.703665,0,0 -5736,2018,5736,42.947327,-74.335359,0,0 -5738,2017,5738,42.914729,-77.798703,0,0 -5739,2013,5739,44.32874399999996,-74.06502399999994,65,15553.195081999998 -5740,2014,5740,42.905927,-76.27420699999996,51,13163.524183 -5742,2019,5742,42.339197000000006,-76.30306299999997,41,25405.769745 -5744,2014,5744,43.145211,-77.539344,0,0 -5745,2013,5745,41.95475400000003,-75.10365,41,18371.73819 -5746,2018,5746,42.928382,-78.634405,0,0 -5748,2016,5748,44.355738,-73.772709,0,0 -5750,2019,5750,44.05178999999999,-73.506302,11,1248.516071 -5751,2014,5751,42.778609,-75.772205,0,0 -5752,2013,5752,44.206164,-73.39659,9,6457.90842 -5754,2014,5754,42.550262,-79.115646,0,0 -5755,2015,5755,42.17137099999997,-78.35619700000002,38,7425.15905 -5756,2019,5756,42.939566,-74.489016,0,0 -5757,2018,5757,42.22747699999995,-76.37429499999998,64,0 -5758,2016,5758,43.323831,-73.560212,0,0 -5759,2018,5759,44.88148999999999,-74.48091899999999,10,3772.314735 -5760,2013,5760,44.72655599999999,-75.25520099999997,18,394.1672470000001 -5762,2019,5762,43.99232500000003,-75.09258700000007,63,26228.694516999996 -5763,2015,5763,42.75724499999999,-78.076126,3,0 -5764,2017,5764,42.120271,-77.232528,19,5586.520207 -5765,2018,5765,43.868984999999995,-75.73444399999997,13,0 -5766,2015,5766,41.788393,-74.822589,0,0 -5767,2019,5767,41.366987,-74.105798,4,389.798014 -5768,2013,5768,43.12275599999999,-75.25289100000003,25,72.479634 -5769,2018,5769,42.129286999999984,-74.14860499999996,50,5029.701302 -5770,2015,5770,42.09225999999999,-74.65579900000006,30,25533.280847000002 -5771,2017,5771,42.155536,-76.650325,0,0 -5773,2013,5773,44.38545799999999,-73.83296699999994,51,25388.535920000002 -5774,2015,5774,43.006316,-77.056885,0,0 -5775,2013,5775,43.325975,-78.162157,0,0 
-5776,2018,5776,43.09224199999996,-73.71413100000002,37,33260.411741 -5778,2018,5778,43.167442,-76.134212,0,0 -5779,2015,5779,44.22748299999995,-74.31171300000004,49,34367.018586 -5780,2014,5780,42.71916600000003,-75.84397099999993,42,20534.103764000003 -5781,2019,5781,43.009678,-73.703504,0,0 -5783,2019,5783,43.55680299999997,-74.41622000000004,48,16229.611376 -5784,2014,5784,42.545442,-76.81185800000002,7,1642.6859299999999 -5786,2014,5786,42.906113,-77.339151,0,0 -5787,2017,5787,42.065135,-79.311609,0,0 -5788,2018,5788,42.097851,-75.55010600000003,45,14073.735097 -5790,2016,5790,42.517824000000005,-75.55654000000001,17,9349.095059 -5791,2019,5791,42.523881,-75.151911,0,0 -5792,2016,5792,43.784515000000034,-75.85402599999999,48,21323.821255 -5793,2019,5793,43.00679,-77.320143,0,0 -5794,2014,5794,42.288279,-79.275718,0,0 -5795,2019,5795,44.787206999999995,-75.26850599999997,21,7644.440979999999 -5796,2014,5796,43.454251,-73.535997,6,1957.452432 -5797,2013,5797,42.695103,-73.897606,0,0 -5798,2016,5798,44.546986999999966,-73.80293900000005,47,14529.337708000001 -5799,2015,5799,42.416042,-76.187007,0,0 -5800,2016,5800,43.877124999999985,-74.45623700000002,35,19501.77934 -5803,2017,5803,40.789691,-73.950603,0,0 -5804,2017,5804,44.39113900000002,-73.71765700000003,42,473.79738100000003 -5805,2016,5805,43.08354400000004,-74.64612799999998,59,0 -5806,2013,5806,43.343746,-74.189295,11,5834.0592 -5807,2016,5807,42.901297,-78.364756,0,0 -5808,2019,5808,43.353178999999976,-73.44633500000005,30,13294.382322000001 -5810,2015,5810,44.90212599999997,-73.919488,55,16119.116079999998 -5811,2015,5811,42.895339,-76.078652,0,0 -5812,2016,5812,41.52004899999998,-73.92027699999996,22,37136.55310599999 -5813,2017,5813,44.20604899999995,-75.31510500000002,45,19901.895049 -5814,2019,5814,41.64771,-74.494615,0,0 -5816,2014,5816,43.57334500000001,-75.637827,47,6420.230621000001 -5817,2017,5817,44.52951499999997,-74.59162399999998,31,0 
-5818,2018,5818,43.163733999999984,-78.66322200000003,45,10274.876808000003 -5819,2018,5819,41.317334,-73.59247000000003,29,18243.054973 -5821,2015,5821,42.93097699999999,-78.38809199999999,30,17916.86228 -5822,2017,5822,43.04222099999999,-73.359323,12,2359.0433999999996 -5823,2014,5823,42.237858,-79.27162,0,0 -5824,2016,5824,43.147823,-77.14968,0,0 -5825,2013,5825,44.25658999999998,-75.03655899999997,31,16256.716816 -5826,2017,5826,43.055693999999995,-77.15844199999998,27,13479.376556999998 -5827,2013,5827,42.80075,-77.248871,23,0 -5829,2015,5829,44.264400000000016,-74.61176400000001,30,7587.350052000002 -5830,2017,5830,44.033225,-75.819053,0,0 -5831,2019,5831,44.320863,-73.350668,0,0 -5833,2013,5833,41.51671899999999,-73.77589200000006,33,26192.254954999997 -5834,2015,5834,42.112152,-75.469293,3,0 -5835,2018,5835,44.39698999999998,-73.44185,33,16293.659916 -5836,2015,5836,42.58556999999998,-76.07253899999995,36,24850.432095 -5837,2015,5837,44.59317400000004,-75.119506,56,23550.293847 -5838,2015,5838,43.49195999999995,-75.54795800000008,78,9345.191952000001 -5839,2015,5839,42.43463499999999,-78.15018800000001,27,18415.905304 -5840,2018,5840,42.83572900000001,-78.36071500000001,18,10391.586801000001 -5841,2018,5841,42.120362,-78.044945,0,0 -5842,2016,5842,43.41282999999999,-75.00298400000007,45,25339.064845 -5843,2013,5843,42.581665,-77.717973,0,0 -5845,2014,5845,42.914001,-77.50303899999996,25,21622.137980999996 -5846,2015,5846,44.870409000000016,-74.58817999999998,28,4400.616471 -5848,2015,5848,44.889094,-73.986774,0,0 -5850,2013,5850,43.03086499999997,-74.03367799999994,38,11187.845931 -5851,2015,5851,43.57586700000001,-75.81859699999997,36,4606.710228999998 -5853,2019,5853,43.04083,-78.386331,0,0 -5855,2017,5855,42.12254800000002,-78.77219800000006,27,22032.883666 -5858,2015,5858,44.99451700000004,-73.99216699999992,47,3243.34328 -5859,2015,5859,42.19953600000002,-75.65722700000003,42,32398.288439999997 -5860,2018,5860,41.691457,-74.076164,0,0 
-5861,2013,5861,42.166426,-74.98181900000006,39,30896.079779 -5863,2019,5863,42.879896,-75.2405,0,0 -5865,2017,5865,42.392815,-74.896257,4,2828.412584 -5866,2017,5866,42.26402600000004,-77.450382,33,19124.337087000004 -5867,2014,5867,42.627605,-79.077576,0,0 -5868,2016,5868,44.170526,-74.85589000000002,14,7231.981036 -5869,2016,5869,44.077811,-76.00927,6,601.840609 -5870,2014,5870,43.25531899999999,-76.166997,7,0 -5871,2017,5871,42.01583100000001,-76.935878,21,0 -5874,2014,5874,42.944625,-74.789926,0,0 -5876,2018,5876,43.446791000000005,-75.96927899999994,55,28497.236308 -5879,2019,5879,42.739432,-78.187241,0,0 -5880,2015,5880,43.51324999999998,-73.80212600000003,50,14642.486846 -5881,2019,5881,41.776339,-73.983922,17,0 -5882,2019,5882,42.884611,-75.900965,0,0 -5883,2019,5883,43.179306,-78.770874,0,0 -5884,2019,5884,41.731383,-74.909621,0,0 -5885,2016,5885,44.19805300000002,-76.11760199999995,43,38102.09294399999 -5886,2016,5886,42.20825199999997,-73.61808699999999,38,23236.963364 -5887,2016,5887,42.287907,-74.655364,0,0 -5888,2018,5888,43.412194,-73.88273400000001,13,9183.430739 -5891,2014,5891,44.555517,-73.492781,0,0 -5893,2013,5893,44.183864999999976,-74.17130899999995,58,13457.978357 -5894,2018,5894,41.827128,-73.870048,0,0 -5895,2014,5895,43.62413199999997,-73.79261600000007,55,33262.052629 -5897,2016,5897,44.867576,-73.969303,0,0 -5898,2015,5898,42.88215199999999,-73.92902699999996,29,49882.319112 -5899,2013,5899,44.99908000000002,-73.76348699999996,23,0 -5900,2017,5900,43.190603,-77.927945,0,0 -5901,2018,5901,43.29649199999998,-75.15271000000001,18,4210.294135 -5902,2016,5902,42.957834,-77.442899,0,0 -5903,2014,5903,42.637736,-77.008404,0,0 -5905,2018,5905,44.765840999999945,-75.08872200000009,76,12693.340106 -5906,2017,5906,42.73631100000004,-76.27082299999996,39,12709.941263 -5907,2013,5907,43.013584,-74.95656,0,0 -5908,2016,5908,43.074791999999945,-74.12038699999995,50,20083.044521999997 -5910,2013,5910,40.962051,-72.622788,0,0 
-5911,2013,5911,42.551292999999994,-73.97703700000005,32,7583.0252470000005 -5912,2017,5912,43.073265,-74.47557499999996,49,28806.91994 -5913,2016,5913,42.05093,-77.98269,0,0 -5914,2019,5914,43.762776999999964,-73.72121599999994,55,27320.337306999998 -5915,2013,5915,43.360707,-76.18901800000002,56,27503.114491 -5916,2016,5916,40.667621,-73.918119,0,0 -5917,2019,5917,41.67529399999997,-73.86146499999997,36,23512.377754999998 -5918,2016,5918,44.93953899999998,-73.59271499999988,59,16113.146469 -5919,2013,5919,44.11701599999998,-74.21728500000002,37,22421.611144 -5920,2018,5920,43.262033,-73.554488,0,0 -5921,2017,5921,40.990992999999996,-72.10461099999999,26,10266.420017 -5922,2015,5922,42.072306,-79.21575,0,0 -5923,2019,5923,44.230321000000004,-76.040521,0,0 -5924,2016,5924,42.758769,-78.438104,0,0 -5925,2015,5925,42.45211,-77.627176,0,0 -5926,2014,5926,44.559650999999995,-75.38939700000002,7,0 -5927,2014,5927,43.556821000000006,-75.450125,16,0 -5928,2016,5928,42.026913000000015,-73.55650799999995,23,13618.660176999998 -5929,2019,5929,42.188769999999984,-77.44428699999995,43,21866.554294 -5930,2015,5930,43.00353,-76.14414,0,0 -5931,2016,5931,42.153557,-73.778207,0,0 -5933,2015,5933,43.50888099999998,-74.80089200000008,46,18501.340823000002 -5934,2019,5934,42.219328000000004,-78.713837,20,530.25052 -5936,2014,5936,42.06500800000002,-78.90527400000005,39,24934.992054 -5937,2019,5937,42.936001000000005,-74.071371,27,10893.504298 -5938,2015,5938,41.915456,-73.996254,0,0 -5939,2013,5939,41.980672,-73.664929,0,0 -5941,2018,5941,42.158908,-73.640084,12,9237.109766000001 -5942,2018,5942,43.259453,-77.892844,0,0 -5943,2018,5943,43.273223,-73.401297,0,0 -5944,2013,5944,41.154322,-74.012442,0,0 -5945,2015,5945,42.968094,-76.231511,0,0 -5948,2015,5948,41.349002,-74.191383,0,0 -5949,2018,5949,43.257342,-78.322938,0,0 -5951,2017,5951,42.76956000000002,-76.43487000000003,62,14818.553238 -5954,2018,5954,42.017648999999984,-78.82715499999996,29,44562.332931 
-5955,2018,5955,43.75872899999998,-75.28359500000003,33,12507.060535 -5956,2013,5956,42.394639,-75.905364,0,0 -5958,2014,5958,43.092592,-76.492564,0,0 -5959,2015,5959,42.359457,-74.06271300000002,6,7033.2798 -5961,2017,5961,42.749806999999976,-77.50629699999995,34,37255.995266000005 -5962,2017,5962,43.768163000000015,-74.89700200000004,58,12332.903515999998 -5963,2016,5963,43.17441,-77.968616,0,0 -5964,2018,5964,42.208975,-76.04548900000005,31,0 -5965,2019,5965,42.50369,-78.61208600000006,42,14839.360998999999 -5966,2016,5966,43.015435,-75.07213,0,0 -5970,2017,5970,43.316754,-75.405935,0,0 -5971,2016,5971,42.600167,-75.672832,0,0 -5972,2018,5972,44.60141599999998,-73.88689999999998,27,5091.323533999999 -5973,2016,5973,42.246855,-75.694915,0,0 -5974,2013,5974,44.43926899999998,-74.54420799999994,43,8923.156556 -5975,2014,5975,43.084857,-77.779179,0,0 -5976,2014,5976,42.64965800000002,-74.04707099999997,25,42979.987271 -5978,2015,5978,42.393192,-76.39204399999996,25,0 -5979,2018,5979,43.39629100000001,-76.52792600000008,45,30615.485255000003 -5980,2017,5980,40.912224,-72.83972700000002,29,8833.293869 -5981,2017,5981,41.804065999999985,-74.34959500000004,54,17414.791397 -5983,2014,5983,43.761974999999964,-75.33863100000003,34,17993.797239 -5984,2019,5984,42.68267,-75.532367,0,0 -5985,2014,5985,41.970794000000005,-75.06166699999996,33,0 -5986,2013,5986,41.355908,-74.59161,0,0 -5987,2016,5987,43.092534,-76.395395,0,0 -5988,2014,5988,42.235586999999974,-77.88536500000006,56,0 -5989,2014,5989,42.533611999999984,-73.41974000000002,20,1595.202815 -5991,2014,5991,44.086205000000014,-73.616947,47,8133.464192999999 -5993,2017,5993,42.479767,-75.888151,17,8595.069938 -5994,2015,5994,42.29094099999998,-74.28945299999997,50,23936.630144000002 -5995,2014,5995,41.969436,-74.893235,0,0 -5996,2019,5996,42.20700299999999,-75.84954800000003,43,30401.083668999996 -5997,2016,5997,42.289608,-79.559379,0,0 -5998,2014,5998,42.784756,-73.610964,0,0 -6000,2016,6000,42.646265,-75.484588,0,0 
-6001,2018,6001,40.83915,-73.253117,0,0 -6002,2014,6002,44.30954100000001,-74.21750399999998,28,17494.057463 -6004,2014,6004,42.914208,-78.585114,0,0 -6005,2015,6005,41.957503999999986,-75.23028,18,21728.786159000003 -6006,2017,6006,43.210674,-75.259788,0,0 -6007,2017,6007,42.608517,-76.20936199999996,66,53908.86232800001 -6008,2013,6008,42.514304,-73.653551,0,0 -6009,2017,6009,43.071871000000044,-76.60711899999993,43,12615.918450000001 -6010,2017,6010,42.835584,-77.506353,0,0 -6011,2013,6011,43.025673000000026,-73.32584499999999,36,0 -6012,2018,6012,42.73081200000001,-78.04651800000008,44,25253.634691 -6013,2018,6013,41.603928,-74.321318,0,0 -6014,2015,6014,43.27343700000001,-76.01081299999996,56,0 -6016,2017,6016,42.252006,-77.851232,32,20744.821997 -6017,2018,6017,42.31027599999997,-77.75406299999996,45,33686.878731000004 -6020,2014,6020,42.42523400000003,-74.61156900000002,39,37016.618011 -6021,2018,6021,43.071455999999955,-78.53626100000007,45,25155.293570000005 -6022,2016,6022,42.03473099999999,-77.62214600000006,50,0 -6025,2019,6025,43.309073,-78.749716,0,0 -6026,2013,6026,42.179424999999995,-73.749003,14,27851.599123 -6027,2013,6027,42.821532,-77.768203,0,0 -6029,2015,6029,43.138597,-74.770009,0,0 -6032,2016,6032,42.02755499999998,-78.125295,33,18408.134588 -6033,2018,6033,41.608715,-73.967888,7,0 -6034,2018,6034,41.500715,-74.72560300000004,27,14123.775977 -6035,2015,6035,42.285309000000034,-77.85860599999991,44,22164.907992 -6036,2015,6036,42.19465399999996,-79.306564,48,30020.449255 -6037,2018,6037,44.435083999999975,-74.00293599999996,47,2592.288644 -6038,2016,6038,43.667637000000006,-74.953344,36,17648.409373 -6039,2018,6039,42.61481000000001,-73.78578,31,44613.623327999994 -6040,2015,6040,42.52607,-75.47436799999998,18,17995.887236 -6041,2019,6041,42.271466,-79.703366,0,0 -6042,2014,6042,42.553370000000015,-77.56727,37,24768.850561999996 -6043,2018,6043,43.738801999999986,-73.9232149999999,63,24737.940489 
-6044,2019,6044,41.755429000000035,-74.69891499999996,44,31608.220336 -6045,2014,6045,43.77149600000001,-75.13727199999995,34,21392.023815999997 -6046,2013,6046,42.141351,-73.759537,0,0 -6050,2019,6050,41.90991200000001,-73.94139700000004,34,23059.665285 -6052,2014,6052,43.040062,-75.83885000000001,16,15404.006941000001 -6053,2014,6053,44.341502,-75.88433899999998,19,8119.26924 -6054,2018,6054,42.995703,-76.669915,0,0 -6056,2017,6056,41.690764,-73.906668,0,0 -6057,2019,6057,42.271107,-79.311614,0,0 -6058,2016,6058,44.50597900000002,-73.67683699999999,67,14389.300751 -6059,2014,6059,44.900858,-74.915544,0,0 -6060,2016,6060,44.29043900000005,-74.76035700000007,42,1280.204067 -6061,2016,6061,42.19479700000001,-78.40655500000001,28,10196.794689 -6062,2016,6062,41.083974,-73.79952,0,0 -6065,2016,6065,42.962149,-75.453727,0,0 -6066,2019,6066,41.474449000000014,-74.23557899999999,21,0 -6067,2017,6067,42.83769600000003,-76.237057,31,0 -6068,2017,6068,42.29869099999998,-76.41369200000005,48,22542.126095000007 -6070,2013,6070,42.152036,-76.027782,0,0 -6071,2016,6071,44.795068,-75.126071,0,0 -6072,2014,6072,42.688374,-76.376774,3,69.690082 -6073,2018,6073,42.562903,-77.77559999999995,30,0 -6075,2019,6075,42.565048,-75.448975,0,0 -6079,2019,6079,43.657026,-75.469189,0,0 -6080,2013,6080,43.40859700000004,-75.80333000000006,45,0 -6081,2018,6081,43.889296000000044,-73.73212199999993,47,23851.419155000003 -6083,2018,6083,44.068656,-76.120263,0,0 -6086,2013,6086,42.510121999999974,-77.59556599999998,40,13370.629713 -6087,2015,6087,41.23466699999996,-74.32664500000007,43,41075.61725200001 -6091,2017,6091,42.04486500000001,-73.50210699999998,33,10081.356428000001 -6092,2014,6092,42.806813,-76.706962,0,0 -6093,2018,6093,43.075321,-77.292262,0,0 -6094,2013,6094,43.532813999999995,-73.88573399999997,56,24325.345577999993 -6095,2018,6095,42.727184,-75.54065,0,0 -6096,2013,6096,42.880923,-75.397513,12,9677.132232000002 -6097,2017,6097,43.172698,-74.917264,0,0 
-6098,2014,6098,43.999095000000004,-75.369331,9,1628.1487789999999 -6099,2019,6099,44.570797,-73.708564,0,0 -6102,2019,6102,43.078043999999984,-74.96603199999998,66,17812.640667000003 -6103,2016,6103,42.557665,-73.705533,0,0 -6104,2018,6104,42.703754000000004,-73.730572,0,0 -6106,2014,6106,40.768914,-73.570197,0,0 -6107,2015,6107,42.884856,-75.310998,0,0 -6108,2018,6108,43.291416999999974,-74.84346700000002,41,22981.298487 -6111,2014,6111,43.12885200000002,-77.19393599999997,18,4603.430531999999 -6112,2014,6112,42.05985199999999,-77.46085500000001,10,4218.197177 -6113,2018,6113,42.07089100000003,-75.93965900000008,50,20355.305430000004 -6117,2017,6117,44.523837,-75.469542,0,0 -6118,2016,6118,43.534539,-74.48647500000004,45,9465.694334999998 -6119,2015,6119,43.55917500000003,-75.05299800000007,51,25678.606949 -6121,2014,6121,44.19288300000001,-74.86065499999997,55,23635.832419 -6122,2018,6122,42.277101,-74.010848,0,0 -6123,2019,6123,42.891728999999955,-78.4518589999999,56,31227.600678 -6124,2019,6124,42.081651,-77.528421,0,0 -6128,2019,6128,42.85465,-77.034065,9,395.241497 -6129,2014,6129,43.144669999999984,-74.47823900000002,33,15188.330332000001 -6130,2017,6130,42.049453000000035,-78.97706900000001,52,28250.178925999997 -6131,2016,6131,43.122899,-75.528334,0,0 -6132,2019,6132,42.49099999999999,-73.78036300000002,26,23911.743730000002 -6133,2013,6133,42.72468200000002,-75.14855200000007,41,14246.562908 -6134,2018,6134,42.931477,-75.234506,0,0 -6135,2015,6135,44.607551000000036,-74.22607699999999,32,16320.900987999998 -6136,2018,6136,43.009489,-73.953376,0,0 -6137,2017,6137,41.491279999999975,-74.8055640000001,48,35800.199455 -6138,2014,6138,43.37850200000002,-74.514588,41,29608.232200000006 -6139,2013,6139,43.620269,-73.76790699999995,41,24455.531125 -6140,2013,6140,42.342614,-73.978087,9,7329.150560000001 -6141,2013,6141,42.380816,-73.582828,0,0 -6142,2013,6142,43.364079,-73.623355,0,0 -6143,2017,6143,44.14787700000004,-74.62849599999998,34,26734.557652999996 
-6144,2015,6144,42.29795399999996,-74.81179500000003,38,11983.304607000002 -6146,2014,6146,44.349109,-73.653125,0,0 -6150,2015,6150,44.31082300000002,-74.95120399999998,42,23966.407527000003 -6151,2017,6151,43.839030999999984,-73.39279499999999,21,12455.787392999999 -6152,2019,6152,44.66695799999999,-74.38826299999994,52,31397.837406000002 -6153,2019,6153,42.85017699999998,-76.17766100000003,60,0 -6154,2017,6154,41.854436999999976,-73.99948600000005,42,44830.949389 -6155,2015,6155,42.40843900000001,-73.47752799999998,20,3929.23614 -6156,2013,6156,44.248246,-74.660391,7,72.14398 -6157,2015,6157,44.512923,-74.65507100000002,44,0 -6159,2013,6159,44.968554999999974,-74.30890700000006,43,9219.678672999999 -6160,2014,6160,42.333501000000005,-73.91176500000002,11,3423.867265 -6161,2018,6161,42.53504,-78.176913,5,15.760998 -6162,2016,6162,43.347960999999955,-74.84580400000003,42,23719.62559 -6163,2014,6163,44.86874900000005,-73.81563800000002,65,4228.049877 -6166,2015,6166,44.974788,-74.12159500000003,24,8477.303755 -6167,2018,6167,42.659149,-77.888532,0,0 -6170,2013,6170,41.588027000000004,-74.45852299999997,33,3847.601175 -6173,2017,6173,43.78673600000001,-75.96001800000003,33,0 -6174,2019,6174,42.67383,-78.014849,0,0 -6175,2019,6175,43.927778000000004,-74.528886,0,0 -6176,2016,6176,40.694967000000005,-73.39754700000002,13,4126.875351 -6177,2015,6177,42.169920999999995,-78.12355000000001,11,3132.9154320000002 -6178,2013,6178,43.527941999999996,-75.62231599999997,37,14063.712042000001 -6179,2013,6179,44.77532000000002,-74.68759899999996,55,8905.640996999999 -6180,2015,6180,43.603576,-75.802198,0,0 -6181,2019,6181,42.87823299999998,-78.14008400000007,37,0 -6182,2015,6182,44.28175400000001,-75.01608800000002,29,4988.668182 -6184,2019,6184,42.44813,-77.147015,15,0 -6185,2016,6185,43.45100099999998,-74.80371199999995,34,28143.018710999997 -6186,2013,6186,43.86048899999999,-74.01224499999994,34,16298.488947 -6187,2014,6187,43.461055,-73.690235,0,0 
-6188,2017,6188,43.474527000000016,-73.98560600000008,52,16450.764207 -6189,2016,6189,42.850144,-73.334326,1,9.309626 -6190,2017,6190,43.132949000000004,-75.140567,0,0 -6191,2013,6191,43.158695,-78.282268,0,0 -6192,2018,6192,42.92249500000001,-76.32490400000003,18,0 -6193,2019,6193,42.10639799999997,-78.70893100000008,38,37619.250694999995 -6194,2015,6194,42.156650000000006,-77.36764800000002,13,0 -6195,2014,6195,44.66523099999996,-74.72234200000001,43,2640.963143 -6196,2013,6196,44.29779000000001,-73.53737600000002,31,13457.244251 -6197,2014,6197,42.173455,-76.736592,3,6047.126520000001 -6199,2019,6199,42.248284999999974,-78.82510899999993,34,22112.894946 -6200,2016,6200,42.69502,-73.642813,0,0 -6202,2013,6202,43.144979,-73.786448,0,0 -6203,2017,6203,42.36575300000002,-76.49717200000002,29,9649.912381000002 -6204,2013,6204,42.66477400000002,-73.57264299999999,40,30598.198806 -6206,2019,6206,43.61703699999999,-74.74215499999995,32,18189.897204999997 -6208,2017,6208,44.22468,-75.513979,4,351.03935999999993 -6210,2015,6210,42.570949,-74.259379,0,0 -6212,2014,6212,43.240103000000005,-74.24920100000004,29,43171.894592000004 -6214,2013,6214,44.11729300000001,-76.23098299999997,35,19324.909260999997 -6216,2014,6216,44.18066700000003,-74.48261899999993,47,12076.22905 -6217,2017,6217,42.183213,-75.759885,0,0 -6218,2016,6218,43.099501,-77.342849,0,0 -6219,2015,6219,42.146384000000005,-78.997628,16,9365.035878 -6221,2017,6221,43.119842,-78.74396,0,0 -6223,2019,6223,42.771981,-78.164447,0,0 -6224,2019,6224,42.849406000000016,-75.01114899999997,25,17179.35187 -6225,2013,6225,42.64045000000004,-78.59544199999993,51,18213.42615 -6226,2018,6226,43.07996100000001,-74.21164100000001,15,11922.161018 -6227,2018,6227,42.829631000000006,-78.50518999999998,37,12350.404507 -6228,2014,6228,44.63616299999997,-73.803903,34,8583.706762 -6229,2018,6229,42.22214099999998,-78.51793799999997,42,34465.402962 -6230,2016,6230,43.99984999999998,-73.706403,35,27131.436426 
-6231,2016,6231,42.3129,-75.475043,0,0 -6232,2013,6232,43.11568800000001,-77.60090099999998,33,18690.133516 -6233,2014,6233,42.72432,-78.148183,0,0 -6234,2013,6234,43.15579299999998,-75.77264699999998,32,5173.267992 -6235,2013,6235,40.757657,-73.768025,0,0 -6236,2017,6236,42.732202,-74.101434,0,0 -6237,2015,6237,43.34945100000002,-73.96097200000003,30,19529.065958 -6239,2018,6239,44.2167,-75.853845,5,0 -6240,2016,6240,43.818031,-75.635233,0,0 -6242,2013,6242,42.663919999999976,-77.51792500000008,39,0 -6244,2015,6244,42.526166,-74.86233,0,0 -6245,2016,6245,42.536794,-76.327938,0,0 -6246,2014,6246,42.217891,-74.58053999999996,47,25076.549533 -6247,2018,6247,42.630241000000005,-77.23508799999995,32,13714.181960999998 -6248,2013,6248,41.440093,-73.68849,0,0 -6249,2016,6249,43.111255,-77.868506,0,0 -6251,2019,6251,42.302011,-75.09789300000007,42,13764.635153 -6253,2019,6253,42.918389,-76.849271,0,0 -6255,2017,6255,43.68722699999996,-73.633639,41,18233.318378999997 -6256,2014,6256,40.913915,-72.732158,0,0 -6257,2016,6257,43.02259000000002,-74.09266899999999,73,13199.562663999994 -6258,2013,6258,44.832245,-73.510126,7,12267.001298 -6259,2017,6259,44.639364000000064,-74.69669699999996,54,23893.508165 -6261,2019,6261,43.43834900000001,-73.96641899999997,59,21274.733288999996 -6262,2013,6262,41.347023,-73.570522,0,0 -6263,2018,6263,42.458906,-77.339545,0,0 -6264,2016,6264,42.75817800000003,-75.61670300000003,42,19454.760967000002 -6265,2017,6265,42.13136599999999,-74.38568600000004,26,25800.381557 -6267,2013,6267,42.971363,-77.65934300000002,18,6509.007755 -6268,2016,6268,44.673965,-75.068558,0,0 -6270,2015,6270,44.09113100000001,-73.464565,19,11388.0126 -6271,2019,6271,42.36479899999997,-75.655311,38,0 -6272,2019,6272,44.142934999999966,-75.00575799999999,74,12810.341775 -6273,2018,6273,42.594298,-74.726924,0,0 -6274,2015,6274,42.132128,-75.783782,0,0 -6275,2014,6275,44.446484,-75.027152,0,0 -6276,2018,6276,42.19063100000001,-76.96937900000005,28,0 
-6278,2019,6278,43.294688,-73.67429,0,0 -6279,2015,6279,42.507011,-77.533942,0,0 -6280,2015,6280,41.631850000000036,-74.58059199999994,62,23596.525637 -6283,2018,6283,43.525257,-73.91918900000005,41,0 -6284,2018,6284,40.779994,-73.417565,0,0 -6285,2013,6285,42.048586,-77.52857,0,0 -6286,2018,6286,44.744072999999965,-73.80918699999992,68,13974.114722 -6288,2014,6288,42.491613000000015,-74.74244300000002,20,11805.210880000002 -6289,2015,6289,42.811732,-75.95764499999999,16,14747.531636 -6290,2016,6290,42.971052,-77.76251,0,0 -6293,2015,6293,42.95653,-73.652587,5,0 -6294,2015,6294,42.141846999999956,-74.05850699999993,95,18082.584332999995 -6295,2018,6295,42.33300799999999,-73.83677,17,4010.188666 -6296,2016,6296,42.48242600000002,-76.15316199999995,68,33856.015329999995 -6297,2013,6297,44.28600899999999,-74.80248699999997,28,11954.830861999999 -6298,2013,6298,42.82329699999998,-73.80032500000002,35,20945.580395 -6300,2013,6300,40.925428,-72.405032,0,0 -6301,2018,6301,42.095401,-78.437669,0,0 -6302,2016,6302,42.888837,-75.498422,0,0 -6304,2013,6304,42.795617,-76.889517,0,0 -6305,2018,6305,41.71027800000001,-73.528632,33,21369.298396 -6306,2019,6306,41.509841,-73.992765,0,0 -6307,2016,6307,42.98793800000001,-78.963507,3,0 -6308,2019,6308,42.22614799999998,-74.07861700000002,49,27514.575949000005 -6310,2017,6310,44.452003999999995,-75.36410100000003,23,7722.5104139999985 -6311,2017,6311,40.562147,-74.09822,0,0 -6312,2019,6312,42.18613000000002,-77.97592400000003,26,0 -6313,2017,6313,42.01218,-73.712165,0,0 -6314,2014,6314,43.11534400000002,-76.09789,22,16099.228277999999 -6315,2014,6315,42.029091,-79.615414,0,0 -6316,2015,6316,43.20262400000001,-77.14878799999998,7,982.96097 -6317,2016,6317,43.472629000000026,-73.26790799999998,34,11588.035280999999 -6318,2015,6318,42.532791,-78.545886,0,0 -6320,2014,6320,41.55401699999997,-74.44690600000003,50,17639.541311999998 -6321,2017,6321,44.57488,-73.96049200000007,65,11075.850011 
-6322,2013,6322,42.94718400000005,-73.72140899999992,42,29183.10159 -6323,2019,6323,42.69205800000002,-75.043485,37,26341.564872 -6325,2019,6325,42.88093099999998,-75.27935699999995,33,23178.10515 -6326,2014,6326,43.900142,-75.974891,0,0 -6327,2013,6327,44.36808000000003,-73.65367599999993,61,42388.02010500001 -6328,2017,6328,43.124546,-76.231747,0,0 -6329,2013,6329,42.023451000000016,-79.03440899999995,51,10974.091376999997 -6330,2017,6330,42.29344099999998,-76.53581300000003,61,24060.085075000003 -6331,2015,6331,44.952098,-73.364429,0,0 -6332,2019,6332,42.112525,-79.46058999999998,20,12057.980829 -6333,2015,6333,43.22924400000005,-74.399852,45,20344.991232 -6334,2016,6334,42.36065199999999,-79.38497699999998,25,30672.740406 -6335,2015,6335,42.423506,-75.616665,4,7617.940831999999 -6336,2016,6336,44.269291999999965,-74.38129200000002,41,35437.157875000004 -6337,2016,6337,43.540271,-75.772252,0,0 -6338,2019,6338,44.19037799999999,-74.72478,50,20247.898115 -6339,2013,6339,42.627798999999996,-76.84648700000001,15,4338.519182 -6341,2017,6341,42.64716099999997,-75.79600699999995,83,36155.279542000004 -6342,2019,6342,42.939201,-78.824029,0,0 -6343,2019,6343,43.33215999999998,-73.84536699999992,39,27974.037594 -6344,2019,6344,44.40615900000005,-75.36051300000005,41,9713.836698 -6345,2018,6345,42.311784,-79.375661,0,0 -6347,2016,6347,43.82082100000002,-74.94221399999999,18,8694.007717999999 -6349,2013,6349,42.78379799999998,-74.039486,40,16087.387245000002 -6350,2019,6350,44.80113500000004,-73.7246839999999,67,0 -6351,2018,6351,43.21673,-75.590925,0,0 -6352,2016,6352,42.186746,-78.932593,0,0 -6354,2019,6354,42.144982000000006,-75.647695,10,0 -6356,2018,6356,42.798777,-77.811038,0,0 -6358,2013,6358,44.85962,-74.8106509999999,54,21980.793712000006 -6359,2018,6359,44.17344000000002,-74.58868200000005,53,15420.715185000001 -6360,2015,6360,44.001485,-74.25632600000003,43,22696.797197000004 -6361,2013,6361,42.49097799999996,-74.20982300000006,69,17339.278476 
-6362,2016,6362,42.730376,-73.781646,0,0 -6363,2017,6363,42.58938200000007,-77.43828300000004,65,27286.443588999995 -6365,2017,6365,42.989384,-76.470709,0,0 -6366,2013,6366,42.222825,-78.287029,0,0 -6367,2013,6367,44.111805,-76.061294,2,0 -6368,2016,6368,43.171069,-76.423497,0,0 -6369,2019,6369,42.25903700000001,-79.37196799999997,35,30264.916944999997 -6370,2013,6370,42.74072700000004,-77.65619299999996,39,9554.278413 -6371,2016,6371,42.100907,-74.41739200000002,31,46711.06752000001 -6372,2016,6372,42.182367,-76.659288,0,0 -6373,2018,6373,42.011817,-79.170029,0,0 -6374,2019,6374,43.239818,-74.807201,0,0 -6375,2016,6375,42.67961999999996,-77.96836299999997,38,21215.09929 -6376,2013,6376,44.45136699999995,-75.46448400000006,48,4185.693802 -6377,2013,6377,42.534041,-78.867416,0,0 -6378,2014,6378,42.95006400000005,-77.17808700000008,41,13203.99516 -6379,2015,6379,42.320793,-75.156379,0,0 -6380,2014,6380,43.56466500000004,-74.74625999999994,38,20679.628286 -6381,2014,6381,43.426651,-75.57476799999998,13,0 -6382,2019,6382,42.46571100000004,-78.71722099999992,39,19182.22142 -6385,2015,6385,42.77564,-78.599475,0,0 -6387,2014,6387,42.14007500000002,-75.31550199999997,32,19009.064899 -6388,2017,6388,42.380437,-73.823179,0,0 -6389,2019,6389,41.68059900000002,-74.27233800000006,38,37405.363946 -6390,2015,6390,42.413462,-77.424742,0,0 -6392,2014,6392,43.69046699999997,-74.44662199999998,35,20216.283382 -6394,2015,6394,42.33809199999998,-78.03829999999998,28,7350.759897000001 -6395,2018,6395,42.83908,-74.19851000000006,52,23915.661323999997 -6397,2018,6397,42.297235,-76.16451799999999,60,17705.364797000002 -6399,2013,6399,44.108643,-75.884848,0,0 -6400,2014,6400,44.08933900000005,-75.49266399999998,47,14642.845452000001 -6404,2018,6404,42.19665399999999,-74.47265100000003,24,5841.849136000001 -6405,2018,6405,42.25434800000002,-73.51189500000004,23,7383.261413 -6406,2017,6406,42.436127,-78.934959,0,0 -6407,2017,6407,42.85331199999998,-75.66924199999995,32,18452.676671999998 
-6408,2013,6408,42.45554199999997,-76.02647900000007,46,9159.952462000001 -6410,2015,6410,43.62227499999996,-74.26542599999996,47,24933.959002999996 -6411,2015,6411,40.75848,-73.866939,0,0 -6413,2013,6413,42.860078,-77.197387,0,0 -6414,2013,6414,43.27327,-77.137196,0,0 -6416,2017,6416,44.38647600000001,-75.42942800000002,6,6163.275922000001 -6417,2017,6417,42.013599,-77.22449600000002,11,2692.36029 -6418,2014,6418,44.68635200000006,-74.15847200000006,87,9589.467178000003 -6419,2019,6419,43.97777399999999,-74.701368,16,1347.282883 -6421,2017,6421,42.506578,-75.802467,40,26612.897544000003 -6422,2018,6422,43.81189599999998,-75.04631900000003,48,27479.508699 -6423,2019,6423,41.441643,-73.912366,24,17949.996293 -6426,2016,6426,43.93923999999999,-75.19891499999996,54,12019.225503999998 -6428,2018,6428,42.087112999999995,-76.47300399999999,11,12344.436701999999 -6431,2016,6431,41.562718000000004,-74.803781,15,0 -6432,2013,6432,42.063129000000046,-74.49538399999999,39,39319.144841 -6433,2016,6433,44.49007500000006,-74.43093700000007,95,19357.934557 -6434,2016,6434,41.240089,-74.080246,0,0 -6435,2014,6435,42.273467999999994,-73.808497,17,32812.276367 -6436,2016,6436,40.952492,-73.39245499999998,20,9322.650470999999 -6440,2019,6440,42.595391,-75.778675,0,0 -6442,2013,6442,42.26854299999999,-74.218562,58,16940.862465 -6443,2015,6443,42.734987,-73.596535,3,862.814421 -6445,2018,6445,41.707002,-74.437393,0,0 -6446,2019,6446,42.972463000000005,-76.98797699999994,38,22590.107932 -6447,2018,6447,42.694343000000046,-76.45035999999996,38,19009.028411 -6448,2017,6448,42.216241999999994,-77.50813700000002,18,0 -6451,2015,6451,42.008394,-75.433489,5,1430.02841 -6452,2019,6452,42.893113,-74.68254,0,0 -6454,2017,6454,42.119452,-78.933042,0,0 -6458,2018,6458,42.052186,-77.92049399999999,12,4814.814354 -6459,2013,6459,43.211361,-75.716425,0,0 -6461,2015,6461,43.188306,-77.651554,0,0 -6462,2014,6462,44.718922999999954,-73.55836899999996,52,3346.498864 
-6463,2017,6463,44.386081999999995,-73.519546,39,12712.735200999998 -6464,2013,6464,42.862961,-78.796672,0,0 -6465,2016,6465,43.82986499999997,-73.71396800000001,47,24345.130650000003 -6466,2019,6466,43.103195,-76.567438,0,0 -6467,2018,6467,42.73294099999999,-74.67468899999996,38,37153.802306 -6468,2014,6468,42.56268599999999,-78.25140499999999,13,5719.651854000001 -6469,2015,6469,42.908517,-77.674179,0,0 -6470,2017,6470,44.82920199999994,-74.91674299999987,62,25531.834580000002 -6471,2013,6471,43.080873,-78.146552,0,0 -6473,2019,6473,42.360549,-79.420621,11,9700.437740000001 -6474,2014,6474,43.049426,-77.250329,0,0 -6475,2014,6475,43.279512,-77.089417,0,0 -6476,2018,6476,42.194956,-74.803368,0,0 -6479,2018,6479,43.126522,-78.005787,0,0 -6480,2017,6480,43.99181799999998,-75.14605299999995,43,37766.290116 -6482,2016,6482,43.124992,-73.328977,0,0 -6483,2019,6483,41.278324,-73.587067,0,0 -6484,2016,6484,42.786152,-74.640627,0,0 -6487,2013,6487,42.23534300000004,-74.74624399999996,48,19028.498183 -6489,2019,6489,42.105447,-79.063021,5,1184.383943 -6492,2018,6492,43.75519200000004,-75.80030200000007,51,22578.143815 -6493,2017,6493,41.010846,-73.66713,0,0 -6494,2018,6494,42.657973,-78.228669,0,0 -6495,2013,6495,43.195037,-75.223198,1,0 -6496,2014,6496,41.69799799999999,-73.64144899999998,16,7940.059465 -6497,2014,6497,41.47893700000001,-74.63976999999997,31,17337.782047 -6498,2019,6498,42.97893199999999,-73.92259599999993,38,20420.747964000002 -6499,2018,6499,41.414150000000014,-74.61099299999992,40,30046.910551000004 -6502,2018,6502,44.17406599999992,-73.69460599999992,123,20261.371921999995 -6503,2019,6503,43.52911300000003,-74.08265,34,29492.103075 -6505,2018,6505,43.31247,-76.241807,0,0 -6506,2014,6506,43.913564,-73.428207,2,3812.528467 -6507,2014,6507,40.89277700000002,-72.58299400000003,71,11030.725811 -6508,2013,6508,43.45771800000006,-74.66320300000004,47,13932.041956999998 -6509,2014,6509,42.386512999999965,-74.42987700000002,56,35883.558067 
-6510,2019,6510,43.196525000000015,-74.538775,39,30490.148157 -6511,2013,6511,41.786494,-75.036343,0,0 -6513,2016,6513,43.030741,-78.746292,0,0 -6515,2016,6515,42.06805800000002,-78.50789500000008,47,23422.275213 -6517,2019,6517,43.293676,-76.770928,0,0 -6518,2018,6518,42.99117,-78.806474,0,0 -6519,2013,6519,42.41825799999998,-76.23549300000002,41,33800.414983 -6520,2014,6520,42.381325,-73.725057,0,0 -6521,2013,6521,41.54707,-73.95064,0,0 -6522,2018,6522,43.95180299999997,-73.59793200000001,46,16251.649460999999 -6523,2014,6523,44.801862,-74.973085,0,0 -6524,2019,6524,43.489898000000025,-76.09175999999994,42,18154.354548999996 -6526,2015,6526,43.928715000000004,-74.98315199999993,43,20708.103644 -6527,2015,6527,42.849044,-77.150569,0,0 -6529,2018,6529,43.85688899999997,-74.99224199999993,44,8209.363539 -6530,2018,6530,41.389007,-74.27285400000001,33,23790.465881 -6531,2018,6531,41.990314,-75.12463,0,0 -6532,2013,6532,41.691793,-74.93310099999998,7,1688.517366 -6533,2018,6533,42.60320099999998,-73.34700199999999,34,16395.288262000002 -6534,2017,6534,44.64145800000005,-73.90656500000016,88,13744.395282999998 -6536,2014,6536,44.853291000000006,-73.439353,6,0 -6538,2017,6538,41.64739400000001,-75.025846,35,12552.139179999998 -6539,2013,6539,42.42977799999997,-79.043545,22,16446.890100000004 -6540,2014,6540,43.923613,-75.715274,0,0 -6541,2016,6541,42.938099,-73.619957,16,34798.210681 -6542,2018,6542,42.857285,-75.432392,0,0 -6543,2017,6543,42.702516000000024,-74.8545089999999,53,26355.604584 -6544,2017,6544,42.605399,-77.80177899999998,15,13306.25604 -6545,2015,6545,42.375067,-77.05277,0,0 -6546,2014,6546,44.286254000000014,-76.11345200000004,29,33130.60132 -6547,2017,6547,44.713655,-73.656762,0,0 -6548,2014,6548,43.25790599999998,-75.49249400000005,65,19774.12056 -6549,2013,6549,42.14358599999998,-78.460462,53,19959.455144999996 -6550,2016,6550,44.75922399999993,-73.84957699999997,91,9422.523242 -6552,2014,6552,41.55095400000002,-74.60280800000004,24,11393.984897 
-6553,2013,6553,44.880137,-74.25040399999999,19,4074.1092050000007 -6554,2019,6554,44.44369499999997,-73.87218500000006,59,23983.969483999997 -6555,2014,6555,43.851906,-75.627622,0,0 -6556,2017,6556,43.10475399999997,-75.71555799999996,60,0 -6558,2019,6558,43.53263999999994,-75.03141299999996,77,17667.155898 -6559,2015,6559,41.81603799999998,-73.93924299999993,45,40485.410901999996 -6561,2018,6561,44.29477199999998,-73.94015099999991,47,12718.334571 -6563,2017,6563,42.66703000000001,-73.42554900000003,34,7788.010739000001 -6567,2019,6567,44.94455399999998,-73.63873199999995,35,1969.770883 -6568,2019,6568,42.448004000000005,-78.00799300000007,59,23704.225391 -6569,2017,6569,44.20338799999995,-76.061859,61,14980.772584999997 -6570,2019,6570,42.263832000000036,-76.582989,46,20426.069260999997 -6573,2015,6573,41.517115,-73.73601,0,0 -6574,2015,6574,43.216021999999995,-75.50945299999994,28,12233.706801 -6575,2013,6575,41.584619000000004,-73.693165,0,0 -6576,2017,6576,44.643912,-74.83853,71,14210.129352 -6577,2016,6577,42.934168,-73.916224,0,0 -6578,2019,6578,44.848527999999995,-73.69670499999997,39,10900.791462000001 -6579,2018,6579,43.095614999999974,-78.732759,34,0 -6580,2014,6580,42.936276000000014,-75.33853799999999,19,3146.5679840000003 -6582,2015,6582,42.831629,-78.049453,0,0 -6583,2018,6583,41.361535,-73.945655,33,16437.875021999997 -6584,2014,6584,42.995933,-73.293105,0,0 -6585,2013,6585,42.634105,-78.758275,0,0 -6586,2016,6586,42.149564,-77.199426,9,5384.925912 -6587,2018,6587,43.36599299999996,-75.85356300000001,38,29917.610886999995 -6589,2018,6589,43.133399,-73.980378,0,0 -6590,2015,6590,42.692066,-74.580354,0,0 -6591,2017,6591,42.438788,-75.207725,0,0 -6592,2019,6592,44.46673699999999,-73.804982999999993,49,13835.461021 -6594,2016,6594,41.527696,-74.17168799999999,15,17385.438274 -6596,2015,6596,42.02002599999997,-78.69938499999994,42,29193.279317000004 -6597,2013,6597,42.907607,-76.639939,0,0 -6599,2019,6599,42.84099500000001,-73.46627,10,2607.059249 
-6600,2015,6600,43.180479999999996,-77.27439300000002,34,22753.846673 -6601,2016,6601,42.865112,-78.556773,0,0 -6603,2014,6603,44.52297799999999,-74.80021799999999,38,34.979529 -6605,2013,6605,42.656072,-74.57982700000001,49,0 -6606,2014,6606,42.932807,-77.823983,0,0 -6607,2013,6607,42.93729099999999,-75.91091400000002,18,0 -6608,2016,6608,43.032862000000016,-74.22494099999996,49,15693.000806 -6609,2017,6609,42.87062799999996,-74.61208799999999,40,25127.370946000003 -6610,2019,6610,42.134463,-76.585523,4,375.751515 -6611,2014,6611,43.25354599999998,-74.74753499999998,40,26838.327453 -6612,2018,6612,43.37157099999999,-74.0535,30,18028.216622 -6613,2016,6613,43.836276,-73.81298299999993,47,26401.962489000005 -6614,2015,6614,43.174545,-78.881887,2,338.111154 -6615,2019,6615,43.032355,-78.858594,0,0 -6617,2015,6617,44.253724000000076,-73.70237300000007,127,10076.862691 -6620,2017,6620,41.14424,-73.768608,0,0 -6621,2013,6621,42.29407300000002,-79.47617099999994,37,11062.846865 -6624,2014,6624,41.586589,-74.093938,29,18312.773301 -6625,2019,6625,43.913599,-74.02745,0,0 -6627,2014,6627,42.453861,-79.00144,15,7204.162421000001 -6628,2014,6628,43.97959999999994,-73.89703999999996,47,7035.67709 -6630,2017,6630,44.19155900000003,-74.06266599999998,36,37146.951251 -6631,2018,6631,43.342241,-75.497473,10,87.68017 -6632,2018,6632,42.31590299999999,-74.40311000000003,36,20936.28288 -6633,2013,6633,43.41442400000004,-75.05385699999992,47,33173.836795 -6634,2019,6634,43.201583000000014,-76.25367099999997,14,4013.964802 -6636,2016,6636,43.041716,-76.326343,0,0 -6637,2014,6637,42.88895100000001,-74.82053799999997,17,18543.847372 -6640,2016,6640,43.22857300000002,-74.92758400000001,33,17108.281906 -6641,2014,6641,44.58226100000001,-74.05664100000006,73,14504.982628999995 -6642,2013,6642,41.910199,-73.71426199999998,29,6971.427997000001 -6643,2019,6643,40.697328,-73.987269,0,0 -6644,2019,6644,44.68028599999999,-73.83953699999995,52,31746.497998000003 
-6645,2016,6645,42.62537700000002,-77.27464700000006,42,45209.354827 -6647,2019,6647,43.40477,-76.085169,0,0 -6649,2018,6649,42.968785,-78.427299,0,0 -6652,2014,6652,42.20008900000001,-76.88403599999998,18,17165.4045 -6654,2019,6654,43.069397,-75.878577,0,0 -6655,2013,6655,42.03995499999999,-77.376884,26,11629.661656000002 -6656,2013,6656,44.448625,-75.42124700000002,33,22351.943152 -6657,2016,6657,45.00018999999998,-73.47506599999998,29,9008.998251 -6658,2014,6658,42.540263,-78.015035,8,3817.929927 -6660,2016,6660,42.52940199999999,-78.38836300000001,27,4929.960349 -6661,2018,6661,43.24227300000002,-76.31362900000008,43,18388.686949 -6663,2019,6663,43.146084,-77.592098,0,0 -6664,2015,6664,43.148229,-75.696606,0,0 -6665,2018,6665,44.55388900000002,-74.90174900000004,25,0 -6666,2014,6666,43.998677,-76.099397,0,0 -6667,2016,6667,43.43183999999999,-75.68036400000004,23,6364.841453000001 -6668,2017,6668,43.332175,-75.905664,0,0 -6669,2016,6669,42.98011399999999,-78.463327,16,0 -6672,2013,6672,42.113031,-77.479466,0,0 -6674,2018,6674,42.521076,-78.670542,0,0 -6675,2017,6675,43.075323,-77.851001,0,0 -6676,2014,6676,41.28549499999998,-74.12551699999997,14,13868.086712 -6678,2017,6678,42.820793,-78.756524,0,0 -6679,2019,6679,44.484375,-74.48622999999999,37,6530.277660999999 -6682,2016,6682,44.039510000000014,-75.31469199999995,44,28430.974753000002 -6683,2013,6683,44.15488099999997,-73.80266799999998,67,18565.374657000004 -6684,2013,6684,41.20940900000002,-73.81964100000003,22,17628.226822999997 -6686,2019,6686,44.582090000000036,-75.564699,49,0 -6689,2015,6689,42.436015,-77.780119,0,0 -6690,2014,6690,43.386993000000025,-76.33831999999997,40,0 -6692,2013,6692,42.325180999999986,-78.59168600000005,37,13371.654020999998 -6693,2013,6693,42.30739900000002,-76.83380600000007,44,36329.61185 -6696,2019,6696,44.516886,-73.551352,9,1993.8716749999999 -6697,2014,6697,42.976622,-75.126729,0,0 -6700,2016,6700,42.931635,-74.368655,0,0 -6701,2016,6701,42.166179,-77.524407,12,3856.038692 
-6702,2017,6702,42.009685999999995,-78.303892,6,0 -6703,2019,6703,43.506641999999985,-74.6577900000001,65,19897.533503 -6704,2019,6704,42.15255499999995,-76.11423999999992,47,0 -6707,2013,6707,44.478518,-74.893415,12,2192.276885 -6710,2017,6710,43.28728499999992,-75.2238719999999,75,22168.732889 -6711,2015,6711,43.30531299999999,-73.86129800000002,27,13548.91383 -6712,2013,6712,43.692478999999935,-74.69325000000006,97,11837.443986 -6713,2017,6713,44.62661100000002,-73.709527,69,23451.690252000004 -6714,2013,6714,42.28537600000003,-77.13023800000006,45,21951.944774 -6715,2019,6715,44.617942000000035,-74.03240499999995,47,21491.251618 -6716,2013,6716,42.76577099999999,-75.67004799999998,13,22.630812 -6717,2016,6717,41.043152,-73.834167,0,0 -6720,2016,6720,43.093419,-76.104363,0,0 -6721,2013,6721,43.29265000000001,-74.5140110000001,59,16532.546499 -6722,2017,6722,42.53792900000002,-73.89829099999999,42,22690.526604 -6723,2019,6723,42.03910900000004,-78.56872800000005,41,21682.247489 -6724,2014,6724,44.29011799999995,-74.05864100000005,48,29405.412719000004 -6725,2018,6725,42.67735800000001,-74.83590699999999,16,5238.297699000001 -6726,2017,6726,42.43690800000001,-78.58338300000007,67,0 -6728,2015,6728,43.747936999999986,-74.79587400000001,14,7478.513236 -6729,2016,6729,44.19516400000004,-73.71942900000006,64,19578.49697 -6732,2016,6732,43.668365,-74.47236300000004,28,19407.875192 -6734,2013,6734,43.964581,-75.089593,0,0 -6735,2017,6735,43.935226999999955,-73.69992600000012,58,24914.699010000004 -6736,2017,6736,44.10816,-75.910198,0,0 -6737,2017,6737,43.858825,-75.401494,0,0 -6738,2018,6738,44.114550999999935,-75.34933500000002,126,0 -6739,2019,6739,43.028495,-73.82185200000002,6,23.164419 -6740,2016,6740,41.901127,-73.80302200000004,29,12642.837276999997 -6741,2019,6741,42.50417399999998,-74.80392799999993,51,29652.899453000002 -6743,2019,6743,41.962516999999984,-74.61205699999996,43,30674.87359799999 -6745,2015,6745,41.226564,-74.238563,9,639.431651 
-6746,2019,6746,43.209239,-73.547009,0,0 -6747,2013,6747,43.54559799999998,-74.79168399999996,39,0 -6749,2013,6749,43.65105999999997,-74.166291,42,26172.551699 -6751,2015,6751,43.098525,-77.526696,0,0 -6753,2014,6753,42.292912,-76.251825,0,0 -6754,2019,6754,42.570141,-78.808416,0,0 -6756,2018,6756,42.738513000000005,-73.290496,8,7171.109582 -6757,2015,6757,43.18784899999998,-74.21033200000007,39,5084.856627000001 -6758,2014,6758,43.410104000000025,-74.65844299999998,36,27093.397483 -6760,2016,6760,42.32662400000002,-73.51438600000003,29,10685.101429 -6761,2016,6761,42.17111300000003,-74.58941099999996,55,12360.035434999998 -6765,2013,6765,44.683970999999964,-74.32407700000012,65,7147.906508999999 -6766,2015,6766,43.300492000000006,-74.18374900000002,13,6857.661219 -6767,2014,6767,43.11470199999999,-74.71468199999991,65,40326.075358 -6768,2014,6768,43.98560999999997,-75.00769799999995,29,18523.070911 -6865,2016,6865,43.934718,-76.085499,0,0 -7190,2015,7190,44.34312,-74.37969400000004,37,23661.945535 -7260,2016,7260,42.85321999999999,-76.40779700000003,24,20269.242652 -7285,2016,7285,42.560854,-78.721751,0,0 -7360,2016,7360,44.187870000000004,-74.29400900000007,53,23352.022221999996 -7405,2017,7405,41.867451,-73.81796499999999,15,14022.468771 -7516,2015,7516,44.18241799999999,-76.28103,12,4089.698648 -7534,2017,7534,42.803229,-74.947813,0,0 -7536,2016,7536,44.01466600000001,-76.24086100000004,34,8101.220922 -7544,2018,7544,42.13398700000002,-76.83841400000004,24,9640.066178000001 -7556,2015,7556,43.96325399999998,-74.46304400000005,52,25624.738651999996 -7661,2016,7661,42.550115,-77.31972899999994,50,11608.372610999999 -7716,2015,7716,44.72430499999998,-74.26080099999993,46,35151.892245 -7962,2016,7962,43.295509000000024,-73.98199999999999,52,22424.858382000002 -8128,2015,8128,44.465712,-75.762883,9,498.996533 -8206,2017,8206,41.550174,-73.693782,0,0 -8207,2018,8207,42.662059,-75.79252700000005,46,22975.534501 -8208,2017,8208,43.230751,-79.019185,0,0 
-8209,2016,8209,43.095497,-74.866138,0,0 -8211,2015,8211,44.495694,-74.26322100000007,55,22545.444122999997 -8212,2017,8212,40.722572,-73.164571,0,0 -8213,2018,8213,43.415578000000025,-75.12669199999999,35,15799.319706 -8214,2017,8214,44.126449999999984,-76.26720900000007,48,10896.48628 -8215,2018,8215,43.19973199999999,-74.19777,11,12546.217052999998 -8216,2017,8216,41.73476299999998,-74.24033599999997,40,35557.545375 -8217,2016,8217,40.909192,-73.540862,0,0 -8218,2017,8218,40.59018,-73.582985,0,0 -8220,2017,8220,40.953627,-72.486537,0,0 -8221,2017,8221,43.943026,-74.60438,0,0 -8222,2019,8222,40.759731,-72.90122,0,0 -8223,2018,8223,43.348892,-74.58247,0,0 -8224,2013,8224,40.588388,-73.676187,0,0 -8225,2015,8225,41.982369,-74.163088,0,0 -8226,2016,8226,43.225354,-75.994246,0,0 -8227,2018,8227,43.865703,-75.165746,0,0 -8229,2018,8229,43.592058,-74.99061,0,0 -8230,2019,8230,40.666316,-74.026840000000007,0,0 -8231,2017,8231,40.708729,-73.007885,0,0 -8232,2016,8232,40.588295,-73.784473,0,0 -8233,2017,8233,44.837603,-75.279744,0,0 -8234,2016,8234,44.593692,-75.491572,0,0 -8235,2019,8235,42.663486,-78.079647,0,0 -8236,2014,8236,43.08171,-79.006409,0,0 -8237,2019,8237,44.286295,-75.817366,0,0 -8238,2018,8238,41.07157800000002,-72.360148,22,17016.984569 -8239,2017,8239,42.469709,-77.146435,0,0 -8240,2018,8240,42.609471,-76.884435,0,0 -8241,2016,8241,43.182909,-75.857898,0,0 -8242,2018,8242,40.903697,-72.462273,0,0 -8243,2019,8243,43.914908,-74.948451,0,0 -8244,2015,8244,43.842387,-74.883572,0,0 -8245,2019,8245,41.056085,-73.905902,0,0 -8246,2019,8246,44.05387,-75.790262,0,0 -8247,2018,8247,43.893051999999955,-73.65086400000007,49,11969.216354000004 -8248,2013,8248,40.827977,-73.706063,0,0 -8249,2014,8249,42.112837,-79.743286,0,0 -8250,2018,8250,42.832245,-77.265105,0,0 -8251,2013,8251,42.748719,-76.725287,0,0 -8252,2015,8252,44.179993,-75.809216,0,0 -8253,2017,8253,42.350997,-78.988564,0,0 -8254,2017,8254,44.8299,-73.410469,0,0 -8255,2017,8255,40.643925,-73.349121,0,0 
-8256,2014,8256,43.223179,-76.036265,0,0 -8257,2019,8257,40.71913,-72.904737,0,0 -8258,2014,8258,41.057174,-71.895621,0,0 -8259,2019,8259,42.734225,-76.96576,0,0 -8260,2013,8260,44.876167,-75.163707,0,0 -8261,2017,8261,43.871627,-74.650112,0,0 -8262,2014,8262,44.269327,-75.744395,0,0 -8263,2018,8263,40.637067,-73.538765,0,0 -8264,2018,8264,43.746614,-74.901485,0,0 -8265,2018,8265,40.933446000000004,-73.314386,0,0 -8266,2015,8266,40.630715,-73.444903,0,0 -8267,2015,8267,44.445304,-75.802684,0,0 -8268,2013,8268,40.648561,-73.5025,0,0 -8269,2016,8269,44.423193,-73.346656,0,0 -8270,2018,8270,41.940374,-74.245933,0,0 -8271,2019,8271,40.891261,-73.168472,0,0 -8272,2019,8272,43.724062000000004,-74.332561,0,0 -8273,2016,8273,43.526747,-73.641181,0,0 -8274,2017,8274,41.35374,-74.145513,0,0 -8275,2013,8275,43.954338,-74.14391500000008,44,15662.476458000001 -8276,2017,8276,42.426281,-77.171101,0,0 -8277,2015,8277,44.481285,-73.313474,0,0 -8278,2017,8278,40.654086,-73.218362,0,0 -8279,2016,8279,44.292147,-76.050954,0,0 -8280,2014,8280,44.149275,-73.420136,0,0 -8281,2013,8281,43.668228,-74.343378,0,0 -8282,2013,8282,44.935165,-74.932312,1,231.48295 -8283,2016,8283,40.979128,-72.452798,0,0 -8284,2015,8284,42.852472,-76.494816,0,0 -8285,2013,8285,44.473935,-75.638267,0,0 -8286,2014,8286,43.410008,-74.558603,0,0 -8287,2017,8287,42.066462,-75.612362,0,0 -8288,2017,8288,41.186166,-73.938873,0,0 -8289,2014,8289,40.803778,-73.878931,0,0 -8290,2017,8290,44.269404,-76.037866,0,0 -8291,2017,8291,43.227787,-73.765309,0,0 -8292,2016,8292,44.04473,-76.160251,0,0 -8293,2015,8293,43.1978,-75.876279,0,0 -8294,2019,8294,44.350063,-74.619481,0,0 -8295,2018,8295,42.784738,-77.284176,0,0 -8296,2017,8296,42.07160999999998,-79.51262099999995,44,33429.800159000006 -8297,2015,8297,40.734478,-73.919653,0,0 -8298,2015,8298,44.474875,-73.376055,0,0 -8299,2014,8299,42.46879,-76.528423,0,0 -8300,2016,8300,43.64950900000001,-75.393894,7,16752.589 -8301,2016,8301,44.964307,-74.952509,0,0 
-8302,2015,8302,41.212475,-73.931402,0,0 -8303,2013,8303,43.441856,-76.499324,0,0 -8304,2019,8304,44.57267099999996,-74.28998,50,9244.903777 -8305,2014,8305,42.042579,-78.918826,0,0 -8306,2014,8306,43.541714,-73.625091,0,0 -8307,2016,8307,44.146785,-75.390566,0,0 -8308,2018,8308,40.984222,-73.777808,0,0 -8309,2017,8309,42.129525,-75.9949,0,0 -8310,2019,8310,42.795449,-76.762157,0,0 -8311,2015,8311,44.482503,-73.706083,0,0 -8312,2019,8312,40.971821,-72.426277,0,0 -8313,2015,8313,42.252036,-79.488603,0,0 -8314,2017,8314,42.404245,-74.00087,0,0 -8315,2018,8315,44.621973,-75.614518,0,0 -8316,2018,8316,41.023899,-72.450322,0,0 -8317,2018,8317,42.701373,-77.358626,0,0 -8318,2013,8318,42.705483,-76.931232,0,0 -8319,2017,8319,42.482983,-73.9452,0,0 -8320,2015,8320,42.358565,-76.724515,0,0 -8321,2018,8321,43.82231299999999,-74.66791400000004,27,1702.858065 -8322,2019,8322,43.548001,-74.599823,0,0 -8323,2018,8323,44.597249,-73.418062,0,0 -8324,2014,8324,40.957144,-73.085368,0,0 -8325,2016,8325,41.839367,-74.033645,0,0 -8326,2017,8326,44.004186,-73.627464,0,0 -8327,2015,8327,40.597991,-73.470405,0,0 -8328,2017,8328,43.56525,-74.811502,0,0 -8329,2015,8329,43.739605,-73.453253,0,0 -8330,2015,8330,41.416936,-73.586562,0,0 -8331,2017,8331,43.52153300000001,-75.08768899999997,32,24865.131916000002 -8332,2018,8332,43.202256,-75.756525,0,0 -8333,2016,8333,44.428299,-73.3232,0,0 -8334,2014,8334,42.071097,-75.338613,0,0 -8335,2015,8335,42.81144,-77.721216,0,0 -8336,2013,8336,40.84793,-72.475132,0,0 -8337,2014,8337,44.938653,-74.948412,0,0 -8338,2015,8338,40.95852,-72.538686,0,0 -8339,2014,8339,43.345443,-76.708965,0,0 -8340,2015,8340,40.704957,-73.08085,0,0 -8341,2016,8341,43.943099,-75.710359,0,0 -8342,2014,8342,40.683356,-73.245586,0,0 -8343,2017,8343,42.968155,-78.641262,5,0 -8344,2015,8344,43.424898,-73.704269,0,0 -8345,2017,8345,40.837833,-73.75705,0,0 -8346,2015,8346,42.706544,-76.737306,0,0 -8347,2016,8347,40.932316,-72.539098,0,0 -8348,2014,8348,44.489468,-75.76443,0,0 
-8349,2015,8349,44.210687,-74.468049,0,0 -8350,2019,8350,44.511902,-75.467948,0,0 -8351,2018,8351,42.854844,-76.349466,0,0 -8352,2019,8352,43.819025,-74.431285,0,0 -8353,2014,8353,40.871436,-73.660966,0,0 -8354,2013,8354,43.284922,-77.663167,0,0 -8355,2014,8355,44.705222,-73.394401,0,0 -8356,2019,8356,40.92953200000001,-72.62940000000002,28,15637.781328000001 -8357,2013,8357,44.742593,-73.373815,0,0 -8358,2018,8358,43.205245,-75.983103,0,0 -8359,2015,8359,42.658624,-76.879289,0,0 -8360,2019,8360,43.139003,-74.166157,0,0 -8361,2019,8361,40.621693,-73.884019,0,0 -8362,2019,8362,44.804248,-73.348415,0,0 -8363,2018,8363,41.1156,-73.947607,0,0 -8364,2017,8364,43.427691,-76.028286,0,0 -8365,2014,8365,44.90154,-75.13055,0,0 -8366,2015,8366,43.888866,-73.379486,0,0 -8367,2013,8367,42.825612,-76.529331,0,0 -8368,2014,8368,42.314952,-79.586215,0,0 -8369,2016,8369,42.102594,-75.291593,0,0 -8370,2018,8370,43.365144,-78.182572,0,0 -8371,2015,8371,44.55715199999999,-73.86607899999998,17,7883.470875000001 -8372,2017,8372,42.69675800000003,-77.89182600000002,62,45775.027994000004 -8373,2011,8373,43.765597,-73.787572,0,0 -8374,2014,8374,43.259882,-74.581752,0,0 -8375,2018,8375,42.112847,-73.779697,0,0 -8376,2019,8376,40.998207,-72.387009,0,0 -8377,2017,8377,42.809767,-76.962882,0,0 -8378,2017,8378,44.454658,-75.507961,0,0 -8379,2014,8379,42.683451999999996,-79.03438000000001,10,6590.061356 -8380,2016,8380,44.017808,-73.433546,0,0 -8381,2018,8381,42.930657,-76.098302,0,0 -8382,2019,8382,43.880605,-74.606568,0,0 -8383,2019,8383,40.707741,-73.099885,0,0 -8384,2018,8384,43.463488,-75.224401,0,0 -8385,2018,8385,44.936652,-74.991967,0,0 -8386,2014,8386,41.046778,-73.895798,0,0 -8387,2019,8387,44.552504,-73.405336,0,0 -8388,2014,8388,41.192862,-74.11792,0,0 -8389,2016,8389,42.118925,-79.346795,0,0 -8390,2018,8390,42.021811,-75.444949,0,0 -8391,2014,8391,40.636727,-73.812834,0,0 -8392,2017,8392,42.949564,-78.932971,0,0 -8393,2019,8393,41.456573,-73.85313,0,0 
-8394,2018,8394,42.664253,-76.673613,0,0 -8395,2015,8395,43.597261,-73.589241,0,0 -8396,2015,8396,40.698874000000004,-73.217233,0,0 -8397,2017,8397,44.166183,-74.811948,0,0 -8398,2019,8398,42.518465,-76.548341,0,0 -8399,2015,8399,44.329388,-73.971433,0,0 -8400,2016,8400,42.085325,-74.937581,0,0 -8401,2014,8401,44.86947,-75.259991,0,0 -8402,2016,8402,42.614309,-76.888627,0,0 -8403,2014,8403,44.974234,-73.339619,0,0 -8404,2018,8404,43.84771,-76.23878399999998,33,2999.2215489999994 -8405,2015,8405,42.914883,-76.395443,0,0 -8406,2016,8406,42.779564,-76.292403,0,0 -8407,2016,8407,44.634583,-73.382241,0,0 -8408,2016,8408,43.343348,-78.06773,0,0 -8409,2016,8409,40.736034,-73.008119,0,0 -8410,2019,8410,40.680587,-73.139752,0,0 -8411,2017,8411,43.188632,-75.989118,0,0 -8412,2016,8412,41.970405,-73.967272,0,0 -8413,2016,8413,40.659392,-73.062411,0,0 -8414,2016,8414,44.041884,-74.378961,0,0 -8415,2019,8415,43.09193,-76.175231,0,0 -8416,2015,8416,41.812565,-74.464983,0,0 -8418,2019,8418,41.784979,-73.803246,6,6735.152692000001 -8419,2016,8419,40.831739,-73.82399,0,0 -8420,2016,8420,42.091368,-73.514587,0,0 -8421,2018,8421,43.857874999999986,-74.41939899999991,53,21872.223708 -8422,2018,8422,43.52817800000001,-75.751588,17,0 -8423,2015,8423,42.519225,-76.10320099999994,30,24499.877983999995 -8424,2018,8424,42.429273,-76.993323,3,628.4940590000001 -8425,2018,8425,42.066679000000015,-75.77616200000006,35,25860.175958 -8426,2015,8426,40.861668,-72.57971399999998,33,21421.677023 -8427,2016,8427,40.886822,-73.09498,0,0 -8428,2016,8428,44.72630900000001,-73.51992799999995,23,994.1102599999998 -8429,2013,8429,43.73482999999999,-74.09309500000009,74,23394.288877 -8430,2013,8430,42.727608,-77.303056,0,0 -8431,2015,8431,43.228277,-77.83984000000007,54,43939.941140999996 -12876,2019,12876,42.496281999999994,-76.80972900000005,29,19239.641334 -12880,2014,12880,42.504341999999966,-76.73483200000007,44,31071.76395499999 -12950,2019,12950,42.565503,-76.753493,0,0 
-18174,2015,18174,42.54239200000002,-76.75500000000007,49,17968.153032999995 -18175,2017,18175,42.510056999999975,-76.791792,43,39476.158423 -18181,2016,18181,42.46546899999999,-76.78056299999994,42,42723.837552000005 -19022,2017,19022,42.31418099999998,-74.77679200000007,39,8924.774728 -19023,2018,19023,43.799499,-73.809766,0,0 -19024,2018,19024,42.791903,-74.45252,0,0 -19025,2017,19025,43.349271999999985,-74.05068699999997,37,14369.383202 -19026,2017,19026,42.548478,-76.565908,0,0 -19027,2017,19027,41.718578,-74.799718,0,0 -19028,2019,19028,42.616524999999974,-75.29777800000006,41,0 diff --git a/content/learn/work/nested-resampling/figs/choose-1.svg b/content/learn/work/nested-resampling/figs/choose-1.svg deleted file mode 100644 index bfa43df1..00000000 --- a/content/learn/work/nested-resampling/figs/choose-1.svg +++ /dev/null @@ -1,93 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -10 -20 -30 - - - - - - - - - - - - - - - -0.25 -0.5 -1 -2 -4 -8 -16 -32 -64 -128 -256 -SVM Cost -count - - diff --git a/content/learn/work/nested-resampling/figs/not-nested-1.svg b/content/learn/work/nested-resampling/figs/not-nested-1.svg deleted file mode 100644 index e63c8725..00000000 --- a/content/learn/work/nested-resampling/figs/not-nested-1.svg +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -2.6 -2.8 -3.0 -3.2 -3.4 - - - - - - - - -1 -8 -64 -SVM Cost -RMSE - - diff --git a/content/learn/work/nested-resampling/figs/rmse-plot-1.svg b/content/learn/work/nested-resampling/figs/rmse-plot-1.svg deleted file mode 100644 index de190133..00000000 --- a/content/learn/work/nested-resampling/figs/rmse-plot-1.svg +++ /dev/null @@ -1,162 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -3.0 -3.5 - - - - - -1 -8 -64 -SVM Cost -Inner RMSE - - diff --git a/content/learn/work/nested-resampling/index.Rmarkdown b/content/learn/work/nested-resampling/index.Rmarkdown deleted file mode 100755 index c904251d..00000000 --- a/content/learn/work/nested-resampling/index.Rmarkdown +++ /dev/null @@ -1,271 +0,0 @@ ---- -title: "Nested resampling" -tags: [rsample, parsnip] -categories: [tuning] -type: learn-subsection -weight: 2 -description: | - Estimate the best hyperparameters for a model using nested resampling. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - - -```{r load, include=FALSE} -library(tidymodels) -library(scales) -library(mlbench) -library(kernlab) -library(furrr) - -pkgs <- c("tidymodels", "scales", "mlbench", "kernlab", "furrr") - -theme_set(theme_bw() + theme(legend.position = "top")) -set.seed(1234) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -In this article, we discuss an alternative method for evaluating and tuning models, called [nested resampling](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22nested+resampling%22+inner+outer&btnG=). While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance. - -## Resampling models - -A typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. 
In rsample, we use the term _analysis set_ for the data that are used to fit the model and the term _assessment set_ for the set used to compute performance: - -```{r resampling-fig, echo = FALSE, fig.align='center', out.width="70%"} -knitr::include_graphics("figs/resampling.svg") -``` - -A common method for tuning models is [grid search](/learn/work/tune-svm/) where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter. - -The potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for _optimization bias_ since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance. - -Nested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An _outer_ resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times. - -Once the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method's assessment sets are a unbiased estimate of the model. - -We will simulate some regression data to illustrate the methods. 
The mlbench package has a function `mlbench::mlbench.friedman1()` that can simulate a complex regression data structure from the [original MARS publication](https://scholar.google.com/scholar?hl=en&q=%22Multivariate+adaptive+regression+splines%22&btnG=&as_sdt=1%2C7&as_sdtp=). A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed. - -```{r sim-data} -library(mlbench) -sim_data <- function(n) { - tmp <- mlbench.friedman1(n, sd = 1) - tmp <- cbind(tmp$x, tmp$y) - tmp <- as.data.frame(tmp) - names(tmp)[ncol(tmp)] <- "y" - tmp -} - -set.seed(9815) -train_dat <- sim_data(100) -large_dat <- sim_data(10^5) -``` - -## Nested resampling - -To get started, the types of resampling methods need to be specified. This isn't a large data set, so 5 repeats of 10-fold cross validation will be used as the _outer_ resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let's use 25 iterations of the bootstrap. This means that there will eventually be `5 * 10 * 25 = 1250` models that are fit to the data _per tuning parameter_. These models will be discarded once the performance of the model has been quantified. - -To create the tibble with the resampling specifications: - -```{r tibble-gen} -library(tidymodels) -results <- nested_cv(train_dat, - outside = vfold_cv(repeats = 5), - inside = bootstraps(times = 25)) -results -``` - -The splitting information for each resample is contained in the `split` objects. Focusing on the second fold of the first repeat: - -```{r split-example} -results$splits[[2]] -``` - -`<90/10/100>` indicates the number of observations in the analysis set, assessment set, and the original data. - -Each element of `inner_resamples` has its own tibble with the bootstrapping splits. 
- -```{r inner-splits} -results$inner_resamples[[5]] -``` - -These are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data: - -```{r inner-boot-split} -results$inner_resamples[[5]]$splits[[1]] -``` - -To start, we need to define how the model will be created and measured. Let's use a radial basis support vector machine model via the function `kernlab::ksvm`. This model is generally considered to have _two_ tuning parameters: the SVM cost value and the kernel parameter `sigma`. For illustration purposes here, only the cost value will be tuned and the function `kernlab::sigest` will be used to estimate `sigma` during each model fit. This is automatically done by `ksvm`. - -After the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. **One important note:** for this model, it is critical to center and scale the predictors before computing dot products. We don't do this operation here because `mlbench.friedman1` simulates all of the predictors to be standardized uniform random variables. - -Our function to fit the model and compute the RMSE is: - -```{r rmse-func} -library(kernlab) - -# `object` will be an `rsplit` object from our `results` tibble -# `cost` is the tuning parameter -svm_rmse <- function(object, cost = 1) { - y_col <- ncol(object$data) - mod <- - svm_rbf(mode = "regression", cost = cost) %>% - set_engine("kernlab") %>% - fit(y ~ ., data = analysis(object)) - - holdout_pred <- - predict(mod, assessment(object) %>% dplyr::select(-y)) %>% - bind_cols(assessment(object) %>% dplyr::select(y)) - rmse(holdout_pred, truth = y, estimate = .pred)$.estimate -} - -# In some case, we want to parameterize the function over the tuning parameter: -rmse_wrapper <- function(cost, object) svm_rmse(object, cost) -``` - -For the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. 
To do this, create a wrapper: - -```{r inner-tune-func} -# `object` will be an `rsplit` object for the bootstrap samples -tune_over_cost <- function(object) { - tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% - mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object)) -} -``` - -Since this will be called across the set of outer cross-validation splits, another wrapper is required: - -```{r inner-func} -# `object` is an `rsplit` object in `results$inner_resamples` -summarize_tune_results <- function(object) { - # Return row-bound tibble that has the 25 bootstrap results - map_df(object$splits, tune_over_cost) %>% - # For each value of the tuning parameter, compute the - # average RMSE which is the inner bootstrap estimate. - group_by(cost) %>% - summarize(mean_RMSE = mean(RMSE, na.rm = TRUE), - n = length(RMSE), - .groups = "drop") -} -``` - -Now that those functions are defined, we can execute all the inner resampling loops: - -```{r inner-runs, eval = FALSE} -tuning_results <- map(results$inner_resamples, summarize_tune_results) -``` - -Alternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using `map()`, the function `future_map()` parallelizes the iterations using the [future package](https://cran.r-project.org/web/packages/future/vignettes/future-1-overview.html). The `multisession` plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations. - -```{r inner-runs-parallel, R.options=list(dplyr.summarise.inform = FALSE), warning=FALSE} -library(furrr) -plan(multisession) - -tuning_results <- future_map( - results$inner_resamples, - summarize_tune_results, - .options = furrr_options(seed = 1234) -) -``` - -The object `tuning_results` is a list of data frames for each of the 50 outer resamples. 
- -Let's make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations: - -```{r rmse-plot, fig.height=4, message = FALSE} -library(scales) - -pooled_inner <- tuning_results %>% bind_rows - -best_cost <- function(dat) dat[which.min(dat$mean_RMSE),] - -p <- - ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + - scale_x_continuous(trans = 'log2') + - xlab("SVM Cost") + ylab("Inner RMSE") - -for (i in 1:length(tuning_results)) - p <- p + - geom_line(data = tuning_results[[i]], alpha = .2) + - geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4) - -p <- p + geom_smooth(data = pooled_inner, se = FALSE) -p -``` - -Each gray line is a separate bootstrap resampling curve created from a different 90% of the data. The blue line is a LOESS smooth of all the results pooled together. - -To determine the best parameter estimate for each of the outer resampling iterations: - -```{r choose, fig.height=4} -cost_vals <- - tuning_results %>% - map_df(best_cost) %>% - select(cost) - -results <- - bind_cols(results, cost_vals) %>% - mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1)))) - -ggplot(results, aes(x = cost)) + - geom_bar() + - xlab("SVM Cost") + - scale_x_discrete(drop = FALSE) -``` - -Most of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger. - -Now that we have these estimates, we can compute the outer resampling results for each of the `r nrow(results)` splits using the corresponding tuning parameter value: - -```{r run-outer} -results <- - results %>% - mutate(RMSE = map2_dbl(splits, cost, svm_rmse)) - -summary(results$RMSE) -``` - -The estimated RMSE for the model tuning process is `r round(mean(results$RMSE), 2)`. 
- -What is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, `r nrow(results)` SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. The associated RMSE is the biased estimate. - -```{r not-nested, fig.height=4} -not_nested <- - map(results$splits, tune_over_cost) %>% - bind_rows - -outer_summary <- not_nested %>% - group_by(cost) %>% - summarize(outer_RMSE = mean(RMSE), n = length(RMSE)) - -outer_summary - -ggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + - geom_point() + - geom_line() + - scale_x_continuous(trans = 'log2') + - xlab("SVM Cost") + ylab("RMSE") -``` - -The non-nested procedure estimates the RMSE to be `r round(min(outer_summary$outer_RMSE), 2)`. Both estimates are fairly close. - -The approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning. - -```{r large-sample-estimate} -finalModel <- ksvm(y ~ ., data = train_dat, C = 2) -large_pred <- predict(finalModel, large_dat[, -ncol(large_dat)]) -sqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE)) -``` - -The nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar. - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/work/nested-resampling/index.markdown b/content/learn/work/nested-resampling/index.markdown deleted file mode 100644 index 230753ac..00000000 --- a/content/learn/work/nested-resampling/index.markdown +++ /dev/null @@ -1,368 +0,0 @@ ---- -title: "Nested resampling" -tags: [rsample, parsnip] -categories: [tuning] -type: learn-subsection -weight: 2 -description: | - Estimate the best hyperparameters for a model using nested resampling. 
---- - - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: furrr, kernlab, mlbench, scales, and tidymodels. - -In this article, we discuss an alternative method for evaluating and tuning models, called [nested resampling](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22nested+resampling%22+inner+outer&btnG=). While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance. - -## Resampling models - -A typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. In rsample, we use the term _analysis set_ for the data that are used to fit the model and the term _assessment set_ for the set used to compute performance: - - - -A common method for tuning models is [grid search](/learn/work/tune-svm/) where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter. - -The potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for _optimization bias_ since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance. - -Nested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. 
An _outer_ resampling scheme is used and, for every split in the outer resample, another full set of resampling splits is created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times. - -Once the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method's assessment sets is an unbiased estimate of the model. - -We will simulate some regression data to illustrate the methods. The mlbench package has a function `mlbench::mlbench.friedman1()` that can simulate a complex regression data structure from the [original MARS publication](https://scholar.google.com/scholar?hl=en&q=%22Multivariate+adaptive+regression+splines%22&btnG=&as_sdt=1%2C7&as_sdtp=). A training set size of 100 data points is generated as well as a large set that will be used to characterize how well the resampling procedure performed. - - -```r -library(mlbench) -sim_data <- function(n) { - tmp <- mlbench.friedman1(n, sd = 1) - tmp <- cbind(tmp$x, tmp$y) - tmp <- as.data.frame(tmp) - names(tmp)[ncol(tmp)] <- "y" - tmp -} - -set.seed(9815) -train_dat <- sim_data(100) -large_dat <- sim_data(10^5) -``` - -## Nested resampling - -To get started, the types of resampling methods need to be specified. This isn't a large data set, so 5 repeats of 10-fold cross validation will be used as the _outer_ resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let's use 25 iterations of the bootstrap. 
This means that there will eventually be `5 * 10 * 25 = 1250` models that are fit to the data _per tuning parameter_. These models will be discarded once the performance of the model has been quantified. - -To create the tibble with the resampling specifications: - - -```r -library(tidymodels) -results <- nested_cv(train_dat, - outside = vfold_cv(repeats = 5), - inside = bootstraps(times = 25)) -results -#> # Nested resampling: -#> # outer: 10-fold cross-validation repeated 5 times -#> # inner: Bootstrap sampling -#> # A tibble: 50 × 4 -#> splits id id2 inner_resamples -#> -#> 1 Repeat1 Fold01 -#> 2 Repeat1 Fold02 -#> 3 Repeat1 Fold03 -#> 4 Repeat1 Fold04 -#> 5 Repeat1 Fold05 -#> 6 Repeat1 Fold06 -#> 7 Repeat1 Fold07 -#> 8 Repeat1 Fold08 -#> 9 Repeat1 Fold09 -#> 10 Repeat1 Fold10 -#> # … with 40 more rows -``` - -The splitting information for each resample is contained in the `split` objects. Focusing on the second fold of the first repeat: - - -```r -results$splits[[2]] -#> -#> <90/10/100> -``` - -`<90/10/100>` indicates the number of observations in the analysis set, assessment set, and the original data. - -Each element of `inner_resamples` has its own tibble with the bootstrapping splits. - - -```r -results$inner_resamples[[5]] -#> # Bootstrap sampling -#> # A tibble: 25 × 2 -#> splits id -#> -#> 1 Bootstrap01 -#> 2 Bootstrap02 -#> 3 Bootstrap03 -#> 4 Bootstrap04 -#> 5 Bootstrap05 -#> 6 Bootstrap06 -#> 7 Bootstrap07 -#> 8 Bootstrap08 -#> 9 Bootstrap09 -#> 10 Bootstrap10 -#> # … with 15 more rows -``` - -These are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data: - - -```r -results$inner_resamples[[5]]$splits[[1]] -#> -#> <90/31/90> -``` - -To start, we need to define how the model will be created and measured. Let's use a radial basis support vector machine model via the function `kernlab::ksvm`. 
This model is generally considered to have _two_ tuning parameters: the SVM cost value and the kernel parameter `sigma`. For illustration purposes here, only the cost value will be tuned and the function `kernlab::sigest` will be used to estimate `sigma` during each model fit. This is automatically done by `ksvm`. - -After the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. **One important note:** for this model, it is critical to center and scale the predictors before computing dot products. We don't do this operation here because `mlbench.friedman1` simulates all of the predictors to be standardized uniform random variables. - -Our function to fit the model and compute the RMSE is: - - -```r -library(kernlab) - -# `object` will be an `rsplit` object from our `results` tibble -# `cost` is the tuning parameter -svm_rmse <- function(object, cost = 1) { - y_col <- ncol(object$data) - mod <- - svm_rbf(mode = "regression", cost = cost) %>% - set_engine("kernlab") %>% - fit(y ~ ., data = analysis(object)) - - holdout_pred <- - predict(mod, assessment(object) %>% dplyr::select(-y)) %>% - bind_cols(assessment(object) %>% dplyr::select(y)) - rmse(holdout_pred, truth = y, estimate = .pred)$.estimate -} - -# In some case, we want to parameterize the function over the tuning parameter: -rmse_wrapper <- function(cost, object) svm_rmse(object, cost) -``` - -For the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. 
To do this, create a wrapper: - - -```r -# `object` will be an `rsplit` object for the bootstrap samples -tune_over_cost <- function(object) { - tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% - mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object)) -} -``` - -Since this will be called across the set of outer cross-validation splits, another wrapper is required: - - -```r -# `object` is an `rsplit` object in `results$inner_resamples` -summarize_tune_results <- function(object) { - # Return row-bound tibble that has the 25 bootstrap results - map_df(object$splits, tune_over_cost) %>% - # For each value of the tuning parameter, compute the - # average RMSE which is the inner bootstrap estimate. - group_by(cost) %>% - summarize(mean_RMSE = mean(RMSE, na.rm = TRUE), - n = length(RMSE), - .groups = "drop") -} -``` - -Now that those functions are defined, we can execute all the inner resampling loops: - - -```r -tuning_results <- map(results$inner_resamples, summarize_tune_results) -``` - -Alternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using `map()`, the function `future_map()` parallelizes the iterations using the [future package](https://cran.r-project.org/web/packages/future/vignettes/future-1-overview.html). The `multisession` plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations. - - -```r -library(furrr) -plan(multisession) - -tuning_results <- future_map( - results$inner_resamples, - summarize_tune_results, - .options = furrr_options(seed = 1234) -) -``` - -The object `tuning_results` is a list of data frames for each of the 50 outer resamples. 
- -Let's make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations: - - -```r -library(scales) - -pooled_inner <- tuning_results %>% bind_rows - -best_cost <- function(dat) dat[which.min(dat$mean_RMSE),] - -p <- - ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + - scale_x_continuous(trans = 'log2') + - xlab("SVM Cost") + ylab("Inner RMSE") - -for (i in 1:length(tuning_results)) - p <- p + - geom_line(data = tuning_results[[i]], alpha = .2) + - geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4) - -p <- p + geom_smooth(data = pooled_inner, se = FALSE) -p -``` - - - -Each gray line is a separate bootstrap resampling curve created from a different 90% of the data. The blue line is a LOESS smooth of all the results pooled together. - -To determine the best parameter estimate for each of the outer resampling iterations: - - -```r -cost_vals <- - tuning_results %>% - map_df(best_cost) %>% - select(cost) - -results <- - bind_cols(results, cost_vals) %>% - mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1)))) - -ggplot(results, aes(x = cost)) + - geom_bar() + - xlab("SVM Cost") + - scale_x_discrete(drop = FALSE) -``` - - - -Most of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger. - -Now that we have these estimates, we can compute the outer resampling results for each of the 50 splits using the corresponding tuning parameter value: - - -```r -results <- - results %>% - mutate(RMSE = map2_dbl(splits, cost, svm_rmse)) - -summary(results$RMSE) -#> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 1.59 2.09 2.67 2.69 3.27 4.35 -``` - -The estimated RMSE for the model tuning process is 2.69. - -What is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? 
For each cost value in the tuning grid, 50 SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. The associated RMSE is the biased estimate. - - -```r -not_nested <- - map(results$splits, tune_over_cost) %>% - bind_rows - -outer_summary <- not_nested %>% - group_by(cost) %>% - summarize(outer_RMSE = mean(RMSE), n = length(RMSE)) - -outer_summary -#> # A tibble: 11 × 3 -#> cost outer_RMSE n -#> -#> 1 0.25 3.54 50 -#> 2 0.5 3.11 50 -#> 3 1 2.77 50 -#> 4 2 2.62 50 -#> 5 4 2.65 50 -#> 6 8 2.75 50 -#> 7 16 2.82 50 -#> 8 32 2.82 50 -#> 9 64 2.83 50 -#> 10 128 2.83 50 -#> 11 256 2.83 50 - -ggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + - geom_point() + - geom_line() + - scale_x_continuous(trans = 'log2') + - xlab("SVM Cost") + ylab("RMSE") -``` - - - -The non-nested procedure estimates the RMSE to be 2.62. Both estimates are fairly close. - -The approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning. - - -```r -finalModel <- ksvm(y ~ ., data = train_dat, C = 2) -large_pred <- predict(finalModel, large_dat[, -ncol(large_dat)]) -sqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE)) -#> [1] 2.71 -``` - -The nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar. - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> kernlab * 0.9-31 2022-06-09 [1] CRAN (R 4.2.0) -#> mlbench * 2.1-3 2021-01-29 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/work/tune-svm/index.Rmarkdown b/content/learn/work/tune-svm/index.Rmarkdown deleted file mode 100755 index b1301be4..00000000 --- a/content/learn/work/tune-svm/index.Rmarkdown +++ /dev/null @@ -1,205 +0,0 @@ ---- -title: "Model tuning via grid search" -tags: [rsample, parsnip, tune, yardstick] -categories: [model tuning] -type: learn-subsection -weight: 1 -description: | - Choose hyperparameters for a model by training on a grid of many possible parameter values. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include=FALSE} -library(tidymodels) -library(mlbench) -library(kernlab) -library(doMC) -registerDoMC(cores = parallel::detectCores()) - -pkgs <- c("tidymodels", "mlbench", "kernlab") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -This article demonstrates how to tune a model using grid search. Many models have **hyperparameters** that can't be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best. - -## Example data - -To demonstrate model tuning, we'll use the Ionosphere data in the mlbench package: - -```{r load-data} -library(tidymodels) -library(mlbench) -data(Ionosphere) -``` - -From `?Ionosphere`: - -> This radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. "good" radar returns are those showing evidence of some type of structure in the ionosphere. "bad" returns are those that do not; their signals pass through the ionosphere. - -> Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this databse are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details. - -There are 43 predictors and a factor outcome. Two of the predictors are factors (`V1` and `V2`) and the rest are numeric variables that have been scaled to a range of -1 to 1. 
Note that the two factor predictors have sparse distributions: - -```{r factor-pred} -table(Ionosphere$V1) -table(Ionosphere$V2) -``` - -There's no point of putting `V2` into any model since it is a zero-variance predictor. `V1` is not but it _could_ be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value: - -```{r glm-fail, error=TRUE} -glm(Class ~ ., data = Ionosphere, family = binomial) - -# Surprisingly, this doesn't help: - -glm(Class ~ . - V2, data = Ionosphere, family = binomial) -``` - -Let's remove these two problematic variables: - -```{r ion-rm} -Ionosphere <- Ionosphere %>% select(-V1, -V2) -``` - -## Inputs for the search - -To demonstrate, we'll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the $\sigma$ parameter in the kernel function: - -```{r svm-mod} -svm_mod <- - svm_rbf(cost = tune(), rbf_sigma = tune()) %>% - set_mode("classification") %>% - set_engine("kernlab") -``` - -In this article, tuning will be demonstrated in two ways, using: - -- a standard R formula, and -- a recipe. - -Let's create a simple recipe here: - -```{r rec} -iono_rec <- - recipe(Class ~ ., data = Ionosphere) %>% - # remove any zero variance predictors - step_zv(all_predictors()) %>% - # remove any linear combinations - step_lincomb(all_numeric()) -``` - -The only other required item for tuning is a resampling strategy as defined by an rsample object. Let's demonstrate using basic bootstrapping: - -```{r rs} -set.seed(4943) -iono_rs <- bootstraps(Ionosphere, times = 30) -``` - -## Optional inputs - -An _optional_ step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. 
To do this, a yardstick package function can be used to create a metric set: - -```{r roc} -roc_vals <- metric_set(roc_auc) -``` - -If no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used. - -Also, a control object can be passed that specifies different aspects of the search. Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on. - -```{r ctrl} -ctrl <- control_grid(verbose = FALSE, save_pred = TRUE) -``` - -## Executing with a formula - -First, we can use the formula interface: - -```{r grid, message=FALSE} -set.seed(35) -formula_res <- - svm_mod %>% - tune_grid( - Class ~ ., - resamples = iono_rs, - metrics = roc_vals, - control = ctrl - ) -formula_res -``` - -The `.metrics` column contains tibbles of the performance metrics for each tuning parameter combination: - -```{r raw-metrics} -formula_res %>% - select(.metrics) %>% - slice(1) %>% - pull(1) -``` - -To get the final resampling estimates, the `collect_metrics()` function can be used on the grid object: - -```{r metric-estimates} -estimates <- collect_metrics(formula_res) -estimates -``` - -The top combinations are: - -```{r sorted-metrics} -show_best(formula_res, metric = "roc_auc") -``` - -## Executing with a recipe - -Next, we can use the same syntax but pass a *recipe* in as the pre-processor argument: - -```{r recipe} -set.seed(325) -recipe_res <- - svm_mod %>% - tune_grid( - iono_rec, - resamples = iono_rs, - metrics = roc_vals, - control = ctrl - ) -recipe_res -``` - -The best setting here is: - -```{r best-rec} -show_best(recipe_res, metric = "roc_auc") -``` - -## Out-of-sample predictions - -If we used `save_pred = TRUE` to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with 
the tuning parameters and resample identifier, using `collect_predictions()`: - -```{r rec-preds} -collect_predictions(recipe_res) -``` - -We can obtain the hold-out sets for all the resamples augmented with the predictions using `augment()`, which provides opportunities for flexible visualization of model results: - -```{r augment-preds} -augment(recipe_res) %>% - ggplot(aes(V3, .pred_good, color = Class)) + - geom_point(show.legend = FALSE) + - facet_wrap(~Class) -``` - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/work/tune-svm/index.markdown b/content/learn/work/tune-svm/index.markdown deleted file mode 100644 index e7bdfc5e..00000000 --- a/content/learn/work/tune-svm/index.markdown +++ /dev/null @@ -1,345 +0,0 @@ ---- -title: "Model tuning via grid search" -tags: [rsample, parsnip, tune, yardstick] -categories: [model tuning] -type: learn-subsection -weight: 1 -description: | - Choose hyperparameters for a model by training on a grid of many possible parameter values. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: kernlab, mlbench, and tidymodels. - -This article demonstrates how to tune a model using grid search. Many models have **hyperparameters** that can't be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best. - -## Example data - -To demonstrate model tuning, we'll use the Ionosphere data in the mlbench package: - - -```r -library(tidymodels) -library(mlbench) -data(Ionosphere) -``` - -From `?Ionosphere`: - -> This radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. 
"good" radar returns are those showing evidence of some type of structure in the ionosphere. "bad" returns are those that do not; their signals pass through the ionosphere. - -> Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this databse are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details. - -There are 43 predictors and a factor outcome. Two of the predictors are factors (`V1` and `V2`) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions: - - -```r -table(Ionosphere$V1) -#> -#> 0 1 -#> 38 313 -table(Ionosphere$V2) -#> -#> 0 -#> 351 -``` - -There's no point of putting `V2` into any model since is is a zero-variance predictor. `V1` is not but it _could_ be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value: - - -```r -glm(Class ~ ., data = Ionosphere, family = binomial) -#> Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]): contrasts can be applied only to factors with 2 or more levels - -# Surprisingly, this doesn't help: - -glm(Class ~ . 
- V2, data = Ionosphere, family = binomial) -#> Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]): contrasts can be applied only to factors with 2 or more levels -``` - -Let's remove these two problematic variables: - - -```r -Ionosphere <- Ionosphere %>% select(-V1, -V2) -``` - -## Inputs for the search - -To demonstrate, we'll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the `\(\sigma\)` parameter in the kernel function: - - -```r -svm_mod <- - svm_rbf(cost = tune(), rbf_sigma = tune()) %>% - set_mode("classification") %>% - set_engine("kernlab") -``` - -In this article, tuning will be demonstrated in two ways, using: - -- a standard R formula, and -- a recipe. - -Let's create a simple recipe here: - - -```r -iono_rec <- - recipe(Class ~ ., data = Ionosphere) %>% - # remove any zero variance predictors - step_zv(all_predictors()) %>% - # remove any linear combinations - step_lincomb(all_numeric()) -``` - -The only other required item for tuning is a resampling strategy as defined by an rsample object. Let's demonstrate using basic bootstrapping: - - -```r -set.seed(4943) -iono_rs <- bootstraps(Ionosphere, times = 30) -``` - -## Optional inputs - -An _optional_ step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. To do this, a yardstick package function can be used to create a metric set: - - -```r -roc_vals <- metric_set(roc_auc) -``` - -If no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used. 
- -Also, a control object can be passed that specifies different aspects of the search. Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on. - - -```r -ctrl <- control_grid(verbose = FALSE, save_pred = TRUE) -``` - -## Executing with a formula - -First, we can use the formula interface: - - -```r -set.seed(35) -formula_res <- - svm_mod %>% - tune_grid( - Class ~ ., - resamples = iono_rs, - metrics = roc_vals, - control = ctrl - ) -formula_res -#> # Tuning results -#> # Bootstrap sampling -#> # A tibble: 30 × 5 -#> splits id .metrics .notes .predictions -#> -#> 1 Bootstrap01 -#> 2 Bootstrap02 -#> 3 Bootstrap03 -#> 4 Bootstrap04 -#> 5 Bootstrap05 -#> 6 Bootstrap06 -#> 7 Bootstrap07 -#> 8 Bootstrap08 -#> 9 Bootstrap09 -#> 10 Bootstrap10 -#> # … with 20 more rows -``` - -The `.metrics` column contains tibbles of the performance metrics for each tuning parameter combination: - - -```r -formula_res %>% - select(.metrics) %>% - slice(1) %>% - pull(1) -#> [[1]] -#> # A tibble: 10 × 6 -#> cost rbf_sigma .metric .estimator .estimate .config -#> -#> 1 0.00849 1.11e-10 roc_auc binary 0.815 Preprocessor1_Model01 -#> 2 0.176 7.28e- 8 roc_auc binary 0.839 Preprocessor1_Model02 -#> 3 14.9 3.93e- 4 roc_auc binary 0.870 Preprocessor1_Model03 -#> 4 5.51 2.10e- 3 roc_auc binary 0.919 Preprocessor1_Model04 -#> 5 1.87 3.53e- 7 roc_auc binary 0.838 Preprocessor1_Model05 -#> 6 0.00719 1.45e- 5 roc_auc binary 0.832 Preprocessor1_Model06 -#> 7 0.00114 8.41e- 2 roc_auc binary 0.969 Preprocessor1_Model07 -#> 8 0.950 1.74e- 1 roc_auc binary 0.984 Preprocessor1_Model08 -#> 9 0.189 3.13e- 6 roc_auc binary 0.832 Preprocessor1_Model09 -#> 10 0.0364 4.96e- 9 roc_auc binary 0.839 Preprocessor1_Model10 -``` - -To get the final resampling estimates, the `collect_metrics()` function can be used on the grid object: - - -```r -estimates <- collect_metrics(formula_res) -estimates -#> # A tibble: 10 × 8 -#> cost rbf_sigma .metric .estimator mean n 
std_err .config -#> -#> 1 0.00849 1.11e-10 roc_auc binary 0.822 30 0.00718 Preprocessor1_Mode… -#> 2 0.176 7.28e- 8 roc_auc binary 0.871 30 0.00525 Preprocessor1_Mode… -#> 3 14.9 3.93e- 4 roc_auc binary 0.916 30 0.00497 Preprocessor1_Mode… -#> 4 5.51 2.10e- 3 roc_auc binary 0.960 30 0.00378 Preprocessor1_Mode… -#> 5 1.87 3.53e- 7 roc_auc binary 0.871 30 0.00524 Preprocessor1_Mode… -#> 6 0.00719 1.45e- 5 roc_auc binary 0.871 30 0.00534 Preprocessor1_Mode… -#> 7 0.00114 8.41e- 2 roc_auc binary 0.966 30 0.00301 Preprocessor1_Mode… -#> 8 0.950 1.74e- 1 roc_auc binary 0.979 30 0.00204 Preprocessor1_Mode… -#> 9 0.189 3.13e- 6 roc_auc binary 0.871 30 0.00536 Preprocessor1_Mode… -#> 10 0.0364 4.96e- 9 roc_auc binary 0.871 30 0.00537 Preprocessor1_Mode… -``` - -The top combinations are: - - -```r -show_best(formula_res, metric = "roc_auc") -#> # A tibble: 5 × 8 -#> cost rbf_sigma .metric .estimator mean n std_err .config -#> -#> 1 0.950 0.174 roc_auc binary 0.979 30 0.00204 Preprocessor1_Model… -#> 2 0.00114 0.0841 roc_auc binary 0.966 30 0.00301 Preprocessor1_Model… -#> 3 5.51 0.00210 roc_auc binary 0.960 30 0.00378 Preprocessor1_Model… -#> 4 14.9 0.000393 roc_auc binary 0.916 30 0.00497 Preprocessor1_Model… -#> 5 0.00719 0.0000145 roc_auc binary 0.871 30 0.00534 Preprocessor1_Model… -``` - -## Executing with a recipe - -Next, we can use the same syntax but pass a *recipe* in as the pre-processor argument: - - -```r -set.seed(325) -recipe_res <- - svm_mod %>% - tune_grid( - iono_rec, - resamples = iono_rs, - metrics = roc_vals, - control = ctrl - ) -recipe_res -#> # Tuning results -#> # Bootstrap sampling -#> # A tibble: 30 × 5 -#> splits id .metrics .notes .predictions -#> -#> 1 Bootstrap01 -#> 2 Bootstrap02 -#> 3 Bootstrap03 -#> 4 Bootstrap04 -#> 5 Bootstrap05 -#> 6 Bootstrap06 -#> 7 Bootstrap07 -#> 8 Bootstrap08 -#> 9 Bootstrap09 -#> 10 Bootstrap10 -#> # … with 20 more rows -``` - -The best setting here is: - - -```r -show_best(recipe_res, metric = "roc_auc") -#> # A 
tibble: 5 × 8 -#> cost rbf_sigma .metric .estimator mean n std_err .config -#> -#> 1 15.6 0.182 roc_auc binary 0.981 30 0.00213 Preprocessor1_Model04 -#> 2 0.385 0.0276 roc_auc binary 0.978 30 0.00222 Preprocessor1_Model03 -#> 3 0.143 0.00243 roc_auc binary 0.930 30 0.00443 Preprocessor1_Model06 -#> 4 0.841 0.000691 roc_auc binary 0.892 30 0.00504 Preprocessor1_Model07 -#> 5 0.0499 0.0000335 roc_auc binary 0.872 30 0.00521 Preprocessor1_Model08 -``` - -## Out-of-sample predictions - -If we used `save_pred = TRUE` to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using `collect_predictions()`: - - -```r -collect_predictions(recipe_res) -#> # A tibble: 38,740 × 8 -#> id .pred_bad .pred_good .row cost rbf_sigma Class .config -#> -#> 1 Bootstrap01 0.333 0.667 1 0.00296 0.00000383 good Preprocessor… -#> 2 Bootstrap01 0.333 0.667 9 0.00296 0.00000383 good Preprocessor… -#> 3 Bootstrap01 0.333 0.667 10 0.00296 0.00000383 bad Preprocessor… -#> 4 Bootstrap01 0.333 0.667 12 0.00296 0.00000383 bad Preprocessor… -#> 5 Bootstrap01 0.333 0.667 14 0.00296 0.00000383 bad Preprocessor… -#> 6 Bootstrap01 0.333 0.667 15 0.00296 0.00000383 good Preprocessor… -#> 7 Bootstrap01 0.333 0.667 16 0.00296 0.00000383 bad Preprocessor… -#> 8 Bootstrap01 0.334 0.666 22 0.00296 0.00000383 bad Preprocessor… -#> 9 Bootstrap01 0.333 0.667 23 0.00296 0.00000383 good Preprocessor… -#> 10 Bootstrap01 0.334 0.666 24 0.00296 0.00000383 bad Preprocessor… -#> # … with 38,730 more rows -``` - -We can obtain the hold-out sets for all the resamples augmented with the predictions using `augment()`, which provides opportunities for flexible visualization of model results: - - -```r -augment(recipe_res) %>% - ggplot(aes(V3, .pred_good, color = Class)) + - geom_point(show.legend = FALSE) + - facet_wrap(~Class) -``` - - - -## Session information - - -``` -#> ─ Session info 
───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> kernlab * 0.9-31 2022-06-09 [1] CRAN (R 4.2.0) -#> mlbench * 2.1-3 2021-01-29 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/learn/work/tune-text/index.Rmarkdown b/content/learn/work/tune-text/index.Rmarkdown deleted file mode 100755 index d51284ac..00000000 --- a/content/learn/work/tune-text/index.Rmarkdown +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: "Tuning text models" -tags: [rsample, recipes, textrecipes, parsnip, tune, yardstick] -categories: [model tuning] -type: learn-subsection -weight: 4 -description: | - Prepare text data for predictive modeling and tune with both grid and 
iterative search. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/learn/common.R")) -``` - -```{r load, include=FALSE} -library(tidymodels) -library(stopwords) -library(doMC) -registerDoMC(cores = parallel::detectCores()) - -pkgs <- c("tidymodels", "textrecipes", "textfeatures", "stopwords") - -theme_set(theme_bw()) -``` - -## Introduction - -`r req_pkgs(pkgs)` - -This article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on. - - -## Text as data - -The text data we'll use in this article are from Amazon: - -> This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review. - -This article uses a small subset of the total reviews [available at the original source](https://snap.stanford.edu/data/web-FineFoods.html). We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set. - -There is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not. - -```{r data} -library(tidymodels) - -data("small_fine_foods") -training_data -``` - -Our modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not. - -## Inputs for the search - -Text, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. 
There are multiple ways to process and prepare text for modeling; let's add several steps together to create different kinds of features: - -* Create an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the [textfeatures](https://github.com/mkearney/textfeatures) package for this. - -* [Tokenize](https://smltar.com/tokenization.html) the text (i.e. break the text into smaller components such as words). - -* Remove stop words such as "the", "an", "of", etc. - -* [Stem](https://smltar.com/stemming.html) tokens to a common root where possible. - -* Convert tokens to dummy variables via a [signed, binary hash function](https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html). - -* Optionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a [Yeo-Johnson transformation](https://bookdown.org/max/FES/numeric-one-to-one.html). - -* Remove predictors with a single distinct value. - -* Center and scale all predictors. - - -{{% note %}} We will end up with two kinds of features: - -- dummy/indicator variables for the count-based features like number of digits or punctuation characters -- hash features for the tokens like "salsa" or "delicious". {{%/ note %}} - -Some of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. In this preprocessing approach, the main tuning parameter is the number of hashing features to use. - -Before we start building our preprocessing recipe, we need some helper objects. For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features: - -```{r feat-list} -library(textfeatures) - -basics <- names(textfeatures:::count_functions) -head(basics) -``` - -Also, the implementation of feature hashes does not produce the binary values we need. 
This small function will help convert the scores to values of -1, 0, or 1: - -```{r hash-func} -binary_hash <- function(x) { - x <- ifelse(x < 0, -1, x) - x <- ifelse(x > 0, 1, x) - x -} -``` - -Now, let's put this all together in one recipe: - -```{r text-rec} -library(textrecipes) - -pre_proc <- - recipe(score ~ product + review, data = training_data) %>% - # Do not use the product ID as a predictor - update_role(product, new_role = "id") %>% - # Make a copy of the raw text - step_mutate(review_raw = review) %>% - # Compute the initial features. This removes the `review_raw` column - step_textfeature(review_raw) %>% - # Make the feature names shorter - step_rename_at( - starts_with("textfeature_"), - fn = ~ gsub("textfeature_review_raw_", "", .) - ) %>% - step_tokenize(review) %>% - step_stopwords(review) %>% - step_stem(review) %>% - # Here is where the tuning parameter is declared - step_texthash(review, signed = TRUE, num_terms = tune()) %>% - # Simplify these names - step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) %>% - # Convert the features from counts to values of -1, 0, or 1 - step_mutate_at(starts_with("hash"), fn = binary_hash) %>% - # Transform the initial feature set - step_YeoJohnson(one_of(!!basics)) %>% - step_zv(all_predictors()) %>% - step_normalize(all_predictors()) -``` - -{{% warning %}} Note that, when objects from the global environment are used, they are injected into the step objects via `!!`. For some parallel processing technologies, these objects may not be found by the worker processes. {{%/ warning %}} - -The preprocessing recipe is long and complex (often typical for working with text data) but the model we'll use is more straightforward. 
Let's stick with a regularized logistic regression model: - -```{r lr} -lr_mod <- - logistic_reg(penalty = tune(), mixture = tune()) %>% - set_engine("glmnet") -``` - -There are three tuning parameters for this data analysis: - -- `num_terms`, the number of feature hash variables to create -- `penalty`, the amount of regularization for the model -- `mixture`, the proportion of L1 regularization - -## Resampling - -There are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models. - -```{r folds} -set.seed(8935) -folds <- vfold_cv(training_data) -folds -``` - -## Grid search - -Let's begin our tuning with [grid search](https://www.tidymodels.org/learn/work/tune-svm/) and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the ["submodel-trick"](https://tune.tidymodels.org/articles/extras/optimizations.html#sub-model-speed-ups-1). The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features. - -```{r grid} -five_star_grid <- - crossing( - penalty = 10^seq(-3, 0, length = 20), - mixture = c(0.01, 0.25, 0.50, 0.75, 1), - num_terms = 2^c(8, 10, 12) - ) -five_star_grid -``` - -Note that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work. - -Let's save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. 
Use an extraction function to do this: - -```{r extract} -glmnet_vars <- function(x) { - # `x` will be a workflow object - mod <- extract_fit_engine(x) - # `df` is the number of model terms for each penalty value - tibble(penalty = mod$lambda, num_vars = mod$df) -} - -ctrl <- control_grid(extract = glmnet_vars, verbose = TRUE) -``` - -Finally, let's run the grid search: - -```{r grid-search, message=FALSE} -roc_scores <- metric_set(roc_auc) - -set.seed(1559) -five_star_glmnet <- - tune_grid( - lr_mod, - pre_proc, - resamples = folds, - grid = five_star_grid, - metrics = roc_scores, - control = ctrl - ) - -five_star_glmnet -``` - -This took a while to complete! What do the results look like? Let's get the resampling estimates of the area under the ROC curve for each tuning parameter: - -```{r grid-roc} -grid_roc <- - collect_metrics(five_star_glmnet) %>% - arrange(desc(mean)) -grid_roc -``` - -The best results have a fairly high penalty value and focus on the ridge penalty (i.e. no feature selection via the lasso's L1 penalty). The best solutions also use the largest number of hashing features. - -What is the relationship between performance and the tuning parameters? - -```{r grid-plot, fig.width = 10} -autoplot(five_star_glmnet, metric = "roc_auc") -``` - -- We can definitely see that performance improves with the number of features included. In this article, we've used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal. - -- The profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. What's that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model. -- The panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn't much difference between the best performance for the different mixture values. 
A case could be made that we should choose a _larger_ mixture value and a _smaller_ penalty to select a simpler model that contains fewer predictors. - -- If more experimentation were conducted, a larger set of features (more than 4096) should also be considered. - -We'll come back to the extracted glmnet components at the end of this article. - -## Directed search - -What if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently? - -Let's pretend that we haven't seen the grid search results. We'll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design. - -It might be good to use a custom `dials` object for the number of hash terms. The default object, `num_terms()`, uses a linear range and tries to set the upper bound of the parameter using the data. Instead, let's create a parameter set, change the scale to be `log2`, and define the same range as was used in grid search. - -```{r hash-range} -hash_range <- num_terms(c(8, 12), trans = log2_trans()) -hash_range -``` - -To use this, we have to merge the recipe and `parsnip` model object into a workflow: - -```{r wflow} -five_star_wflow <- - workflow() %>% - add_recipe(pre_proc) %>% - add_model(lr_mod) -``` - -Then we can extract and manipulate the corresponding parameter set: - -```{r search-set} -five_star_set <- - five_star_wflow %>% - extract_parameter_set_dials() %>% - update( - num_terms = hash_range, - penalty = penalty(c(-3, 0)), - mixture = mixture(c(0.05, 1.00)) - ) -``` - -This is passed to the search function via the `param_info` argument. - -The initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). 
If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on [acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html) for more details). The tune package has a built-in function called `expo_decay()` that can help accomplish this: - -```{r decay} -trade_off_decay <- function(iter) { - expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4) -} -``` - -Using these values, let's run the search: - -```{r search} -set.seed(12) -five_star_search <- - tune_bayes( - five_star_wflow, - resamples = folds, - param_info = five_star_set, - initial = 5, - iter = 30, - metrics = roc_scores, - objective = exp_improve(trade_off_decay), - control = control_bayes(verbose = TRUE) - ) - -five_star_search -``` - -These results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. -Let's look at a plot of model performance versus the search iterations: - -```{r iter-plot} -autoplot(five_star_search, type = "performance") -``` - -{{% note %}} What would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). We might also restrict the penalty and mixture parameters to have a lower upper bound. {{%/ note %}} - -## Extracted results - -Let's return to the grid search results and examine the results of our `extract` function. 
For each _fitted model_, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let's look at these results for the best model: - -```{r best-res} -params <- select_best(five_star_glmnet, metric = "roc_auc") -params -``` - -Recall that we saved the glmnet results in a tibble. The column `five_star_glmnet$.extracts` is a list of tibbles. As an example, the first element of the list is: - -```{r first-elem} -five_star_glmnet$.extracts[[1]] -``` - -More nested tibbles! Let's `unnest()` the `five_star_glmnet$.extracts` column: - -```{r unnest} -library(tidyr) -extracted <- - five_star_glmnet %>% - dplyr::select(id, .extracts) %>% - unnest(cols = .extracts) -extracted -``` - -One thing to realize here is that `tune_grid()` [may not fit all of the models](https://tune.tidymodels.org/articles/extras/optimizations.html) that are evaluated. In this case, for each value of `mixture` and `num_terms`, the model is fit over _all_ penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the `penalty` column in `extracted`: - - -```{r select-best} -extracted <- - extracted %>% - dplyr::select(-penalty) %>% - inner_join(params, by = c("num_terms", "mixture")) %>% - # Now remove it from the final results - dplyr::select(-penalty) -extracted -``` - -Now we can get at the results that we want using another `unnest()`: - -```{r final-unnest} -extracted <- - extracted %>% - unnest(col = .extracts) # <- these contain a `penalty` column -extracted -``` - -Let's look at a plot of these results (per resample): - -```{r var-plot} -ggplot(extracted, aes(x = penalty, y = num_vars)) + - geom_line(aes(group = id, col = id), alpha = .5) + - ylab("Number of retained predictors") + - scale_x_log10() + - ggtitle(paste("mixture = ", params$mixture, "and", params$num_terms, "features")) + - theme(legend.position = "none") -``` - -These results might help guide the choice of the `penalty` range if more optimization was conducted. - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/learn/work/tune-text/index.markdown b/content/learn/work/tune-text/index.markdown deleted file mode 100644 index 3eaaafa7..00000000 --- a/content/learn/work/tune-text/index.markdown +++ /dev/null @@ -1,946 +0,0 @@ ---- -title: "Tuning text models" -tags: [rsample, recipes, textrecipes, parsnip, tune, yardstick] -categories: [model tuning] -type: learn-subsection -weight: 4 -description: | - Prepare text data for predictive modeling and tune with both grid and iterative search. ---- - - - - - -## Introduction - -To use the code in this article, you will need to install the following packages: stopwords, textfeatures, textrecipes, and tidymodels. - -This article demonstrates an advanced example for training and tuning models for text data. 
Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on. - - -## Text as data - -The text data we'll use in this article are from Amazon: - -> This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review. - -This article uses a small subset of the total reviews [available at the original source](https://snap.stanford.edu/data/web-FineFoods.html). We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set. - -There is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not. - - -```r -library(tidymodels) - -data("small_fine_foods") -training_data -#> # A tibble: 4,000 × 3 -#> product review score -#> -#> 1 B000J0LSBG "this stuff is not stuffing its not good at all save yo… other -#> 2 B000EYLDYE "I absolutely LOVE this dried fruit. LOVE IT. Whenever I … great -#> 3 B0026LIO9A "GREAT DEAL, CONVENIENT TOO. Much cheaper than WalMart and… great -#> 4 B00473P8SK "Great flavor, we go through a ton of this sauce! I discove… great -#> 5 B001SAWTNM "This is excellent salsa/hot sauce, but you can get it for … great -#> 6 B000FAG90U "Again, this is the best dogfood out there. One suggestion… great -#> 7 B006BXTCEK "The box I received was filled with teas, hot chocolates, a… other -#> 8 B002GWH5OY "This is delicious coffee which compares favorably with muc… great -#> 9 B003R0MFYY "Don't let these little tiny cans fool you. 
They pack a lo… great -#> 10 B001EO5ZXI "One of the nicest, smoothest cup of chai I've made. Nice m… great -#> # … with 3,990 more rows -``` - -Our modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not. - -## Inputs for the search - -Text, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. There are multiple ways to process and prepare text for modeling; let's add several steps together to create different kinds of features: - -* Create an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the [textfeatures](https://github.com/mkearney/textfeatures) package for this. - -* [Tokenize](https://smltar.com/tokenization.html) the text (i.e. break the text into smaller components such as words). - -* Remove stop words such as "the", "an", "of", etc. - -* [Stem](https://smltar.com/stemming.html) tokens to a common root where possible. - -* Convert tokens to dummy variables via a [signed, binary hash function](https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html). - -* Optionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a [Yeo-Johnson transformation](https://bookdown.org/max/FES/numeric-one-to-one.html). - -* Remove predictors with a single distinct value. - -* Center and scale all predictors. - - -{{% note %}} We will end up with two kinds of features: - -- dummy/indicator variables for the count-based features like number of digits or punctuation characters -- hash features for the tokens like "salsa" or "delicious". {{%/ note %}} - -Some of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. 
In this preprocessing approach, the main tuning parameter is the number of hashing features to use. - -Before we start building our preprocessing recipe, we need some helper objects. For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features: - - -```r -library(textfeatures) - -basics <- names(textfeatures:::count_functions) -head(basics) -#> [1] "n_words" "n_uq_words" "n_charS" "n_uq_charS" "n_digits" -#> [6] "n_hashtags" -``` - -Also, the implementation of feature hashes does not produce the binary values we need. This small function will help convert the scores to values of -1, 0, or 1: - - -```r -binary_hash <- function(x) { - x <- ifelse(x < 0, -1, x) - x <- ifelse(x > 0, 1, x) - x -} -``` - -Now, let's put this all together in one recipe: - - -```r -library(textrecipes) - -pre_proc <- - recipe(score ~ product + review, data = training_data) %>% - # Do not use the product ID as a predictor - update_role(product, new_role = "id") %>% - # Make a copy of the raw text - step_mutate(review_raw = review) %>% - # Compute the initial features. This removes the `review_raw` column - step_textfeature(review_raw) %>% - # Make the feature names shorter - step_rename_at( - starts_with("textfeature_"), - fn = ~ gsub("textfeature_review_raw_", "", .) 
- ) %>% - step_tokenize(review) %>% - step_stopwords(review) %>% - step_stem(review) %>% - # Here is where the tuning parameter is declared - step_texthash(review, signed = TRUE, num_terms = tune()) %>% - # Simplify these names - step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) %>% - # Convert the features from counts to values of -1, 0, or 1 - step_mutate_at(starts_with("hash"), fn = binary_hash) %>% - # Transform the initial feature set - step_YeoJohnson(one_of(!!basics)) %>% - step_zv(all_predictors()) %>% - step_normalize(all_predictors()) -``` - -{{% warning %}} Note that, when objects from the global environment are used, they are injected into the step objects via `!!`. For some parallel processing technologies, these objects may not be found by the worker processes. {{%/ warning %}} - -The preprocessing recipe is long and complex (often typical for working with text data) but the model we'll use is more straightforward. Let's stick with a regularized logistic regression model: - - -```r -lr_mod <- - logistic_reg(penalty = tune(), mixture = tune()) %>% - set_engine("glmnet") -``` - -There are three tuning parameters for this data analysis: - -- `num_terms`, the number of feature hash variables to create -- `penalty`, the amount of regularization for the model -- `mixture`, the proportion of L1 regularization - -## Resampling - -There are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models. 
- - -```r -set.seed(8935) -folds <- vfold_cv(training_data) -folds -#> # 10-fold cross-validation -#> # A tibble: 10 × 2 -#> splits id -#> -#> 1 Fold01 -#> 2 Fold02 -#> 3 Fold03 -#> 4 Fold04 -#> 5 Fold05 -#> 6 Fold06 -#> 7 Fold07 -#> 8 Fold08 -#> 9 Fold09 -#> 10 Fold10 -``` - -## Grid search - -Let's begin our tuning with [grid search](https://www.tidymodels.org/learn/work/tune-svm/) and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the ["submodel-trick"](https://tune.tidymodels.org/articles/extras/optimizations.html#sub-model-speed-ups-1). The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features. - - -```r -five_star_grid <- - crossing( - penalty = 10^seq(-3, 0, length = 20), - mixture = c(0.01, 0.25, 0.50, 0.75, 1), - num_terms = 2^c(8, 10, 12) - ) -five_star_grid -#> # A tibble: 300 × 3 -#> penalty mixture num_terms -#> -#> 1 0.001 0.01 256 -#> 2 0.001 0.01 1024 -#> 3 0.001 0.01 4096 -#> 4 0.001 0.25 256 -#> 5 0.001 0.25 1024 -#> 6 0.001 0.25 4096 -#> 7 0.001 0.5 256 -#> 8 0.001 0.5 1024 -#> 9 0.001 0.5 4096 -#> 10 0.001 0.75 256 -#> # … with 290 more rows -``` - -Note that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work. - -Let's save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. 
Use an extraction function to do this: - - -```r -glmnet_vars <- function(x) { - # `x` will be a workflow object - mod <- extract_fit_engine(x) - # `df` is the number of model terms for each penalty value - tibble(penalty = mod$lambda, num_vars = mod$df) -} - -ctrl <- control_grid(extract = glmnet_vars, verbose = TRUE) -``` - -Finally, let's run the grid search: - - -```r -roc_scores <- metric_set(roc_auc) - -set.seed(1559) -five_star_glmnet <- - tune_grid( - lr_mod, - pre_proc, - resamples = folds, - grid = five_star_grid, - metrics = roc_scores, - control = ctrl - ) - -five_star_glmnet -#> # Tuning results -#> # 10-fold cross-validation -#> # A tibble: 10 × 5 -#> splits id .metrics .notes .extracts -#> -#> 1 Fold01 -#> 2 Fold02 -#> 3 Fold03 -#> 4 Fold04 -#> 5 Fold05 -#> 6 Fold06 -#> 7 Fold07 -#> 8 Fold08 -#> 9 Fold09 -#> 10 Fold10 -``` - -This took a while to complete! What do the results look like? Let's get the resampling estimates of the area under the ROC curve for each tuning parameter: - - -```r -grid_roc <- - collect_metrics(five_star_glmnet) %>% - arrange(desc(mean)) -grid_roc -#> # A tibble: 300 × 9 -#> penalty mixture num_terms .metric .estimator mean n std_err .config -#> -#> 1 0.695 0.01 4096 roc_auc binary 0.811 10 0.00799 Preprocesso… -#> 2 0.483 0.01 4096 roc_auc binary 0.811 10 0.00797 Preprocesso… -#> 3 0.0379 0.25 4096 roc_auc binary 0.809 10 0.00755 Preprocesso… -#> 4 0.0183 0.5 4096 roc_auc binary 0.807 10 0.00776 Preprocesso… -#> 5 0.0264 0.25 4096 roc_auc binary 0.807 10 0.00792 Preprocesso… -#> 6 0.0127 0.75 4096 roc_auc binary 0.807 10 0.00773 Preprocesso… -#> 7 0.336 0.01 4096 roc_auc binary 0.806 10 0.00781 Preprocesso… -#> 8 0.00886 1 4096 roc_auc binary 0.806 10 0.00783 Preprocesso… -#> 9 1 0.01 4096 roc_auc binary 0.806 10 0.00801 Preprocesso… -#> 10 0.0546 0.25 4096 roc_auc binary 0.805 10 0.00783 Preprocesso… -#> # … with 290 more rows -``` - -The best results have a fairly high penalty value and focus on the ridge penalty (i.e. 
no feature selection via the lasso's L1 penalty). The best solutions also use the largest number of hashing features. - -What is the relationship between performance and the tuning parameters? - - -```r -autoplot(five_star_glmnet, metric = "roc_auc") -``` - - - -- We can definitely see that performance improves with the number of features included. In this article, we've used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal. - -- The profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. What's that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model. -- The panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn't much difference between the best performance for the different mixture values. A case could be made that we should choose a _larger_ mixture value and a _smaller_ penalty to select a simpler model that contains fewer predictors. - -- If more experimentation were conducted, a larger set of features (more than 4096) should also be considered. - -We'll come back to the extracted glmnet components at the end of this article. - -## Directed search - -What if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently? - -Let's pretend that we haven't seen the grid search results. We'll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design. - -It might be good to use a custom `dials` object for the number of hash terms. The default object, `num_terms()`, uses a linear range and tries to set the upper bound of the parameter using the data. Instead, let's create a parameter set, change the scale to be `log2`, and define the same range as was used in grid search. 
- - -```r -hash_range <- num_terms(c(8, 12), trans = log2_trans()) -hash_range -#> # Model Terms (quantitative) -#> Transformer: log-2 [1e-100, Inf] -#> Range (transformed scale): [8, 12] -``` - -To use this, we have to merge the recipe and `parsnip` model object into a workflow: - - -```r -five_star_wflow <- - workflow() %>% - add_recipe(pre_proc) %>% - add_model(lr_mod) -``` - -Then we can extract and manipulate the corresponding parameter set: - - -```r -five_star_set <- - five_star_wflow %>% - extract_parameter_set_dials() %>% - update( - num_terms = hash_range, - penalty = penalty(c(-3, 0)), - mixture = mixture(c(0.05, 1.00)) - ) -``` - -This is passed to the search function via the `param_info` argument. - -The initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on [acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html) for more details). 
The tune package has a built-in function called `expo_decay()` that can help accomplish this: - - -```r -trade_off_decay <- function(iter) { - expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4) -} -``` - -Using these values, let's run the search: - - -```r -set.seed(12) -five_star_search <- - tune_bayes( - five_star_wflow, - resamples = folds, - param_info = five_star_set, - initial = 5, - iter = 30, - metrics = roc_scores, - objective = exp_improve(trade_off_decay), - control = control_bayes(verbose = TRUE) - ) -#> -#> ❯ Generating a set of 5 initial parameter results -#> ✓ Initialization complete -#> -#> -#> ── Iteration 1 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.7624 (@iter 0) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.01 -#> i penalty=0.319, mixture=0.248, num_terms=345 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.5632 (+/-0.0116) -#> -#> ── Iteration 2 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.7624 (@iter 0) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.007788 -#> i penalty=0.00542, mixture=0.614, num_terms=1231 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.7763 (+/-0.00963) -#> -#> ── Iteration 3 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.7763 (@iter 2) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.006065 -#> i penalty=0.001, mixture=0.13, num_terms=1522 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7049 (+/-0.00759) -#> -#> ── Iteration 4 
─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.7763 (@iter 2) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.004724 -#> i penalty=0.00699, mixture=0.849, num_terms=2123 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.7978 (+/-0.00779) -#> -#> ── Iteration 5 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.7978 (@iter 4) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.003679 -#> i penalty=0.008, mixture=0.72, num_terms=4022 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8083 (+/-0.00486) -#> -#> ── Iteration 6 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8083 (@iter 5) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.002865 -#> i penalty=0.0131, mixture=0.842, num_terms=3976 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8113 (+/-0.00723) -#> -#> ── Iteration 7 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.002231 -#> i penalty=0.0122, mixture=0.871, num_terms=4042 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8084 (+/-0.00724) -#> -#> ── Iteration 8 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 
0.001738 -#> i penalty=0.0154, mixture=0.944, num_terms=4070 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8055 (+/-0.0109) -#> -#> ── Iteration 9 ─────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.001353 -#> i penalty=0.0101, mixture=0.236, num_terms=4048 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7839 (+/-0.00584) -#> -#> ── Iteration 10 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.001054 -#> i penalty=0.016, mixture=0.564, num_terms=3731 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.803 (+/-0.00692) -#> -#> ── Iteration 11 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0008208 -#> i penalty=0.00909, mixture=0.811, num_terms=4094 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8047 (+/-0.00858) -#> -#> ── Iteration 12 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0006393 -#> i penalty=0.00115, mixture=0.984, num_terms=3124 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.756 (+/-0.00491) -#> -#> ── Iteration 13 ────────────────────────────────────────────────────── 
-#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0004979 -#> i penalty=0.0137, mixture=0.69, num_terms=3265 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8044 (+/-0.00782) -#> -#> ── Iteration 14 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8113 (@iter 6) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0003877 -#> i penalty=0.00797, mixture=0.985, num_terms=4013 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8116 (+/-0.00704) -#> -#> ── Iteration 15 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8116 (@iter 14) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.000302 -#> i penalty=0.0129, mixture=0.948, num_terms=4004 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8132 (+/-0.00848) -#> -#> ── Iteration 16 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8132 (@iter 15) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0002352 -#> i penalty=0.02, mixture=0.674, num_terms=4037 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8047 (+/-0.00716) -#> -#> ── Iteration 17 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8132 (@iter 15) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0001832 -#> i penalty=0.0171, mixture=0.844, 
num_terms=3935 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.807 (+/-0.0074) -#> -#> ── Iteration 18 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8132 (@iter 15) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0001426 -#> i penalty=0.0132, mixture=0.964, num_terms=3947 -#> i Estimating performance -#> ✓ Estimating performance -#> ♥ Newest results: roc_auc=0.8144 (+/-0.00835) -#> -#> ── Iteration 19 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 0.0001111 -#> i penalty=0.0247, mixture=0.124, num_terms=3689 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7923 (+/-0.00754) -#> -#> ── Iteration 20 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 8.652e-05 -#> i penalty=0.021, mixture=0.515, num_terms=295 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.76 (+/-0.00747) -#> -#> ── Iteration 21 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 6.738e-05 -#> i penalty=0.0678, mixture=0.0725, num_terms=3618 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.806 (+/-0.00771) -#> -#> ── Iteration 22 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 
(@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 5.248e-05 -#> i penalty=0.0449, mixture=0.122, num_terms=3982 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8075 (+/-0.0048) -#> -#> ── Iteration 23 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 4.087e-05 -#> i penalty=0.0571, mixture=0.0852, num_terms=4000 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8074 (+/-0.00664) -#> -#> ── Iteration 24 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 3.183e-05 -#> i penalty=0.00176, mixture=0.969, num_terms=261 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.741 (+/-0.00955) -#> -#> ── Iteration 25 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 2.479e-05 -#> i penalty=0.0315, mixture=0.307, num_terms=4072 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.813 (+/-0.00624) -#> -#> ── Iteration 26 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 1.93e-05 -#> i penalty=0.0303, mixture=0.483, num_terms=3876 -#> i Estimating performance 
-#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.7989 (+/-0.00839) -#> -#> ── Iteration 27 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 1.503e-05 -#> i penalty=0.0386, mixture=0.264, num_terms=3888 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8122 (+/-0.00748) -#> -#> ── Iteration 28 ────────────────────────────────────────────────────── -#> -#> i Current best: roc_auc=0.8144 (@iter 18) -#> i Gaussian process model -#> ✓ Gaussian process model -#> i Generating 5000 candidates -#> i Predicted candidates -#> i Trade-off value: 1.171e-05 -#> i penalty=0.0588, mixture=0.335, num_terms=3814 -#> i Estimating performance -#> ✓ Estimating performance -#> ⓧ Newest results: roc_auc=0.8007 (+/-0.0102) -#> ! No improvement for 10 iterations; returning current results. - -five_star_search -#> # Tuning results -#> # 10-fold cross-validation -#> # A tibble: 290 × 5 -#> splits id .metrics .notes .iter -#> -#> 1 Fold01 0 -#> 2 Fold02 0 -#> 3 Fold03 0 -#> 4 Fold04 0 -#> 5 Fold05 0 -#> 6 Fold06 0 -#> 7 Fold07 0 -#> 8 Fold08 0 -#> 9 Fold09 0 -#> 10 Fold10 0 -#> # … with 280 more rows -``` - -These results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. 
-Let's look at a plot of model performance versus the search iterations: - - -```r -autoplot(five_star_search, type = "performance") -``` - - - -{{% note %}} What would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). We might also restrict the penalty and mixture parameters to have a lower upper bound. {{%/ note %}} - -## Extracted results - -Let's return to the grid search results and examine the results of our `extract` function. For each _fitted model_, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let's look at these results for the best model: - - -```r -params <- select_best(five_star_glmnet, metric = "roc_auc") -params -#> # A tibble: 1 × 4 -#> penalty mixture num_terms .config -#> -#> 1 0.695 0.01 4096 Preprocessor3_Model019 -``` - -Recall that we saved the glmnet results in a tibble. The column `five_star_glmnet$.extracts` is a list of tibbles. As an example, the first element of the list is: - - -```r -five_star_glmnet$.extracts[[1]] -#> # A tibble: 300 × 5 -#> num_terms penalty mixture .extracts .config -#> -#> 1 256 1 0.01 Preprocessor1_Model001 -#> 2 256 1 0.01 Preprocessor1_Model002 -#> 3 256 1 0.01 Preprocessor1_Model003 -#> 4 256 1 0.01 Preprocessor1_Model004 -#> 5 256 1 0.01 Preprocessor1_Model005 -#> 6 256 1 0.01 Preprocessor1_Model006 -#> 7 256 1 0.01 Preprocessor1_Model007 -#> 8 256 1 0.01 Preprocessor1_Model008 -#> 9 256 1 0.01 Preprocessor1_Model009 -#> 10 256 1 0.01 Preprocessor1_Model010 -#> # … with 290 more rows -``` - -More nested tibbles! 
Let's `unnest()` the `five_star_glmnet$.extracts` column: - - -```r -library(tidyr) -extracted <- - five_star_glmnet %>% - dplyr::select(id, .extracts) %>% - unnest(cols = .extracts) -extracted -#> # A tibble: 3,000 × 6 -#> id num_terms penalty mixture .extracts .config -#> -#> 1 Fold01 256 1 0.01 Preprocessor1_Model001 -#> 2 Fold01 256 1 0.01 Preprocessor1_Model002 -#> 3 Fold01 256 1 0.01 Preprocessor1_Model003 -#> 4 Fold01 256 1 0.01 Preprocessor1_Model004 -#> 5 Fold01 256 1 0.01 Preprocessor1_Model005 -#> 6 Fold01 256 1 0.01 Preprocessor1_Model006 -#> 7 Fold01 256 1 0.01 Preprocessor1_Model007 -#> 8 Fold01 256 1 0.01 Preprocessor1_Model008 -#> 9 Fold01 256 1 0.01 Preprocessor1_Model009 -#> 10 Fold01 256 1 0.01 Preprocessor1_Model010 -#> # … with 2,990 more rows -``` - -One thing to realize here is that `tune_grid()` [may not fit all of the models](https://tune.tidymodels.org/articles/extras/optimizations.html) that are evaluated. In this case, for each value of `mixture` and `num_terms`, the model is fit over _all_ penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the `penalty` column in `extracted`: - - - -```r -extracted <- - extracted %>% - dplyr::select(-penalty) %>% - inner_join(params, by = c("num_terms", "mixture")) %>% - # Now remove it from the final results - dplyr::select(-penalty) -extracted -#> # A tibble: 200 × 6 -#> id num_terms mixture .extracts .config.x .config.y -#> -#> 1 Fold01 4096 0.01 Preprocessor3_Model001 Preproces… -#> 2 Fold01 4096 0.01 Preprocessor3_Model002 Preproces… -#> 3 Fold01 4096 0.01 Preprocessor3_Model003 Preproces… -#> 4 Fold01 4096 0.01 Preprocessor3_Model004 Preproces… -#> 5 Fold01 4096 0.01 Preprocessor3_Model005 Preproces… -#> 6 Fold01 4096 0.01 Preprocessor3_Model006 Preproces… -#> 7 Fold01 4096 0.01 Preprocessor3_Model007 Preproces… -#> 8 Fold01 4096 0.01 Preprocessor3_Model008 Preproces… -#> 9 Fold01 4096 0.01 Preprocessor3_Model009 Preproces… -#> 10 Fold01 4096 0.01 Preprocessor3_Model010 Preproces… -#> # … with 190 more rows -``` - -Now we can get at the results that we want using another `unnest()`: - - -```r -extracted <- - extracted %>% - unnest(col = .extracts) # <- these contain a `penalty` column -extracted -#> # A tibble: 20,000 × 7 -#> id num_terms mixture penalty num_vars .config.x .config.y -#> -#> 1 Fold01 4096 0.01 8.60 0 Preprocessor3_Model001 Preprocesso… -#> 2 Fold01 4096 0.01 8.21 2 Preprocessor3_Model001 Preprocesso… -#> 3 Fold01 4096 0.01 7.84 2 Preprocessor3_Model001 Preprocesso… -#> 4 Fold01 4096 0.01 7.48 3 Preprocessor3_Model001 Preprocesso… -#> 5 Fold01 4096 0.01 7.14 3 Preprocessor3_Model001 Preprocesso… -#> 6 Fold01 4096 0.01 6.82 3 Preprocessor3_Model001 Preprocesso… -#> 7 Fold01 4096 0.01 6.51 4 Preprocessor3_Model001 Preprocesso… -#> 8 Fold01 4096 0.01 6.21 6 Preprocessor3_Model001 Preprocesso… -#> 9 Fold01 4096 0.01 5.93 7 Preprocessor3_Model001 Preprocesso… -#> 10 Fold01 4096 0.01 5.66 7 Preprocessor3_Model001 Preprocesso… -#> # … with 19,990 more rows -``` - -Let's look at a plot of these results 
(per resample): - - -```r -ggplot(extracted, aes(x = penalty, y = num_vars)) + - geom_line(aes(group = id, col = id), alpha = .5) + - ylab("Number of retained predictors") + - scale_x_log10() + - ggtitle(paste("mixture = ", params$mixture, "and", params$num_terms, "features")) + - theme(legend.position = "none") -``` - - - -These results might help guide the choice of the `penalty` range if more optimization was conducted. - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> stopwords * 2.3 2021-10-28 [1] CRAN (R 4.2.0) -#> textfeatures * 0.3.3 2019-09-03 [1] CRAN (R 4.2.0) -#> textrecipes * 1.0.1 2022-10-06 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> 
──────────────────────────────────────────────────────────────────── -``` diff --git a/content/packages/index.md b/content/packages/index.md deleted file mode 100644 index 70c01640..00000000 --- a/content/packages/index.md +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: Tidymodels packages ---- - -## Installation and use - -* Install many of the packages in the tidymodels ecosystem by running `install.packages("tidymodels")`. - -* Run `library(tidymodels)` to load the core packages and make them available in your current R session. - -
    - - - -
    -
    - -
    -

    tidymodels

    -

    tidymodels is a meta-package that installs and loads the core packages listed below that you need for modeling and machine learning. -

    -
    -
    -
    - -
    -

    rsample

    -

    rsample provides infrastructure for efficient data splitting and resampling.

    -
    -
    -
    - -
    -

    parsnip

    -

    parsnip is a tidy, unified interface to models that can be used to try a range of models without getting bogged down in the syntactical minutiae of the underlying packages.

    -
    -
    -
    - -
    -

    recipes

    -

    recipes is a tidy interface to data pre-processing tools for feature engineering.

    -
    -
    -
    - -
    -

    workflows

    -

    workflows bundle your pre-processing, modeling, and post-processing together.

    -
    -
    -
    - -
    -

    tune

    -

    tune helps you optimize the hyperparameters of your model and pre-processing steps.

    -
    -
    -
    - -
    -

    yardstick

    -

    yardstick measures the effectiveness of models using performance metrics.

    -
    -
    -
    - -
    -

    broom

    -

    broom converts the information in common statistical R objects into user-friendly, predictable formats. -

    -
    -
    -
    - -
    -

    dials

    -

    dials creates and manages tuning parameters and parameter grids. -

    -
    -
    - -
    -
    - -Learn more about the tidymodels metapackage itself at . - -## Specialized packages - -The tidymodels framework also includes many other packages designed for specialized data analysis and modeling tasks. They are not loaded automatically with `library(tidymodels)`, so you'll need to load each one with its own call to `library()`. These packages include: - -### [Perform statistical analysis](/learn/statistics/) - -* [infer](https://infer.tidymodels.org/) is a high-level API for tidyverse-friendly statistical inference. - -* The [corrr](https://corrr.tidymodels.org/) package has tidy interfaces for working with correlation matrices. - -### [Create robust models](/learn/models/) - -* The [spatialsample](http://spatialsample.tidymodels.org/) package provides resampling functions and classes like rsample, but specialized for spatial data. - -* parsnip also has additional packages that contain more model definitions. [discrim](https://discrim.tidymodels.org/) contains definitions for discriminant analysis models, [poissonreg](https://poissonreg.tidymodels.org/) provides definitions for Poisson regression models, [plsmod](https://plsmod.tidymodels.org/) enables linear projection models, and [rules](https://rules.tidymodels.org/) does the same for rule-based classification and regression models. [baguette](https://baguette.tidymodels.org/) creates ensemble models via bagging, and [multilevelmod](https://multilevelmod.tidymodels.org/) provides support for multilevel models (otherwise known as mixed models or hierarchical models). - -* There are several add-on packages for creating recipes. [embed](https://embed.tidymodels.org/) contains steps to create embeddings or projections of predictors. [textrecipes](https://textrecipes.tidymodels.org/) has extra steps for text processing, and [themis](https://themis.tidymodels.org/) can help alleviate class imbalance using sampling methods. 
- -* [tidypredict](https://tidypredict.tidymodels.org/) and [modeldb](https://modeldb.tidymodels.org/) can convert prediction equations to different languages (e.g. SQL) and fit some models in-database. - -### [Tune, compare, and work with your models](/learn/work/) - -* To try out multiple different workflows (i.e. bundles of pre-processor and model) at once, [workflowsets](https://workflowsets.tidymodels.org/) lets you create sets of workflow objects for tuning and resampling. - -* To integrate predictions from many models, the [stacks](https://stacks.tidymodels.org/) package provides tools for stacked ensemble modeling. - -* The [finetune](https://finetune.tidymodels.org/) package extends the tune package with more approaches such as racing and simulated annealing. - -* The [usemodels](https://usemodels.tidymodels.org/) package creates templates and automatically generates code to fit and tune models. - -* [probably](https://probably.tidymodels.org/) has tools for post-processing class probability estimates. - -* The [tidyposterior](https://tidyposterior.tidymodels.org/) package enables users to make formal statistical comparisons between models using resampling and Bayesian methods. - -* Some R objects become inconveniently large when saved to disk. The [butcher](https://butcher.tidymodels.org/) package can reduce the size of those objects by removing the sub-components. - -* To know whether the data that you are predicting are _extrapolations_ from the training set, [applicable](https://applicable.tidymodels.org/) can produce metrics that measure extrapolation. - -* [shinymodels](https://shinymodels.tidymodels.org/) lets you explore tuning or resampling results via a Shiny app. - -### [Develop custom modeling tools](/learn/develop/) - -* [hardhat](https://hardhat.tidymodels.org/) is a _developer-focused_ package that helps beginners create high-quality R packages for modeling. 
diff --git a/content/start/_index.md b/content/start/_index.md deleted file mode 100644 index 96dc5e6f..00000000 --- a/content/start/_index.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Get Started -subtitle: Welcome! -description: "What do you need to know to start using tidymodels? Learn what you need in 5 articles." ---- - -Here, learn what you need to get started with tidymodels in five articles, starting with how to create a model and ending with a beginning-to-end modeling case study. After you are comfortable with these basics, you can [learn how to go farther with tidymodels](/learn/). - -## If you are new to R or the tidyverse - -To get the most out of tidymodels, we recommend that you start by learning some basics about R and the [tidyverse](https://www.tidyverse.org/) first, then return here when you feel ready. Here are some resources to start learning: - -* [Finding Your Way To R](https://education.rstudio.com/learn/), from the RStudio Education team. - -* [Learn the tidyverse](https://www.tidyverse.org/learn/), from the tidyverse team. - -* [Statistical Inference via Data Science: A ModernDive into R and the Tidyverse](/books/moderndive/). 
- diff --git a/content/start/case-study/figs/lr-plot-lines-1.svg b/content/start/case-study/figs/lr-plot-lines-1.svg deleted file mode 100644 index 1ea88e7f..00000000 --- a/content/start/case-study/figs/lr-plot-lines-1.svg +++ /dev/null @@ -1,103 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.5 -0.6 -0.7 -0.8 - - - - - - - - -0.0001 -0.0010 -0.0100 -0.1000 -penalty -Area under the ROC Curve - - diff --git a/content/start/case-study/figs/rf-roc-curve-1.svg b/content/start/case-study/figs/rf-roc-curve-1.svg deleted file mode 100644 index 49084a14..00000000 --- a/content/start/case-study/figs/rf-roc-curve-1.svg +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/content/start/case-study/index.Rmarkdown b/content/start/case-study/index.Rmarkdown deleted file mode 100644 index fa236e52..00000000 --- a/content/start/case-study/index.Rmarkdown +++ /dev/null @@ -1,505 +0,0 @@ ---- -title: "A predictive modeling case study" -weight: 5 -tags: [parsnip, recipes, rsample, workflows, tune] -categories: [model fitting, tuning] -description: | 
- Develop, from beginning to end, a predictive model using best practices. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/start/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(readr) -library(vip) -library(tidymodels) -pkgs <- c("tidymodels", "readr", "glmnet", "ranger", "vip") -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction {#intro} - -Each of the four previous [_Get Started_](/start/) articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you'll need to start working with models. In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays. - -```{r echo=FALSE, out.width="90%"} -knitr::include_graphics("img/hotel.jpg") -``` - - -`r article_req_pkgs(pkgs)` - -```{r eval=FALSE} -library(tidymodels) - -# Helper packages -library(readr) # for importing data -library(vip) # for variable importance plots -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The Hotel Bookings Data {#data} - -Let’s use hotel bookings data from [Antonio, Almeida, and Nunes (2019)](https://doi.org/10.1016/j.dib.2018.11.126) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a [`#TidyTuesday`](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11) dataset with a [data dictionary](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11#data-dictionary) you may want to look over to learn more about the variables. We'll use a slightly [edited version of the dataset](https://gist.github.com/topepo/05a74916c343e57a71c51d6bc32a21ce) for this case study. 
- -To start, let's read our hotel data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (""): - -```{r hotel-import, message = FALSE} -library(tidymodels) -library(readr) - -hotels <- - read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>% - mutate(across(where(is.character), as.factor)) - -dim(hotels) -``` - -In the original paper, the [authors](https://doi.org/10.1016/j.dib.2018.11.126) caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we'll be analyzing _hotel stays_ only. - -```{r glimpse-hotels} -glimpse(hotels) -``` - -We will build a model to predict which actual hotel stays included children and/or babies, and which did not. Our outcome variable `children` is a factor variable with two levels: - -```{r count-children} -hotels %>% - count(children) %>% - mutate(prop = n/sum(n)) -``` - -We can see that children were only in `r round(mean(hotels$children == "children") * 100, 1)`% of the reservations. This type of class imbalance can often wreak havoc on an analysis. 
While there are several methods for combating this issue using [recipes](/find/recipes/) (search for steps to `upsample` or `downsample`) or other more specialized packages like [themis](https://themis.tidymodels.org/), the analyses shown below analyze the data as-is. - -## Data Splitting & Resampling {#data-split} - -For a data splitting strategy, let's reserve 25% of the stays to the test set. As in our [*Evaluate your model with resampling*](/start/resampling/#data-split) article, we know our outcome variable `children` is pretty imbalanced so we'll use a stratified random sample: - -```{r tr-te-split} -set.seed(123) -splits <- initial_split(hotels, strata = children) - -hotel_other <- training(splits) -hotel_test <- testing(splits) - -# training set proportions by children -hotel_other %>% - count(children) %>% - mutate(prop = n/sum(n)) - -# test set proportions by children -hotel_test %>% - count(children) %>% - mutate(prop = n/sum(n)) -``` - -In our articles so far, we've relied on 10-fold cross-validation as the primary resampling method using [`rsample::vfold_cv()`](https://rsample.tidymodels.org/reference/vfold_cv.html). This has created 10 different resamples of the training set (which we further split into _analysis_ and _assessment_ sets), producing 10 different performance metrics that we then aggregated. - -For this case study, rather than using multiple iterations of resampling, let's create a single resample called a _validation set_. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the `r format(nrow(hotel_other), big.mark = ",")` stays that were not used for testing, which we called `hotel_other`. This split creates two new datasets: - -+ the set held out for the purpose of measuring performance, called the _validation set_, and - -+ the remaining data used to fit the model, called the _training set_. 
- -```{r validation-fig, echo = FALSE, fig.align='center', out.width="50%"} -knitr::include_graphics("img/validation-split.svg") -``` - -We'll use the `validation_split()` function to allocate 20% of the `hotel_other` stays to the _validation set_ and `r format(nrow(hotel_other) * .8, big.mark = ",")` stays to the _training set_. This means that our model performance metrics will be computed on a single set of `r format(nrow(hotel_other) * .2, big.mark = ",")` hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling. - -```{r validation-set} -set.seed(234) -val_set <- validation_split(hotel_other, - strata = children, - prop = 0.80) -val_set -``` - -This function, like `initial_split()`, has the same `strata` argument, which uses stratified sampling to create the resample. This means that we'll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original `hotel_other` proportions. - -## A first model: penalized logistic regression {#first-model} - -Since our outcome variable `children` is categorical, logistic regression would be a good first model to start. Let's use a model that can perform feature selection during training. The [glmnet](https://cran.r-project.org/web/packages/glmnet/index.html) R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a _penalty_ on the process so that less relevant predictors are driven towards a value of zero. One of the glmnet penalization methods, called the [lasso method](https://en.wikipedia.org/wiki/Lasso_(statistics)), can actually set the predictor slopes to zero if a large enough penalty is used. 
- -### Build the model - -To specify a penalized logistic regression model that uses a feature selection penalty, let's use the parsnip package with the [glmnet engine](/find/parsnip/): - -```{r logistic-model} -lr_mod <- - logistic_reg(penalty = tune(), mixture = 1) %>% - set_engine("glmnet") -``` - -We'll set the `penalty` argument to `tune()` as a placeholder for now. This is a model hyperparameter that we will [tune](/start/tuning/) to find the best value for making predictions with our data. Setting `mixture` to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model. - -### Create the recipe - -Let's create a [recipe](/start/recipes/) to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a [number of useful recipe steps](/start/recipes/#features) for creating features from dates: - -+ `step_date()` creates predictors for the year, month, and day of the week. - -+ `step_holiday()` generates a set of indicator variables for specific holidays. Although we don't know where these two hotels are located, we do know that the countries of origin for most stays are based in Europe. - -+ `step_rm()` removes variables; here we'll use it to remove the original date variable since we no longer want it in the model. - -Additionally, all categorical predictors (e.g., `distribution_channel`, `hotel`, ...) should be converted to dummy variables, and all numeric predictors need to be centered and scaled. - -+ `step_dummy()` converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data. - -+ `step_zv()` removes indicator variables that only contain a single unique value (e.g. all zeros). 
This is important because, for penalized models, the predictors should be centered and scaled. - -+ `step_normalize()` centers and scales numeric variables. - -Putting all these steps together into a recipe for a penalized logistic regression model, we have: - -```{r logistic-features} -holidays <- c("AllSouls", "AshWednesday", "ChristmasEve", "Easter", - "ChristmasDay", "GoodFriday", "NewYearsDay", "PalmSunday") - -lr_recipe <- - recipe(children ~ ., data = hotel_other) %>% - step_date(arrival_date) %>% - step_holiday(arrival_date, holidays = holidays) %>% - step_rm(arrival_date) %>% - step_dummy(all_nominal_predictors()) %>% - step_zv(all_predictors()) %>% - step_normalize(all_predictors()) -``` - - -### Create the workflow - -As we introduced in [*Preprocess your data with recipes*](/start/recipes/#fit-workflow), let's bundle the model and recipe into a single `workflow()` object to make management of the R objects easier: - -```{r logistic-workflow} -lr_workflow <- - workflow() %>% - add_model(lr_mod) %>% - add_recipe(lr_recipe) -``` - -### Create the grid for tuning - -Before we fit this model, we need to set up a grid of `penalty` values to tune. In our [*Tune model parameters*](/start/tuning/) article, we used [`dials::grid_regular()`](start/tuning/#tune-grid) to create an expanded grid based on a combination of two hyperparameters. Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values: - -```{r logistic-grid} -lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30)) - -lr_reg_grid %>% top_n(-5) # lowest penalty values -lr_reg_grid %>% top_n(5) # highest penalty values -``` - -### Train and tune the model - -Let's use `tune::tune_grid()` to train these 30 penalized logistic regression models. We'll also save the validation set predictions (via the call to `control_grid()`) so that diagnostic information can be available after the model fit. 
The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate—the proportion of stays including children— is very low for these data). - -```{r logistic-fit, cache = TRUE, message = FALSE} -lr_res <- - lr_workflow %>% - tune_grid(val_set, - grid = lr_reg_grid, - control = control_grid(save_pred = TRUE), - metrics = metric_set(roc_auc)) -``` - -It might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values: - -```{r logistic-results, fig.height = 4.25, fig.width = 6} -lr_plot <- - lr_res %>% - collect_metrics() %>% - ggplot(aes(x = penalty, y = mean)) + - geom_point() + - geom_line() + - ylab("Area under the ROC Curve") + - scale_x_log10(labels = scales::label_number()) - -lr_plot -``` - -This plot shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. This happens because a large enough penalty will remove _all_ predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class). 
- -Our model performance seems to plateau at the smaller penalty values, so going by the `roc_auc` metric alone could lead us to multiple options for the "best" value for this hyperparameter: - -```{r} -top_models <- - lr_res %>% - show_best("roc_auc", n = 15) %>% - arrange(penalty) -top_models -``` - -```{r top-cand-mods, echo = FALSE} -# Adding a candidate model ID variable -top_models <- - top_models %>% - mutate(cand_model = row_number()) - -top_pen <- - lr_res %>% - select_best("roc_auc") %>% - pull(penalty) - -top_cand <- - top_models %>% - filter(penalty == top_pen) %>% - pull(cand_model) - -select_cand <- 12 - -select_pen <- - top_models %>% - filter(cand_model == select_cand) %>% - pull(penalty) - -select_roc <- - top_models %>% - filter(cand_model == select_cand) %>% - pull(mean) -``` - -Every candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used `select_best()`, it would return candidate model `r top_cand` with a penalty value of `r format(top_pen, digits = 3)`, shown with the dotted line below. - -```{r lr-plot-lines, echo=FALSE, fig.height = 4.25, fig.width = 6} -lr_plot + - geom_vline(color = "#CA225E", linewidth = 3, xintercept = top_pen) + - geom_vline(color = "#CA225E", xintercept = select_pen) -``` - -However, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. For example, candidate model `r select_cand` with a penalty value of `r format(select_pen, digits = 3)` has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we'd prefer to choose a higher penalty value. 
- -Let's select this value and visualize the validation set ROC curve: -```{r logistic-best} -lr_best <- - lr_res %>% - collect_metrics() %>% - arrange(penalty) %>% - slice(12) -lr_best -``` - - -```{r logistic-roc-curve} -lr_auc <- - lr_res %>% - collect_predictions(parameters = lr_best) %>% - roc_curve(children, .pred_children) %>% - mutate(model = "Logistic Regression") - -autoplot(lr_auc) -``` - -The level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method. - -## A second model: tree-based ensemble {#second-model} - -An effective and low-maintenance modeling technique is a _random forest_. This model was also used in our [*Evaluate your model with resampling*](/start/resampling/) article. Compared to logistic regression, a random forest model is more flexible. A random forest is an _ensemble model_ typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.). - -### Build the model and improve training time - -Although the default hyperparameters for random forests tend to give reasonable results, we'll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. 
The tune package can do [parallel processing](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) for you, and allows users to use multiple cores or separate machines to fit models. - -But, here we are using a single validation set, so parallelization isn't an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do: - -```{r num-cores} -cores <- parallel::detectCores() -cores -``` - -We have `r cores` cores to work with. We can pass this information to the ranger engine when we set up our parsnip `rand_forest()` model. To enable parallel processing, we can pass engine-specific arguments like `num.threads` to ranger when we set the engine: - -```{r rf-model} -rf_mod <- - rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% - set_engine("ranger", num.threads = cores) %>% - set_mode("classification") -``` - -This works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you — we typically do not recommend relying on the modeling engine (like we did here) to do this. - -In this model, we used `tune()` as a placeholder for the `mtry` and `min_n` argument values, because these are our two hyperparameters that we will [tune](/start/tuning/). - -### Create the recipe and workflow - -Unlike penalized logistic regression models, random forest models do not require [dummy](https://bookdown.org/max/FES/categorical-trees.html) or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our `arrival_date` variable. 
As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data. - -```{r rf-features} -rf_recipe <- - recipe(children ~ ., data = hotel_other) %>% - step_date(arrival_date) %>% - step_holiday(arrival_date) %>% - step_rm(arrival_date) -``` - -Adding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest: - -```{r rf-workflow} -rf_workflow <- - workflow() %>% - add_model(rf_mod) %>% - add_recipe(rf_recipe) -``` - -### Train and tune the model - -When we set up our parsnip model, we chose two hyperparameters for tuning: - -```{r message=FALSE} -rf_mod - -# show what will be tuned -extract_parameter_set_dials(rf_mod) -``` - -The `mtry` hyperparameter sets the number of predictor variables that each node in the decision tree "sees" and can learn about, so it can range from 1 to the total number of features present; when `mtry` = all possible features, the model is the same as bagging decision trees. The `min_n` hyperparameter sets the minimum `n` to split at any node. - -We will use a space-filling design to tune, with 25 candidate models: - -```{r rf-fit, cache = TRUE} -set.seed(345) -rf_res <- - rf_workflow %>% - tune_grid(val_set, - grid = 25, - control = control_grid(save_pred = TRUE), - metrics = metric_set(roc_auc)) -``` - -The message printed above *"Creating pre-processing data to finalize unknown parameter: mtry"* is related to the size of the data set. Since `mtry` depends on the number of predictors in the data set, `tune_grid()` determines the upper bound for `mtry` once it receives the data. 
- -Here are our top 5 random forest models, out of the 25 candidates: - -```{r} -rf_res %>% - show_best(metric = "roc_auc") -``` - -Right away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of `r format(select_roc, digits = 3)`. - -Plotting the results of the tuning process highlights that both `mtry` (number of predictors at each node) and `min_n` (minimum number of data points required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values — all but one of the ROC AUC values are greater than 0.90. - -```{r rf-results, fig.height = 4} -autoplot(rf_res) -``` - -Let's select the best model according to the ROC AUC metric. Our final tuning parameter values are: - -```{r rf-best} -rf_best <- - rf_res %>% - select_best(metric = "roc_auc") -rf_best -``` - -To calculate the data needed to plot the ROC curve, we use `collect_predictions()`. This is only possible after tuning with `control_grid(save_pred = TRUE)`. In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children. 
- -```{r R.options = list(tibble.print_min = 5)} -rf_res %>% - collect_predictions() -``` - -To filter the predictions for only our best random forest model, we can use the `parameters` argument and pass it our tibble with the best hyperparameter values from tuning, which we called `rf_best`: - -```{r rf-roc} -rf_auc <- - rf_res %>% - collect_predictions(parameters = rf_best) %>% - roc_curve(children, .pred_children) %>% - mutate(model = "Random Forest") -``` - -Now, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model: - -```{r rf-lr-roc-curve} -bind_rows(rf_auc, lr_auc) %>% - ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + - geom_path(linewidth = 1.5, alpha = 0.8) + - geom_abline(lty = 3) + - coord_equal() + - scale_color_viridis_d(option = "plasma", end = .6) -``` - -The random forest is uniformly better across event probability thresholds. - -## The last fit {#last-fit} - -Our goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set. - -We'll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: `importance = "impurity"`. This will provide _variable importance_ scores for this last model, which gives some insight into which predictors drive model performance. 
- -```{r last-rf, cache = TRUE} -# the last model -last_rf_mod <- - rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% - set_engine("ranger", num.threads = cores, importance = "impurity") %>% - set_mode("classification") - -# the last workflow -last_rf_workflow <- - rf_workflow %>% - update_model(last_rf_mod) - -# the last fit -set.seed(345) -last_rf_fit <- - last_rf_workflow %>% - last_fit(splits) - -last_rf_fit -``` - -This fitted workflow contains _everything_, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance? - -```{r} -last_rf_fit %>% - collect_metrics() -``` - -This ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data. - -We can access those variable importance scores via the `.workflow` column. We can [extract out the fit](https://tune.tidymodels.org/reference/extract-tune.html) from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features: - -```{r rf-importance} -last_rf_fit %>% - extract_fit_parsnip() %>% - vip(num_features = 20) -``` - -The most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned. - -Let's generate our last ROC curve to visualize. 
Since the event we are predicting is the first level in the `children` factor ("children"), we provide `roc_curve()` with the [relevant class probability](https://yardstick.tidymodels.org/reference/roc_curve.html#relevant-level) `.pred_children`: - -```{r test-set-roc-curve} -last_rf_fit %>% - collect_predictions() %>% - roc_curve(children, .pred_children) %>% - autoplot() -``` - -Based on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data. - -## Where to next? {#next} - -If you've made it to the end of this series of [*Get Started*](/start/) articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can [learn how to go farther](/learn/) with tidymodels in your modeling and machine learning projects. - -Here are some more ideas for where to go next: - -+ Study up on statistics and modeling with our comprehensive [books](/books/). - -+ Dig deeper into the [package documentation sites](/packages/) to find functions that meet your modeling needs. Use the [searchable tables](/find/) to explore what is possible. - -+ Keep up with the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/). - -+ Find ways to ask for [help](/help/) and [contribute to tidymodels](/contribute) to help others. - -###
    Happy modeling!
    - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/start/case-study/index.markdown b/content/start/case-study/index.markdown deleted file mode 100644 index 2d01d026..00000000 --- a/content/start/case-study/index.markdown +++ /dev/null @@ -1,684 +0,0 @@ ---- -title: "A predictive modeling case study" -weight: 5 -tags: [parsnip, recipes, rsample, workflows, tune] -categories: [model fitting, tuning] -description: | - Develop, from beginning to end, a predictive model using best practices. ---- - - - - - - -## Introduction {#intro} - -Each of the four previous [_Get Started_](/start/) articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you'll need to start working with models. In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays. - - - - -To use code in this article, you will need to install the following packages: glmnet, ranger, readr, tidymodels, and vip. - - -```r -library(tidymodels) - -# Helper packages -library(readr) # for importing data -library(vip) # for variable importance plots -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The Hotel Bookings Data {#data} - -Let’s use hotel bookings data from [Antonio, Almeida, and Nunes (2019)](https://doi.org/10.1016/j.dib.2018.11.126) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a [`#TidyTuesday`](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11) dataset with a [data dictionary](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11#data-dictionary) you may want to look over to learn more about the variables. 
We'll use a slightly [edited version of the dataset](https://gist.github.com/topepo/05a74916c343e57a71c51d6bc32a21ce) for this case study. - -To start, let's read our hotel data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (""): - - -```r -library(tidymodels) -library(readr) - -hotels <- - read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>% - mutate(across(where(is.character), as.factor)) - -dim(hotels) -#> [1] 50000 23 -``` - -In the original paper, the [authors](https://doi.org/10.1016/j.dib.2018.11.126) caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we'll be analyzing _hotel stays_ only. 
- - -```r -glimpse(hotels) -#> Rows: 50,000 -#> Columns: 23 -#> $ hotel City_Hotel, City_Hotel, Resort_Hotel, R… -#> $ lead_time 217, 2, 95, 143, 136, 67, 47, 56, 80, 6… -#> $ stays_in_weekend_nights 1, 0, 2, 2, 1, 2, 0, 0, 0, 2, 1, 0, 1, … -#> $ stays_in_week_nights 3, 1, 5, 6, 4, 2, 2, 3, 4, 2, 2, 1, 2, … -#> $ adults 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 1, 2, … -#> $ children none, none, none, none, none, none, chi… -#> $ meal BB, BB, BB, HB, HB, SC, BB, BB, BB, BB,… -#> $ country DEU, PRT, GBR, ROU, PRT, GBR, ESP, ESP,… -#> $ market_segment Offline_TA/TO, Direct, Online_TA, Onlin… -#> $ distribution_channel TA/TO, Direct, TA/TO, TA/TO, Direct, TA… -#> $ is_repeated_guest 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … -#> $ previous_cancellations 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … -#> $ previous_bookings_not_canceled 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … -#> $ reserved_room_type A, D, A, A, F, A, C, B, D, A, A, D, A, … -#> $ assigned_room_type A, K, A, A, F, A, C, A, D, A, D, D, A, … -#> $ booking_changes 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … -#> $ deposit_type No_Deposit, No_Deposit, No_Deposit, No_… -#> $ days_in_waiting_list 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, … -#> $ customer_type Transient-Party, Transient, Transient, … -#> $ average_daily_rate 80.75, 170.00, 8.00, 81.00, 157.60, 49.… -#> $ required_car_parking_spaces none, none, none, none, none, none, non… -#> $ total_of_special_requests 1, 3, 2, 1, 4, 1, 1, 1, 1, 1, 0, 1, 0, … -#> $ arrival_date 2016-09-01, 2017-08-25, 2016-11-19, 20… -``` - -We will build a model to predict which actual hotel stays included children and/or babies, and which did not. Our outcome variable `children` is a factor variable with two levels: - - -```r -hotels %>% - count(children) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> children n prop -#> -#> 1 children 4038 0.0808 -#> 2 none 45962 0.919 -``` - -We can see that children were only in 8.1% of the reservations. 
This type of class imbalance can often wreak havoc on an analysis. While there are several methods for combating this issue using [recipes](/find/recipes/) (search for steps to `upsample` or `downsample`) or other more specialized packages like [themis](https://themis.tidymodels.org/), the analyses shown below analyze the data as-is. - -## Data Splitting & Resampling {#data-split} - -For a data splitting strategy, let's reserve 25% of the stays to the test set. As in our [*Evaluate your model with resampling*](/start/resampling/#data-split) article, we know our outcome variable `children` is pretty imbalanced so we'll use a stratified random sample: - - -```r -set.seed(123) -splits <- initial_split(hotels, strata = children) - -hotel_other <- training(splits) -hotel_test <- testing(splits) - -# training set proportions by children -hotel_other %>% - count(children) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> children n prop -#> -#> 1 children 3027 0.0807 -#> 2 none 34473 0.919 - -# test set proportions by children -hotel_test %>% - count(children) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> children n prop -#> -#> 1 children 1011 0.0809 -#> 2 none 11489 0.919 -``` - -In our articles so far, we've relied on 10-fold cross-validation as the primary resampling method using [`rsample::vfold_cv()`](https://rsample.tidymodels.org/reference/vfold_cv.html). This has created 10 different resamples of the training set (which we further split into _analysis_ and _assessment_ sets), producing 10 different performance metrics that we then aggregated. - -For this case study, rather than using multiple iterations of resampling, let's create a single resample called a _validation set_. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the 37,500 stays that were not used for testing, which we called `hotel_other`. 
This split creates two new datasets: - -+ the set held out for the purpose of measuring performance, called the _validation set_, and - -+ the remaining data used to fit the model, called the _training set_. - - - -We'll use the `validation_split()` function to allocate 20% of the `hotel_other` stays to the _validation set_ and 30,000 stays to the _training set_. This means that our model performance metrics will be computed on a single set of 7,500 hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling. - - -```r -set.seed(234) -val_set <- validation_split(hotel_other, - strata = children, - prop = 0.80) -val_set -#> # Validation Set Split (0.8/0.2) using stratification -#> # A tibble: 1 × 2 -#> splits id -#> -#> 1 validation -``` - -This function, like `initial_split()`, has the same `strata` argument, which uses stratified sampling to create the resample. This means that we'll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original `hotel_other` proportions. - -## A first model: penalized logistic regression {#first-model} - -Since our outcome variable `children` is categorical, logistic regression would be a good first model to start. Let's use a model that can perform feature selection during training. The [glmnet](https://cran.r-project.org/web/packages/glmnet/index.html) R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a _penalty_ on the process so that less relevant predictors are driven towards a value of zero. One of the glmnet penalization methods, called the [lasso method](https://en.wikipedia.org/wiki/Lasso_(statistics)), can actually set the predictor slopes to zero if a large enough penalty is used. 
- -### Build the model - -To specify a penalized logistic regression model that uses a feature selection penalty, let's use the parsnip package with the [glmnet engine](/find/parsnip/): - - -```r -lr_mod <- - logistic_reg(penalty = tune(), mixture = 1) %>% - set_engine("glmnet") -``` - -We'll set the `penalty` argument to `tune()` as a placeholder for now. This is a model hyperparameter that we will [tune](/start/tuning/) to find the best value for making predictions with our data. Setting `mixture` to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model. - -### Create the recipe - -Let's create a [recipe](/start/recipes/) to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a [number of useful recipe steps](/start/recipes/#features) for creating features from dates: - -+ `step_date()` creates predictors for the year, month, and day of the week. - -+ `step_holiday()` generates a set of indicator variables for specific holidays. Although we don't know where these two hotels are located, we do know that the countries for origin for most stays are based in Europe. - -+ `step_rm()` removes variables; here we'll use it to remove the original date variable since we no longer want it in the model. - -Additionally, all categorical predictors (e.g., `distribution_channel`, `hotel`, ...) should be converted to dummy variables, and all numeric predictors need to be centered and scaled. - -+ `step_dummy()` converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data. - -+ `step_zv()` removes indicator variables that only contain a single unique value (e.g. all zeros). 
This is important because, for penalized models, the predictors should be centered and scaled. - -+ `step_normalize()` centers and scales numeric variables. - -Putting all these steps together into a recipe for a penalized logistic regression model, we have: - - -```r -holidays <- c("AllSouls", "AshWednesday", "ChristmasEve", "Easter", - "ChristmasDay", "GoodFriday", "NewYearsDay", "PalmSunday") - -lr_recipe <- - recipe(children ~ ., data = hotel_other) %>% - step_date(arrival_date) %>% - step_holiday(arrival_date, holidays = holidays) %>% - step_rm(arrival_date) %>% - step_dummy(all_nominal_predictors()) %>% - step_zv(all_predictors()) %>% - step_normalize(all_predictors()) -``` - - -### Create the workflow - -As we introduced in [*Preprocess your data with recipes*](/start/recipes/#fit-workflow), let's bundle the model and recipe into a single `workflow()` object to make management of the R objects easier: - - -```r -lr_workflow <- - workflow() %>% - add_model(lr_mod) %>% - add_recipe(lr_recipe) -``` - -### Create the grid for tuning - -Before we fit this model, we need to set up a grid of `penalty` values to tune. In our [*Tune model parameters*](/start/tuning/) article, we used [`dials::grid_regular()`](start/tuning/#tune-grid) to create an expanded grid based on a combination of two hyperparameters. 
Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values: - - -```r -lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30)) - -lr_reg_grid %>% top_n(-5) # lowest penalty values -#> Selecting by penalty -#> # A tibble: 5 × 1 -#> penalty -#> -#> 1 0.0001 -#> 2 0.000127 -#> 3 0.000161 -#> 4 0.000204 -#> 5 0.000259 -lr_reg_grid %>% top_n(5) # highest penalty values -#> Selecting by penalty -#> # A tibble: 5 × 1 -#> penalty -#> -#> 1 0.0386 -#> 2 0.0489 -#> 3 0.0621 -#> 4 0.0788 -#> 5 0.1 -``` - -### Train and tune the model - -Let's use `tune::tune_grid()` to train these 30 penalized logistic regression models. We'll also save the validation set predictions (via the call to `control_grid()`) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate—the proportion of stays including children— is very low for these data). - - -```r -lr_res <- - lr_workflow %>% - tune_grid(val_set, - grid = lr_reg_grid, - control = control_grid(save_pred = TRUE), - metrics = metric_set(roc_auc)) -``` - -It might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values: - - -```r -lr_plot <- - lr_res %>% - collect_metrics() %>% - ggplot(aes(x = penalty, y = mean)) + - geom_point() + - geom_line() + - ylab("Area under the ROC Curve") + - scale_x_log10(labels = scales::label_number()) - -lr_plot -``` - - - -This plots shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. 
This happens because a large enough penalty will remove _all_ predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class). - -Our model performance seems to plateau at the smaller penalty values, so going by the `roc_auc` metric alone could lead us to multiple options for the "best" value for this hyperparameter: - - -```r -top_models <- - lr_res %>% - show_best("roc_auc", n = 15) %>% - arrange(penalty) -top_models -#> # A tibble: 15 × 7 -#> penalty .metric .estimator mean n std_err .config -#> -#> 1 0.000127 roc_auc binary 0.872 1 NA Preprocessor1_Model02 -#> 2 0.000161 roc_auc binary 0.872 1 NA Preprocessor1_Model03 -#> 3 0.000204 roc_auc binary 0.873 1 NA Preprocessor1_Model04 -#> 4 0.000259 roc_auc binary 0.873 1 NA Preprocessor1_Model05 -#> 5 0.000329 roc_auc binary 0.874 1 NA Preprocessor1_Model06 -#> 6 0.000418 roc_auc binary 0.874 1 NA Preprocessor1_Model07 -#> 7 0.000530 roc_auc binary 0.875 1 NA Preprocessor1_Model08 -#> 8 0.000672 roc_auc binary 0.875 1 NA Preprocessor1_Model09 -#> 9 0.000853 roc_auc binary 0.876 1 NA Preprocessor1_Model10 -#> 10 0.00108 roc_auc binary 0.876 1 NA Preprocessor1_Model11 -#> 11 0.00137 roc_auc binary 0.876 1 NA Preprocessor1_Model12 -#> 12 0.00174 roc_auc binary 0.876 1 NA Preprocessor1_Model13 -#> 13 0.00221 roc_auc binary 0.876 1 NA Preprocessor1_Model14 -#> 14 0.00281 roc_auc binary 0.875 1 NA Preprocessor1_Model15 -#> 15 0.00356 roc_auc binary 0.873 1 NA Preprocessor1_Model16 -``` - - - -Every candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used `select_best()`, it would return candidate model 11 with a penalty value of 0.00137, shown with the dotted line below. 
- - - -However, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. For example, candidate model 12 with a penalty value of 0.00174 has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we'd prefer to choose a higher penalty value. - -Let's select this value and visualize the validation set ROC curve: - -```r -lr_best <- - lr_res %>% - collect_metrics() %>% - arrange(penalty) %>% - slice(12) -lr_best -#> # A tibble: 1 × 7 -#> penalty .metric .estimator mean n std_err .config -#> -#> 1 0.00137 roc_auc binary 0.876 1 NA Preprocessor1_Model12 -``` - - - -```r -lr_auc <- - lr_res %>% - collect_predictions(parameters = lr_best) %>% - roc_curve(children, .pred_children) %>% - mutate(model = "Logistic Regression") - -autoplot(lr_auc) -``` - - - -The level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method. - -## A second model: tree-based ensemble {#second-model} - -An effective and low-maintenance modeling technique is a _random forest_. This model was also used in our [*Evaluate your model with resampling*](/start/resampling/) article. Compared to logistic regression, a random forest model is more flexible. A random forest is an _ensemble model_ typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. 
Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.). - -### Build the model and improve training time - -Although the default hyperparameters for random forests tend to give reasonable results, we'll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do [parallel processing](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) for you, and allows users to use multiple cores or separate machines to fit models. - -But, here we are using a single validation set, so parallelization isn't an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do: - - -```r -cores <- parallel::detectCores() -cores -#> [1] 12 -``` - -We have 12 cores to work with. We can pass this information to the ranger engine when we set up our parsnip `rand_forest()` model. 
To enable parallel processing, we can pass engine-specific arguments like `num.threads` to ranger when we set the engine: - - -```r -rf_mod <- - rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% - set_engine("ranger", num.threads = cores) %>% - set_mode("classification") -``` - -This works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you — we typically do not recommend relying on the modeling engine (like we did here) to do this. - -In this model, we used `tune()` as a placeholder for the `mtry` and `min_n` argument values, because these are our two hyperparameters that we will [tune](/start/tuning/). - -### Create the recipe and workflow - -Unlike penalized logistic regression models, random forest models do not require [dummy](https://bookdown.org/max/FES/categorical-trees.html) or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our `arrival_date` variable. As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data. 
- - -```r -rf_recipe <- - recipe(children ~ ., data = hotel_other) %>% - step_date(arrival_date) %>% - step_holiday(arrival_date) %>% - step_rm(arrival_date) -``` - -Adding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest: - - -```r -rf_workflow <- - workflow() %>% - add_model(rf_mod) %>% - add_recipe(rf_recipe) -``` - -### Train and tune the model - -When we set up our parsnip model, we chose two hyperparameters for tuning: - - -```r -rf_mod -#> Random Forest Model Specification (classification) -#> -#> Main Arguments: -#> mtry = tune() -#> trees = 1000 -#> min_n = tune() -#> -#> Engine-Specific Arguments: -#> num.threads = cores -#> -#> Computational engine: ranger - -# show what will be tuned -extract_parameter_set_dials(rf_mod) -#> Collection of 2 parameters for tuning -#> -#> identifier type object -#> mtry mtry nparam[?] -#> min_n min_n nparam[+] -#> -#> Model parameters needing finalization: -#> # Randomly Selected Predictors ('mtry') -#> -#> See `?dials::finalize` or `?dials::update.parameters` for more information. -``` - -The `mtry` hyperparameter sets the number of predictor variables that each node in the decision tree "sees" and can learn about, so it can range from 1 to the total number of features present; when `mtry` = all possible features, the model is the same as bagging decision trees. The `min_n` hyperparameter sets the minimum `n` to split at any node. - -We will use a space-filling design to tune, with 25 candidate models: - - -```r -set.seed(345) -rf_res <- - rf_workflow %>% - tune_grid(val_set, - grid = 25, - control = control_grid(save_pred = TRUE), - metrics = metric_set(roc_auc)) -#> i Creating pre-processing data to finalize unknown parameter: mtry -``` - -The message printed above *"Creating pre-processing data to finalize unknown parameter: mtry"* is related to the size of the data set. 
Since `mtry` depends on the number of predictors in the data set, `tune_grid()` determines the upper bound for `mtry` once it receives the data. - -Here are our top 5 random forest models, out of the 25 candidates: - - -```r -rf_res %>% - show_best(metric = "roc_auc") -#> # A tibble: 5 × 8 -#> mtry min_n .metric .estimator mean n std_err .config -#> -#> 1 8 7 roc_auc binary 0.926 1 NA Preprocessor1_Model13 -#> 2 12 7 roc_auc binary 0.926 1 NA Preprocessor1_Model01 -#> 3 13 4 roc_auc binary 0.925 1 NA Preprocessor1_Model05 -#> 4 9 12 roc_auc binary 0.924 1 NA Preprocessor1_Model19 -#> 5 6 18 roc_auc binary 0.924 1 NA Preprocessor1_Model24 -``` - -Right away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of 0.876. - -Plotting the results of the tuning process highlights that both `mtry` (number of predictors at each node) and `min_n` (minimum number of data points required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values — all but one of the ROC AUC values are greater than 0.90. - - -```r -autoplot(rf_res) -``` - - - -Let's select the best model according to the ROC AUC metric. Our final tuning parameter values are: - - -```r -rf_best <- - rf_res %>% - select_best(metric = "roc_auc") -rf_best -#> # A tibble: 1 × 3 -#> mtry min_n .config -#> -#> 1 8 7 Preprocessor1_Model13 -``` - -To calculate the data needed to plot the ROC curve, we use `collect_predictions()`. This is only possible after tuning with `control_grid(save_pred = TRUE)`. In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children. 
- - -```r -rf_res %>% - collect_predictions() -#> # A tibble: 187,500 × 8 -#> id .pred_children .pred_none .row mtry min_n children .config -#> -#> 1 validation 0.152 0.848 13 12 7 none Preprocessor1… -#> 2 validation 0.0302 0.970 20 12 7 none Preprocessor1… -#> 3 validation 0.513 0.487 22 12 7 children Preprocessor1… -#> 4 validation 0.0103 0.990 23 12 7 none Preprocessor1… -#> 5 validation 0.0111 0.989 31 12 7 none Preprocessor1… -#> # … with 187,495 more rows -``` - -To filter the predictions for only our best random forest model, we can use the `parameters` argument and pass it our tibble with the best hyperparameter values from tuning, which we called `rf_best`: - - -```r -rf_auc <- - rf_res %>% - collect_predictions(parameters = rf_best) %>% - roc_curve(children, .pred_children) %>% - mutate(model = "Random Forest") -``` - -Now, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model: - - -```r -bind_rows(rf_auc, lr_auc) %>% - ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + - geom_path(linewidth = 1.5, alpha = 0.8) + - geom_abline(lty = 3) + - coord_equal() + - scale_color_viridis_d(option = "plasma", end = .6) -``` - - - -The random forest is uniformly better across event probability thresholds. - -## The last fit {#last-fit} - -Our goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set. - -We'll start by building our parsnip model object again from scratch. 
We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: `importance = "impurity"`. This will provide _variable importance_ scores for this last model, which gives some insight into which predictors drive model performance. - - -```r -# the last model -last_rf_mod <- - rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% - set_engine("ranger", num.threads = cores, importance = "impurity") %>% - set_mode("classification") - -# the last workflow -last_rf_workflow <- - rf_workflow %>% - update_model(last_rf_mod) - -# the last fit -set.seed(345) -last_rf_fit <- - last_rf_workflow %>% - last_fit(splits) - -last_rf_fit -#> # Resampling results -#> # Manual resampling -#> # A tibble: 1 × 6 -#> splits id .metrics .notes .predict…¹ .workflow -#> -#> 1 train/test split -#> # … with abbreviated variable name ¹​.predictions -``` - -This fitted workflow contains _everything_, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance? - - -```r -last_rf_fit %>% - collect_metrics() -#> # A tibble: 2 × 4 -#> .metric .estimator .estimate .config -#> -#> 1 accuracy binary 0.946 Preprocessor1_Model1 -#> 2 roc_auc binary 0.923 Preprocessor1_Model1 -``` - -This ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data. - -We can access those variable importance scores via the `.workflow` column. 
We can [extract out the fit](https://tune.tidymodels.org/reference/extract-tune.html) from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features: - - -```r -last_rf_fit %>% - extract_fit_parsnip() %>% - vip(num_features = 20) -``` - - - -The most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned. - -Let's generate our last ROC curve to visualize. Since the event we are predicting is the first level in the `children` factor ("children"), we provide `roc_curve()` with the [relevant class probability](https://yardstick.tidymodels.org/reference/roc_curve.html#relevant-level) `.pred_children`: - - -```r -last_rf_fit %>% - collect_predictions() %>% - roc_curve(children, .pred_children) %>% - autoplot() -``` - - - -Based on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data. - -## Where to next? {#next} - -If you've made it to the end of this series of [*Get Started*](/start/) articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can [learn how to go farther](/learn/) with tidymodels in your modeling and machine learning projects. - -Here are some more ideas for where to go next: - -+ Study up on statistics and modeling with our comprehensive [books](/books/). - -+ Dig deeper into the [package documentation sites](/packages/) to find functions that meet your modeling needs. Use the [searchable tables](/find/) to explore what is possible. 
- -+ Keep up with the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/). - -+ Find ways to ask for [help](/help/) and [contribute to tidymodels](/contribute) to help others. - -###
    Happy modeling!
    - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> ranger 0.14.1 2022-06-18 [1] CRAN (R 4.2.0) -#> readr * 2.1.3 2022-10-01 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> vip * 0.3.2 2020-12-17 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/start/case-study/index_files/header-attrs-2.1/header-attrs.js b/content/start/case-study/index_files/header-attrs-2.1/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/content/start/case-study/index_files/header-attrs-2.1/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. 
We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/content/start/common.R b/content/start/common.R deleted file mode 100644 index 23172bcc..00000000 --- a/content/start/common.R +++ /dev/null @@ -1,39 +0,0 @@ -knitr::opts_chunk$set( - digits = 3, - comment = "#>", - dev = 'svglite', - dev.args = list(bg = "transparent"), - fig.path = "figs/", - collapse = TRUE, - cache.path = "cache/" -) -options( - width = 80, - cli.width = 70, - pillar.min_title_chars = Inf -) - -article_req_pkgs <- function(x, what = "To use code in this article, ") { - x <- sort(x) - x <- knitr::combine_words(x, and = " and ") - paste0( - what, - " you will need to install the following packages: ", - x, "." 
- ) -} -small_session <- function(pkgs = NULL) { - pkgs <- c(pkgs, "recipes", "parsnip", "tune", "workflows", "dials", "dplyr", - "broom", "ggplot2", "purrr", "rlang", "rsample", "tibble", "infer", - "yardstick", "tidymodels", "infer") - pkgs <- unique(pkgs) - library(sessioninfo) - library(dplyr) - sinfo <- sessioninfo::session_info() - cls <- class(sinfo$packages) - sinfo$packages <- - sinfo$packages %>% - dplyr::filter(package %in% pkgs) - class(sinfo$packages) <- cls - sinfo -} diff --git a/content/start/models/figs/stan-pred-1.svg b/content/start/models/figs/stan-pred-1.svg deleted file mode 100644 index 83746ced..00000000 --- a/content/start/models/figs/stan-pred-1.svg +++ /dev/null @@ -1,85 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.05 -0.06 -0.07 -0.08 -0.09 -0.10 - - - - - - - - - -Initial -Low -High -food_regime -urchin size -Bayesian model with t(1) prior distribution - - diff --git a/content/start/models/index.Rmarkdown b/content/start/models/index.Rmarkdown deleted file mode 100755 index cf9d22cb..00000000 --- a/content/start/models/index.Rmarkdown +++ /dev/null @@ -1,284 +0,0 @@ ---- -title: "Build a model" -weight: 1 -tags: [parsnip, broom] -categories: [model fitting] -description: | - Get started by learning how to specify and train a model using tidymodels. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/start/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(readr) -library(rstanarm) -library(tidymodels) -library(broom.mixed) -library(dotwhisker) - -pkgs <- c("tidymodels", "readr", "rstanarm", "broom.mixed", "dotwhisker") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - - -## Introduction {#intro} - -How do you create a statistical model using tidymodels? In this article, we will walk you through the steps. 
We start with data for modeling, learn how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/), and understand why these functions are designed this way. - -`r article_req_pkgs(pkgs)` - -```{r eval=FALSE} -library(tidymodels) # for the parsnip package, along with the rest of tidymodels - -# Helper packages -library(readr) # for importing data -library(broom.mixed) # for converting bayesian models to tidy tibbles -library(dotwhisker) # for visualizing regression results -``` - - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - - -## The Sea Urchins Data {#data} - -Let's use the data from [Constable (1993)](https://link.springer.com/article/10.1007/BF00349318) to explore how three different feeding regimes affect the size of sea urchins over time. The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed. - -To start, let's read our urchins data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (""): - -```{r data} -urchins <- - # Data were assembled for a tutorial - # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html - read_csv("https://tidymodels.org/start/models/urchins.csv") %>% - # Change the names to be a little more verbose - setNames(c("food_regime", "initial_volume", "width")) %>% - # Factors are very helpful for modeling, so we convert one column - mutate(food_regime = factor(food_regime, levels = c("Initial", "Low", "High"))) -``` - -Let's take a quick look at the data: - -```{r} -urchins -``` - -The urchins data is a [tibble](https://tibble.tidyverse.org/index.html). If you are new to tibbles, the best place to start is the [tibbles chapter](https://r4ds.had.co.nz/tibbles.html) in *R for Data Science*. 
For each of the `r nrow(urchins)` urchins, we know their: - -+ experimental feeding regime group (`food_regime`: either `Initial`, `Low`, or `High`), -+ size in milliliters at the start of the experiment (`initial_volume`), and -+ suture width at the end of the experiment (`width`). - -As a first step in modeling, it's always a good idea to plot the data: - -```{r urchin-plot} -ggplot(urchins, - aes(x = initial_volume, - y = width, - group = food_regime, - col = food_regime)) + - geom_point() + - geom_smooth(method = lm, se = FALSE) + - scale_color_viridis_d(option = "plasma", end = .7) -``` - -We can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition. - -## Build and fit a model {#build-model} - -A standard two-way analysis of variance ([ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm)) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let's build a model that allows for two-way interactions. Specifying an R formula with our variables in this way: - -```{r two-way-int, eval = FALSE} -width ~ initial_volume * food_regime -``` - -allows our regression model depending on initial volume to have separate slopes and intercepts for each food regime. - -For this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the _functional form_ of the model that we want using the [parsnip package](https://parsnip.tidymodels.org/). Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is ["linear regression"](https://parsnip.tidymodels.org/reference/linear_reg.html). 
We can declare this with: - - -```{r lm-tm} -linear_reg() -``` - -That is pretty underwhelming since, on its own, it doesn't really do much. However, now that the type of model has been specified, we can think about a method for _fitting_ or training the model, the model **engine**. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for `linear_reg()` is `"lm"` for ordinary least squares, as you can see above. We could set a non-default option instead: - -```{r lm-spec} -linear_reg() %>% - set_engine("keras") -``` - -The [documentation page for `linear_reg()`](https://parsnip.tidymodels.org/reference/linear_reg.html) lists all the possible engines. We'll save our model object using the default engine as `lm_mod`. - -```{r} -lm_mod <- linear_reg() -``` - -From here, the model can be estimated or trained using the [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) function: - -```{r lm-fit} -lm_fit <- - lm_mod %>% - fit(width ~ initial_volume * food_regime, data = urchins) -lm_fit -``` - -Perhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the `summary()` function for `lm` objects can provide that, it gives the results back in an unwieldy format. Many models have a `tidy()` method that provides the summary results in a more predictable and useful format (e.g. 
a data frame with standard column names): - -```{r lm-table} -tidy(lm_fit) -``` - -This kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package: - -```{r dwplot} -tidy(lm_fit) %>% - dwplot(dot_args = list(size = 2, color = "black"), - whisker_args = list(color = "black"), - vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2)) -``` - - -## Use a model to predict {#predict-model} - -This fitted object `lm_fit` has the `lm` model output built-in, which you can access with `lm_fit$fit`, but there are some benefits to using the fitted parsnip model object when it comes to predicting. - -Suppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph: - -```{r new-points} -new_points <- expand.grid(initial_volume = 20, - food_regime = c("Initial", "Low", "High")) -new_points -``` - -To get our predicted results, we can use the `predict()` function to find the mean values at 20ml. - -It is also important to communicate the variability, so we also need to find the predicted confidence intervals. If we had used `lm()` to fit the model directly, a few minutes of reading the [documentation page](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/predict.lm.html) for `predict.lm()` would explain how to do this. However, if we decide to use a different model to estimate urchin size (_spoiler:_ we will!), it is likely that a completely different syntax would be required. - -Instead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values. 
- -First, let's generate the mean body width values: - -```{r lm-pred-mean} -mean_pred <- predict(lm_fit, new_data = new_points) -mean_pred -``` - -When making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. This makes it easy to combine the original data and the predictions in a usable format: - -```{r lm-all-pred} -conf_int_pred <- predict(lm_fit, - new_data = new_points, - type = "conf_int") -conf_int_pred - -# Now combine: -plot_data <- - new_points %>% - bind_cols(mean_pred) %>% - bind_cols(conf_int_pred) - -# and plot: -ggplot(plot_data, aes(x = food_regime)) + - geom_point(aes(y = .pred)) + - geom_errorbar(aes(ymin = .pred_lower, - ymax = .pred_upper), - width = .2) + - labs(y = "urchin size") -``` - -## Model with a different engine {#new-engine} - -Everyone on your team is happy with that plot _except_ that one person who just read their first book on [Bayesian analysis](https://bayesian.org/what-is-bayesian-analysis/). They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a [_prior distribution_](https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7) needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors _wide_ using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom). - -The [documentation](https://mc-stan.org/rstanarm/articles/priors.html) on the rstanarm package shows us that the `stan_glm()` function can be used to estimate this model, and that the function arguments that need to be specified are called `prior` and `prior_intercept`. 
It turns out that `linear_reg()` has a [`stan` engine](https://parsnip.tidymodels.org/reference/linear_reg.html#details). Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to [`parsnip::set_engine()`](https://parsnip.tidymodels.org/reference/set_engine.html). After that, the same exact `fit()` call is used: - -```{r go-stan, message = FALSE} -# set the prior distribution -prior_dist <- rstanarm::student_t(df = 1) - -set.seed(123) - -# make the parsnip model -bayes_mod <- - linear_reg() %>% - set_engine("stan", - prior_intercept = prior_dist, - prior = prior_dist) - -# train the model -bayes_fit <- - bayes_mod %>% - fit(width ~ initial_volume * food_regime, data = urchins) - -print(bayes_fit, digits = 5) -``` - -This kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use `set.seed()` to ensure that the same (pseudo-)random numbers are generated each time we run this code. The number `123` isn't special or related to our data; it is just a "seed" used to choose random numbers. - -To update the parameter table, the `tidy()` method is once again used: - -```{r tidy-stan} -tidy(bayes_fit, conf.int = TRUE) -``` - -A goal of the tidymodels packages is that the **interfaces to common tasks are standardized** (as seen in the `tidy()` results above). 
The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax: - -```{r stan-pred} -bayes_plot_data <- - new_points %>% - bind_cols(predict(bayes_fit, new_data = new_points)) %>% - bind_cols(predict(bayes_fit, new_data = new_points, type = "conf_int")) - -ggplot(bayes_plot_data, aes(x = food_regime)) + - geom_point(aes(y = .pred)) + - geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + - labs(y = "urchin size") + - ggtitle("Bayesian model with t(1) prior distribution") -``` - -This isn't very different from the non-Bayesian results (except in interpretation). - -{{% note %}} The [parsnip](https://parsnip.tidymodels.org/) package can work with many model types, engines, and arguments. Check out [tidymodels.org/find/parsnip](/find/parsnip/) to see what is available. {{%/ note %}} - -## Why does it work that way? {#why} - -The extra step of defining the model using a function like `linear_reg()` might seem superfluous since a call to `lm()` is much more succinct. However, the problem with standard modeling functions is that they don't separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can't recycle those computations. - -Also, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using a single function call). [Model tuning](/start/tuning/) with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if `linear_reg()` immediately fit the model. - -If you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (`%>%`). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the _data_ as the first argument. 
For example: - -```{r tidy-data} -urchins %>% - group_by(food_regime) %>% - summarize(med_vol = median(initial_volume)) -``` - -whereas the modeling code uses the pipe to pass around the _model object_: - -```{r tidy-model, eval = FALSE} -bayes_mod %>% - fit(width ~ initial_volume * food_regime, data = urchins) -``` - -This may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates: - -```{r eval=FALSE} -ggplot(urchins, - aes(initial_volume, width)) + # returns a ggplot object - geom_jitter() + # same - geom_smooth(method = lm, se = FALSE) + # same - labs(x = "Volume", y = "Width") # etc -``` - - -## Session information {#session-info} - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/start/models/index.markdown b/content/start/models/index.markdown deleted file mode 100644 index 8c856371..00000000 --- a/content/start/models/index.markdown +++ /dev/null @@ -1,443 +0,0 @@ ---- -title: "Build a model" -weight: 1 -tags: [parsnip, broom] -categories: [model fitting] -description: | - Get started by learning how to specify and train a model using tidymodels. ---- - - - - - - - -## Introduction {#intro} - -How do you create a statistical model using tidymodels? In this article, we will walk you through the steps. We start with data for modeling, learn how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/), and understand why these functions are designed this way. - -To use code in this article, you will need to install the following packages: broom.mixed, dotwhisker, readr, rstanarm, and tidymodels. 
- - -```r -library(tidymodels) # for the parsnip package, along with the rest of tidymodels - -# Helper packages -library(readr) # for importing data -library(broom.mixed) # for converting bayesian models to tidy tibbles -library(dotwhisker) # for visualizing regression results -``` - - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - - -## The Sea Urchins Data {#data} - -Let's use the data from [Constable (1993)](https://link.springer.com/article/10.1007/BF00349318) to explore how three different feeding regimes affect the size of sea urchins over time. The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed. - -To start, let's read our urchins data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (""): - - -```r -urchins <- - # Data were assembled for a tutorial - # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html - read_csv("https://tidymodels.org/start/models/urchins.csv") %>% - # Change the names to be a little more verbose - setNames(c("food_regime", "initial_volume", "width")) %>% - # Factors are very helpful for modeling, so we convert one column - mutate(food_regime = factor(food_regime, levels = c("Initial", "Low", "High"))) -#> Rows: 72 Columns: 3 -#> ── Column specification ────────────────────────────────────────────── -#> Delimiter: "," -#> chr (1): TREAT -#> dbl (2): IV, SUTW -#> -#> ℹ Use `spec()` to retrieve the full column specification for this data. -#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message. 
-``` - -Let's take a quick look at the data: - - -```r -urchins -#> # A tibble: 72 × 3 -#> food_regime initial_volume width -#> -#> 1 Initial 3.5 0.01 -#> 2 Initial 5 0.02 -#> 3 Initial 8 0.061 -#> 4 Initial 10 0.051 -#> 5 Initial 13 0.041 -#> 6 Initial 13 0.061 -#> 7 Initial 15 0.041 -#> 8 Initial 15 0.071 -#> 9 Initial 16 0.092 -#> 10 Initial 17 0.051 -#> # … with 62 more rows -``` - -The urchins data is a [tibble](https://tibble.tidyverse.org/index.html). If you are new to tibbles, the best place to start is the [tibbles chapter](https://r4ds.had.co.nz/tibbles.html) in *R for Data Science*. For each of the 72 urchins, we know their: - -+ experimental feeding regime group (`food_regime`: either `Initial`, `Low`, or `High`), -+ size in milliliters at the start of the experiment (`initial_volume`), and -+ suture width at the end of the experiment (`width`). - -As a first step in modeling, it's always a good idea to plot the data: - - -```r -ggplot(urchins, - aes(x = initial_volume, - y = width, - group = food_regime, - col = food_regime)) + - geom_point() + - geom_smooth(method = lm, se = FALSE) + - scale_color_viridis_d(option = "plasma", end = .7) -#> `geom_smooth()` using formula = 'y ~ x' -``` - - - -We can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition. - -## Build and fit a model {#build-model} - -A standard two-way analysis of variance ([ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm)) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let's build a model that allows for two-way interactions. 
Specifying an R formula with our variables in this way: - - -```r -width ~ initial_volume * food_regime -``` - -allows our regression model depending on initial volume to have separate slopes and intercepts for each food regime. - -For this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the _functional form_ of the model that we want using the [parsnip package](https://parsnip.tidymodels.org/). Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is ["linear regression"](https://parsnip.tidymodels.org/reference/linear_reg.html). We can declare this with: - - - -```r -linear_reg() -#> Linear Regression Model Specification (regression) -#> -#> Computational engine: lm -``` - -That is pretty underwhelming since, on its own, it doesn't really do much. However, now that the type of model has been specified, we can think about a method for _fitting_ or training the model, the model **engine**. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for `linear_reg()` is `"lm"` for ordinary least squares, as you can see above. We could set a non-default option instead: - - -```r -linear_reg() %>% - set_engine("keras") -#> Linear Regression Model Specification (regression) -#> -#> Computational engine: keras -``` - -The [documentation page for `linear_reg()`](https://parsnip.tidymodels.org/reference/linear_reg.html) lists all the possible engines. We'll save our model object using the default engine as `lm_mod`. 
- - -```r -lm_mod <- linear_reg() -``` - -From here, the model can be estimated or trained using the [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) function: - - -```r -lm_fit <- - lm_mod %>% - fit(width ~ initial_volume * food_regime, data = urchins) -lm_fit -#> parsnip model object -#> -#> -#> Call: -#> stats::lm(formula = width ~ initial_volume * food_regime, data = data) -#> -#> Coefficients: -#> (Intercept) initial_volume -#> 0.0331216 0.0015546 -#> food_regimeLow food_regimeHigh -#> 0.0197824 0.0214111 -#> initial_volume:food_regimeLow initial_volume:food_regimeHigh -#> -0.0012594 0.0005254 -``` - -Perhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the `summary()` function for `lm` objects can provide that, it gives the results back in an unwieldy format. Many models have a `tidy()` method that provides the summary results in a more predictable and useful format (e.g. a data frame with standard column names): - - -```r -tidy(lm_fit) -#> # A tibble: 6 × 5 -#> term estimate std.error statistic p.value -#> -#> 1 (Intercept) 0.0331 0.00962 3.44 0.00100 -#> 2 initial_volume 0.00155 0.000398 3.91 0.000222 -#> 3 food_regimeLow 0.0198 0.0130 1.52 0.133 -#> 4 food_regimeHigh 0.0214 0.0145 1.47 0.145 -#> 5 initial_volume:food_regimeLow -0.00126 0.000510 -2.47 0.0162 -#> 6 initial_volume:food_regimeHigh 0.000525 0.000702 0.748 0.457 -``` - -This kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package: - - -```r -tidy(lm_fit) %>% - dwplot(dot_args = list(size = 2, color = "black"), - whisker_args = list(color = "black"), - vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2)) -``` - - - - -## Use a model to predict {#predict-model} - -This fitted object `lm_fit` has the `lm` model output built-in, which you can access with `lm_fit$fit`, but there are some benefits to using the fitted parsnip model object when it 
comes to predicting. - -Suppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph: - - -```r -new_points <- expand.grid(initial_volume = 20, - food_regime = c("Initial", "Low", "High")) -new_points -#> initial_volume food_regime -#> 1 20 Initial -#> 2 20 Low -#> 3 20 High -``` - -To get our predicted results, we can use the `predict()` function to find the mean values at 20ml. - -It is also important to communicate the variability, so we also need to find the predicted confidence intervals. If we had used `lm()` to fit the model directly, a few minutes of reading the [documentation page](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/predict.lm.html) for `predict.lm()` would explain how to do this. However, if we decide to use a different model to estimate urchin size (_spoiler:_ we will!), it is likely that a completely different syntax would be required. - -Instead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values. - -First, let's generate the mean body width values: - - -```r -mean_pred <- predict(lm_fit, new_data = new_points) -mean_pred -#> # A tibble: 3 × 1 -#> .pred -#> -#> 1 0.0642 -#> 2 0.0588 -#> 3 0.0961 -``` - -When making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. 
This makes it easy to combine the original data and the predictions in a usable format: - - -```r -conf_int_pred <- predict(lm_fit, - new_data = new_points, - type = "conf_int") -conf_int_pred -#> # A tibble: 3 × 2 -#> .pred_lower .pred_upper -#> -#> 1 0.0555 0.0729 -#> 2 0.0499 0.0678 -#> 3 0.0870 0.105 - -# Now combine: -plot_data <- - new_points %>% - bind_cols(mean_pred) %>% - bind_cols(conf_int_pred) - -# and plot: -ggplot(plot_data, aes(x = food_regime)) + - geom_point(aes(y = .pred)) + - geom_errorbar(aes(ymin = .pred_lower, - ymax = .pred_upper), - width = .2) + - labs(y = "urchin size") -``` - - - -## Model with a different engine {#new-engine} - -Every one on your team is happy with that plot _except_ that one person who just read their first book on [Bayesian analysis](https://bayesian.org/what-is-bayesian-analysis/). They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a [_prior distribution_](https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7) needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors _wide_ using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom). - -The [documentation](https://mc-stan.org/rstanarm/articles/priors.html) on the rstanarm package shows us that the `stan_glm()` function can be used to estimate this model, and that the function arguments that need to be specified are called `prior` and `prior_intercept`. It turns out that `linear_reg()` has a [`stan` engine](https://parsnip.tidymodels.org/reference/linear_reg.html#details). 
Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to [`parsnip::set_engine()`](https://parsnip.tidymodels.org/reference/set_engine.html). After that, the same exact `fit()` call is used: - - -```r -# set the prior distribution -prior_dist <- rstanarm::student_t(df = 1) - -set.seed(123) - -# make the parsnip model -bayes_mod <- - linear_reg() %>% - set_engine("stan", - prior_intercept = prior_dist, - prior = prior_dist) - -# train the model -bayes_fit <- - bayes_mod %>% - fit(width ~ initial_volume * food_regime, data = urchins) - -print(bayes_fit, digits = 5) -#> parsnip model object -#> -#> stan_glm -#> family: gaussian [identity] -#> formula: width ~ initial_volume * food_regime -#> observations: 72 -#> predictors: 6 -#> ------ -#> Median MAD_SD -#> (Intercept) 0.03338 0.00947 -#> initial_volume 0.00155 0.00039 -#> food_regimeLow 0.01936 0.01348 -#> food_regimeHigh 0.02073 0.01395 -#> initial_volume:food_regimeLow -0.00125 0.00052 -#> initial_volume:food_regimeHigh 0.00055 0.00069 -#> -#> Auxiliary parameter(s): -#> Median MAD_SD -#> sigma 0.02143 0.00180 -#> -#> ------ -#> * For help interpreting the printed output see ?print.stanreg -#> * For info on the priors used see ?prior_summary.stanreg -``` - -This kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use `set.seed()` to ensure that the same (pseudo-)random numbers are generated each time we run this code. The number `123` isn't special or related to our data; it is just a "seed" used to choose random numbers. 
- -To update the parameter table, the `tidy()` method is once again used: - - -```r -tidy(bayes_fit, conf.int = TRUE) -#> # A tibble: 6 × 5 -#> term estimate std.error conf.low conf.high -#> -#> 1 (Intercept) 0.0334 0.00947 0.0171 0.0493 -#> 2 initial_volume 0.00155 0.000392 0.000885 0.00222 -#> 3 food_regimeLow 0.0194 0.0135 -0.00178 0.0417 -#> 4 food_regimeHigh 0.0207 0.0140 -0.00287 0.0457 -#> 5 initial_volume:food_regimeLow -0.00125 0.000520 -0.00211 -0.000389 -#> 6 initial_volume:food_regimeHigh 0.000551 0.000688 -0.000651 0.00168 -``` - -A goal of the tidymodels packages is that the **interfaces to common tasks are standardized** (as seen in the `tidy()` results above). The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax: - - -```r -bayes_plot_data <- - new_points %>% - bind_cols(predict(bayes_fit, new_data = new_points)) %>% - bind_cols(predict(bayes_fit, new_data = new_points, type = "conf_int")) - -ggplot(bayes_plot_data, aes(x = food_regime)) + - geom_point(aes(y = .pred)) + - geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + - labs(y = "urchin size") + - ggtitle("Bayesian model with t(1) prior distribution") -``` - - - -This isn't very different from the non-Bayesian results (except in interpretation). - -{{% note %}} The [parsnip](https://parsnip.tidymodels.org/) package can work with many model types, engines, and arguments. Check out [tidymodels.org/find/parsnip](/find/parsnip/) to see what is available. {{%/ note %}} - -## Why does it work that way? {#why} - -The extra step of defining the model using a function like `linear_reg()` might seem superfluous since a call to `lm()` is much more succinct. However, the problem with standard modeling functions is that they don't separate what you want to do from the execution. 
For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can't recycle those computations. - -Also, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using single function call). [Model tuning](/start/tuning/) with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if `linear_reg()` immediately fit the model. - -If you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (`%>%`). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the _data_ as the first argument. For example: - - -```r -urchins %>% - group_by(food_regime) %>% - summarize(med_vol = median(initial_volume)) -#> # A tibble: 3 × 2 -#> food_regime med_vol -#> -#> 1 Initial 20.5 -#> 2 Low 19.2 -#> 3 High 15 -``` - -whereas the modeling code uses the pipe to pass around the _model object_: - - -```r -bayes_mod %>% - fit(width ~ initial_volume * food_regime, data = urchins) -``` - -This may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates: - - -```r -ggplot(urchins, - aes(initial_volume, width)) + # returns a ggplot object - geom_jitter() + # same - geom_smooth(method = lm, se = FALSE) + # same - labs(x = "Volume", y = "Width") # etc -``` - - -## Session information {#session-info} - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> broom.mixed * 0.2.9.4 2022-04-17 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dotwhisker * 0.7.4 2021-09-02 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> readr * 2.1.3 2022-10-01 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> rstanarm * 2.21.3 2022-04-09 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/start/recipes/figs/roc-plot-1.svg b/content/start/recipes/figs/roc-plot-1.svg deleted file mode 100644 index 7593c966..00000000 --- a/content/start/recipes/figs/roc-plot-1.svg +++ /dev/null @@ -1,88 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 - - - - - - - - - - -0.00 -0.25 -0.50 -0.75 -1.00 -1 - specificity -sensitivity - - diff --git a/content/start/recipes/index.Rmarkdown 
b/content/start/recipes/index.Rmarkdown deleted file mode 100755 index 3e6f17e1..00000000 --- a/content/start/recipes/index.Rmarkdown +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: "Preprocess your data with recipes" -weight: 2 -tags: [recipes, parsnip, workflows, yardstick, broom] -categories: [pre-processing] -description: | - Prepare data for modeling with modular preprocessing steps. ---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/start/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(tidymodels) -library(nycflights13) -library(kableExtra) -library(skimr) -pkgs <- c("tidymodels", "nycflights13", "skimr") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - -## Introduction {#intro} - -In our [*Build a Model*](/start/models/) article, we learned how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/). In this article, we'll explore another tidymodels package, [recipes](https://recipes.tidymodels.org/), which is designed to help you preprocess your data *before* training your model. Recipes are built as a series of preprocessing steps, such as: - -+ converting qualitative predictors to indicator variables (also known as dummy variables), - -+ transforming data to be on a different scale (e.g., taking the logarithm of a variable), - -+ transforming whole groups of predictors together, - -+ extracting key features from raw variables (e.g., getting the day of the week out of a date variable), - -and so on. If you are familiar with R's formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling. 
- -`r article_req_pkgs(pkgs)` - -```{r eval=FALSE} -library(tidymodels) # for the recipes package, along with the rest of tidymodels - -# Helper packages -library(nycflights13) # for flight data -library(skimr) # for variable summaries -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The New York City flight data {#data} - -```{r flight-start, echo=FALSE} -set.seed(123) - -flight_data <- - flights %>% - mutate( - # Convert the arrival delay to a factor - arr_delay = ifelse(arr_delay >= 30, "late", "on_time"), - arr_delay = factor(arr_delay), - # We will use the date (not date-time) in the recipe below - date = lubridate::as_date(time_hour) - ) %>% - # Include the weather data - inner_join(weather, by = c("origin", "time_hour")) %>% - # Only retain the specific columns we will use - select(dep_time, flight, origin, dest, air_time, distance, - carrier, date, arr_delay, time_hour) %>% - # Exclude missing data - na.omit() %>% - # For creating models, it is better to have qualitative columns - # encoded as factors (instead of character strings) - mutate_if(is.character, as.factor) -``` - -Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on `r scales::comma(nrow(flight_data))` flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables: - -```{r ref.label='flight-start'} - -``` - - -We can see that about `r percent(mean(flight_data$arr_delay == "late"))` of the flights in this data set arrived more than 30 minutes late. - -```{r count-delays} -flight_data %>% - count(arr_delay) %>% - mutate(prop = n/sum(n)) -``` - - -Before we start building up our recipe, let's take a quick look at a few specific variables that will be important for both preprocessing and modeling. 
- -First, notice that the variable we created called `arr_delay` is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor. - -```{r glimpse-flights} -glimpse(flight_data) -``` - -Second, there are two variables that we don't want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are `flight`, a numeric value, and `time_hour`, a date-time value. - -Third, there are `r length(levels(flight_data$dest))` flight destinations contained in `dest` and `r length(levels(flight_data$carrier))` distinct `carrier`s. - -```{r skim-flights} -flight_data %>% - skimr::skim(dest, carrier) -``` - - -Because we'll be using a simple logistic regression model, the variables `dest` and `carrier` will be converted to [dummy variables](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html). However, some of these values do not occur very frequently and this could complicate our analysis. We'll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling. - -## Data splitting {#data-split} - -To get started, let's split this single dataset into two: a _training_ set and a _testing_ set. We'll keep most of the rows in the original dataset (subset chosen randomly) in the _training_ set. The training data will be used to *fit* the model, and the _testing_ set will be used to measure model performance. 
- -To do this, we can use the [rsample](https://rsample.tidymodels.org/) package to create an object that contains the information on _how_ to split the data, and then two more rsample functions to create data frames for the training and testing sets: - -```{r split} -# Fix the random numbers by setting the seed -# This enables the analysis to be reproducible when random numbers are used -set.seed(222) -# Put 3/4 of the data into the training set -data_split <- initial_split(flight_data, prop = 3/4) - -# Create data frames for the two sets: -train_data <- training(data_split) -test_data <- testing(data_split) -``` - - -## Create recipe and roles {#recipe} - -To get started, let's create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model. - -Let's initiate a new recipe: - -```{r initial-recipe} -flights_rec <- - recipe(arr_delay ~ ., data = train_data) -``` - -The [`recipe()` function](https://recipes.tidymodels.org/reference/recipe.html) as we used it here has two arguments: - -+ A **formula**. Any variable on the left-hand side of the tilde (`~`) is considered the model outcome (here, `arr_delay`). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (`.`) to indicate all other variables as predictors. - -+ The **data**. A recipe is associated with the data set used to create the model. This will typically be the _training_ set, so `data = train_data` here. Naming a data set doesn't actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc. - -Now we can add [roles](https://recipes.tidymodels.org/reference/roles.html) to this recipe. 
We can use the [`update_role()` function](https://recipes.tidymodels.org/reference/roles.html) to let recipes know that `flight` and `time_hour` are variables with a custom role that we called `"ID"` (a role can have any character value). Whereas our formula included all variables in the training set other than `arr_delay` as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors. - -```{r recipe-roles} -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") -``` - -This step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong. - -To get the current set of variables and roles, use the `summary()` function: - -```{r summary} -summary(flights_rec) -``` - - - -## Create features {#features} - -Now we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of **feature engineering** might go a long way to improving our model. How should the date be encoded into the model? The `date` column has an R `date` object so including that column "as is" will mean that the model will convert it to a numeric format equal to the number of days after a reference date: - -```{r dates, R.options = list(tibble.print_min = 5)} -flight_data %>% - distinct(date) %>% - mutate(numeric_date = as.numeric(date)) -``` - -It's possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. 
However, it might be better to add model terms _derived_ from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single `date` variable: - -* the day of the week, - -* the month, and - -* whether or not the date corresponds to a holiday. - -Let's do all three of these by adding steps to our recipe: - - -```{r date-recipe} -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") %>% - step_date(date, features = c("dow", "month")) %>% - step_holiday(date, - holidays = timeDate::listHolidays("US"), - keep_original_cols = FALSE) -``` - -What do each of these steps do? - -* With [`step_date()`](https://recipes.tidymodels.org/reference/step_date.html), we created two new factor columns with the appropriate day of the week and the month. - -* With [`step_holiday()`](https://recipes.tidymodels.org/reference/step_holiday.html), we created a binary variable indicating whether the current date is a holiday or not. The argument value of `timeDate::listHolidays("US")` uses the [timeDate package](https://cran.r-project.org/web/packages/timeDate/index.html) to list the `r length(timeDate::listHolidays("US"))` standard US holidays. - -* With `keep_original_cols = FALSE`, we remove the original `date` variable since we no longer want it in the model. Many recipe steps that create new variables have this argument. - -Next, we'll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix). 
- -For factors like `dest` and `origin`, [standard practice](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html) is to convert them into _dummy_ or _indicator_ variables to make them numeric. These are binary values for each level of the factor. For example, our `origin` variable has values of `"EWR"`, `"JFK"`, and `"LGA"`. The standard dummy variable encoding, shown below, will create _two_ numeric columns of the data that are 1 when the originating airport is `"JFK"` or `"LGA"` and zero otherwise, respectively. - -```{r calc-dummy, include = FALSE} -four_origins <- - train_data %>% - select(origin, arr_delay) %>% - slice(1:4) - -origin_dummies <- - recipe(arr_delay ~ origin, data = train_data) %>% - step_dummy(origin, keep_original_cols = TRUE) %>% - prep(training = four_origins) -``` - -```{r dummy-table, echo = FALSE} -# Get a row for each factor level -bake(origin_dummies, new_data = NULL, origin, starts_with("origin")) %>% - distinct() %>% - knitr::kable() %>% - kable_styling(full_width = FALSE) -``` - - -But, unlike the standard model formula methods in R, a recipe **does not** automatically create these dummy variables for you; you'll need to tell your recipe to add this step. This is for two reasons. First, many models do not require [numeric predictors](https://bookdown.org/max/FES/categorical-trees.html), so dummy variables may not always be preferred. Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. 
For those reasons, you need to explicitly tell recipes to create dummy variables using `step_dummy()`: - -```{r dummy} -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") %>% - step_date(date, features = c("dow", "month")) %>% - step_holiday(date, - holidays = timeDate::listHolidays("US"), - keep_original_cols = FALSE) %>% - step_dummy(all_nominal_predictors()) -``` - -Here, we did something different than before: instead of applying a step to an individual variable, we used [selectors](https://recipes.tidymodels.org/reference/selections.html) to apply this recipe step to several variables at once, `all_nominal_predictors()`. The [selector functions](https://recipes.tidymodels.org/reference/selections.html) can be combined to select intersections of variables. - -At this stage in the recipe, this step selects the `origin`, `dest`, and `carrier` variables. It also includes two new variables, `date_dow` and `date_month`, that were created by the earlier `step_date()`. - -More generally, the recipe selectors mean that you don't always have to apply steps to individual variables one at a time. Since a recipe knows the _variable type_ and _role_ of each column, they can also be selected (or dropped) using this information. - -We need one final step to add to our recipe. Since `carrier` and `dest` have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don't exist in the training set. For example, there is one destination that is only in the test set: - -```{r zv-cols} -test_data %>% - distinct(dest) %>% - anti_join(train_data) -``` - -When the recipe is applied to the training set, a column is made for `r dplyr::setdiff(test_data$dest, train_data$dest)` because the factor levels come from `flight_data` (not the training set), but this column will contain all zeros. This is a "zero-variance predictor" that has no information within the column. 
While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. `step_zv()` will remove columns from the data when the training set data have a single value, so it is added to the recipe *after* `step_dummy()`: - -```{r zv} -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") %>% - step_date(date, features = c("dow", "month")) %>% - step_holiday(date, - holidays = timeDate::listHolidays("US"), - keep_original_cols = FALSE) %>% - step_dummy(all_nominal_predictors()) %>% - step_zv(all_predictors()) -``` - - -Now we've created a _specification_ of what should be done with the data. How do we use the recipe we made? - -## Fit a model with a recipe {#fit-workflow} - -Let's use logistic regression to model the flight data. As we saw in [*Build a Model*](/start/models/), we start by [building a model specification](/start/models/#build-model) using the parsnip package: - -```{r model} -lr_mod <- - logistic_reg() %>% - set_engine("glm") -``` - - -We will want to use our recipe across several steps as we train and test our model. We will: - -1. **Process the recipe using the training set**: This involves any estimation or calculations based on the training set. For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal. - -1. **Apply the recipe to the training set**: We create the final predictor set on the training set. - -1. **Apply the recipe to the test set**: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set. - -To simplify this process, we can use a _model workflow_, which pairs a model and recipe together. 
This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test _workflows_. We'll use the [workflows package](https://workflows.tidymodels.org/) from tidymodels to bundle our parsnip model (`lr_mod`) with our recipe (`flights_rec`). - -```{r workflow} -flights_wflow <- - workflow() %>% - add_model(lr_mod) %>% - add_recipe(flights_rec) - -flights_wflow -``` - -Now, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors: - -```{r fit} -flights_fit <- - flights_wflow %>% - fit(data = train_data) -``` - -This object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions `extract_fit_parsnip()` and `extract_recipe()`. For example, here we pull the fitted model object then use the `broom::tidy()` function to get a tidy tibble of model coefficients: - -```{r fit-glance, R.options = list(tibble.print_min = 5)} -flights_fit %>% - extract_fit_parsnip() %>% - tidy() -``` - -## Use a trained workflow to predict {#predict-workflow} - -Our goal was to predict whether a plane arrives more than 30 minutes late. We have just: - -1. Built the model (`lr_mod`), - -1. Created a preprocessing recipe (`flights_rec`), - -1. Bundled the model and recipe (`flights_wflow`), and - -1. Trained our workflow using a single call to `fit()`. - -The next step is to use the trained workflow (`flights_fit`) to predict with the unseen test data, which we will do with a single call to `predict()`. The `predict()` method applies the recipe to the new data, then passes them to the fitted model. 
- -```{r pred-class, R.options = list(tibble.print_min = 5)} -predict(flights_fit, test_data) -``` - -Because our outcome variable here is a factor, the output from `predict()` returns the predicted class: `late` versus `on_time`. But, let's say we want the predicted class probabilities for each flight instead. To return those, we can specify `type = "prob"` when we use `predict()` or use `augment()` with the model plus test data to save them together: - -```{r test-pred, R.options = list(tibble.print_min = 5)} -flights_aug <- - augment(flights_fit, test_data) - -# The data look like: -flights_aug %>% - select(arr_delay, time_hour, flight, .pred_class, .pred_on_time) -``` - -Now that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of `.pred_on_time` are *p* > .50. But we also know that we have `r scales::comma(nrow(flights_aug))` rows total to predict. We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, `arr_delay`. - -Let's use the area under the [ROC curve](https://bookdown.org/max/FES/measuring-performance.html#class-metrics) as our metric, computed using `roc_curve()` and `roc_auc()` from the [yardstick package](https://yardstick.tidymodels.org/). - -To generate a ROC curve, we need the predicted class probabilities for `late` and `on_time`, which we just calculated in the code chunk above. We can create the ROC curve with these values, using `roc_curve()` and then piping to the `autoplot()` method: - -```{r roc-plot} -flights_aug %>% - roc_curve(truth = arr_delay, .pred_late) %>% - autoplot() -``` - -Similarly, `roc_auc()` estimates the area under the curve: - -```{r roc-auc} -flights_aug %>% - roc_auc(truth = arr_delay, .pred_late) -``` - -Not too bad! 
We leave it to the reader to test out this workflow [*without*](https://workflows.tidymodels.org/reference/add_formula.html) this recipe. You can use `workflows::add_formula(arr_delay ~ .)` instead of `add_recipe()` (remember to remove the identification variables first!), and see whether our recipe improved our model's ability to predict late arrivals. - -```{r eval = FALSE, include = FALSE} -set.seed(555) -flights_cens <- flight_data %>% - select(-flight, -time_hour) - -flights_cens_split <- initial_split(flights_cens, prop = 3/4) -flights_cens_train <- training(flights_cens_split) -flights_cens_test <- testing(flights_cens_split) - -flights_wflow_raw <- - workflow() %>% - add_model(lr_mod) %>% - add_formula(arr_delay ~ .) - -flights_fit_raw <- - flights_wflow_raw %>% - fit(data = flights_cens_train) - -flights_preds_raw <- - predict(flights_fit_raw, - flights_cens_test, - type = "prob") %>% - bind_cols(flights_cens_test %>% select(arr_delay)) - -flights_preds_raw %>% - roc_auc(truth = arr_delay, .pred_late) -``` - - -## Session information {#session-info} - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/start/recipes/index.markdown b/content/start/recipes/index.markdown deleted file mode 100644 index 3aa3b70e..00000000 --- a/content/start/recipes/index.markdown +++ /dev/null @@ -1,601 +0,0 @@ ---- -title: "Preprocess your data with recipes" -weight: 2 -tags: [recipes, parsnip, workflows, yardstick, broom] -categories: [pre-processing] -description: | - Prepare data for modeling with modular preprocessing steps. ---- - - - - - - - -## Introduction {#intro} - -In our [*Build a Model*](/start/models/) article, we learned how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/). In this article, we'll explore another tidymodels package, [recipes](https://recipes.tidymodels.org/), which is designed to help you preprocess your data *before* training your model. 
Recipes are built as a series of preprocessing steps, such as: - -+ converting qualitative predictors to indicator variables (also known as dummy variables), - -+ transforming data to be on a different scale (e.g., taking the logarithm of a variable), - -+ transforming whole groups of predictors together, - -+ extracting key features from raw variables (e.g., getting the day of the week out of a date variable), - -and so on. If you are familiar with R's formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling. - -To use code in this article, you will need to install the following packages: nycflights13, skimr, and tidymodels. - - -```r -library(tidymodels) # for the recipes package, along with the rest of tidymodels - -# Helper packages -library(nycflights13) # for flight data -library(skimr) # for variable summaries -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The New York City flight data {#data} - - - -Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. 
Let's start by loading the data and making a few changes to the variables: - - -```r -set.seed(123) - -flight_data <- - flights %>% - mutate( - # Convert the arrival delay to a factor - arr_delay = ifelse(arr_delay >= 30, "late", "on_time"), - arr_delay = factor(arr_delay), - # We will use the date (not date-time) in the recipe below - date = lubridate::as_date(time_hour) - ) %>% - # Include the weather data - inner_join(weather, by = c("origin", "time_hour")) %>% - # Only retain the specific columns we will use - select(dep_time, flight, origin, dest, air_time, distance, - carrier, date, arr_delay, time_hour) %>% - # Exclude missing data - na.omit() %>% - # For creating models, it is better to have qualitative columns - # encoded as factors (instead of character strings) - mutate_if(is.character, as.factor) -``` - - -We can see that about 16% of the flights in this data set arrived more than 30 minutes late. - - -```r -flight_data %>% - count(arr_delay) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> arr_delay n prop -#> -#> 1 late 52540 0.161 -#> 2 on_time 273279 0.839 -``` - - -Before we start building up our recipe, let's take a quick look at a few specific variables that will be important for both preprocessing and modeling. - -First, notice that the variable we created called `arr_delay` is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor. 
- - -```r -glimpse(flight_data) -#> Rows: 325,819 -#> Columns: 10 -#> $ dep_time 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 558, … -#> $ flight 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71… -#> $ origin EWR, LGA, JFK, JFK, LGA, EWR, EWR, LGA, JFK, LGA, JFK, JFK, … -#> $ dest IAH, IAH, MIA, BQN, ATL, ORD, FLL, IAD, MCO, ORD, PBI, TPA, … -#> $ air_time 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 3… -#> $ distance 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028,… -#> $ carrier UA, UA, AA, B6, DL, UA, B6, EV, B6, AA, B6, B6, UA, UA, AA, … -#> $ date 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01,… -#> $ arr_delay on_time, on_time, late, on_time, on_time, on_time, on_time, … -#> $ time_hour 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:… -``` - -Second, there are two variables that we don't want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are `flight`, a numeric value, and `time_hour`, a date-time value. - -Third, there are 104 flight destinations contained in `dest` and 16 distinct `carrier`s. - - -```r -flight_data %>% - skimr::skim(dest, carrier) -``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Table 1: Data summary
    Name Piped data
    Number of rows 325819
    Number of columns 10
    _______________________
    Column type frequency:
    factor 2
    ________________________
    Group variables None
    - - -**Variable type: factor** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    skim_variable n_missing complete_rate ordered n_unique top_counts
    dest 0 1 FALSE 104 ATL: 16771, ORD: 16507, LAX: 15942, BOS: 14948
    carrier 0 1 FALSE 16 UA: 57489, B6: 53715, EV: 50868, DL: 47465
    - - -Because we'll be using a simple logistic regression model, the variables `dest` and `carrier` will be converted to [dummy variables](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html). However, some of these values do not occur very frequently and this could complicate our analysis. We'll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling. - -## Data splitting {#data-split} - -To get started, let's split this single dataset into two: a _training_ set and a _testing_ set. We'll keep most of the rows in the original dataset (subset chosen randomly) in the _training_ set. The training data will be used to *fit* the model, and the _testing_ set will be used to measure model performance. - -To do this, we can use the [rsample](https://rsample.tidymodels.org/) package to create an object that contains the information on _how_ to split the data, and then two more rsample functions to create data frames for the training and testing sets: - - -```r -# Fix the random numbers by setting the seed -# This enables the analysis to be reproducible when random numbers are used -set.seed(222) -# Put 3/4 of the data into the training set -data_split <- initial_split(flight_data, prop = 3/4) - -# Create data frames for the two sets: -train_data <- training(data_split) -test_data <- testing(data_split) -``` - - -## Create recipe and roles {#recipe} - -To get started, let's create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model. - -Let's initiate a new recipe: - - -```r -flights_rec <- - recipe(arr_delay ~ ., data = train_data) -``` - -The [`recipe()` function](https://recipes.tidymodels.org/reference/recipe.html) as we used it here has two arguments: - -+ A **formula**. 
Any variable on the left-hand side of the tilde (`~`) is considered the model outcome (here, `arr_delay`). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (`.`) to indicate all other variables as predictors. - -+ The **data**. A recipe is associated with the data set used to create the model. This will typically be the _training_ set, so `data = train_data` here. Naming a data set doesn't actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc. - -Now we can add [roles](https://recipes.tidymodels.org/reference/roles.html) to this recipe. We can use the [`update_role()` function](https://recipes.tidymodels.org/reference/roles.html) to let recipes know that `flight` and `time_hour` are variables with a custom role that we called `"ID"` (a role can have any character value). Whereas our formula included all variables in the training set other than `arr_delay` as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors. - - -```r -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") -``` - -This step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong. 
- -To get the current set of variables and roles, use the `summary()` function: - - -```r -summary(flights_rec) -#> # A tibble: 10 × 4 -#> variable type role source -#> -#> 1 dep_time predictor original -#> 2 flight ID original -#> 3 origin predictor original -#> 4 dest predictor original -#> 5 air_time predictor original -#> 6 distance predictor original -#> 7 carrier predictor original -#> 8 date predictor original -#> 9 time_hour ID original -#> 10 arr_delay outcome original -``` - - - -## Create features {#features} - -Now we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of **feature engineering** might go a long way to improving our model. How should the date be encoded into the model? The `date` column has an R `date` object so including that column "as is" will mean that the model will convert it to a numeric format equal to the number of days after a reference date: - - -```r -flight_data %>% - distinct(date) %>% - mutate(numeric_date = as.numeric(date)) -#> # A tibble: 364 × 2 -#> date numeric_date -#> -#> 1 2013-01-01 15706 -#> 2 2013-01-02 15707 -#> 3 2013-01-03 15708 -#> 4 2013-01-04 15709 -#> 5 2013-01-05 15710 -#> # … with 359 more rows -``` - -It's possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. However, it might be better to add model terms _derived_ from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single `date` variable: - -* the day of the week, - -* the month, and - -* whether or not the date corresponds to a holiday. 
- -Let's do all three of these by adding steps to our recipe: - - - -```r -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") %>% - step_date(date, features = c("dow", "month")) %>% - step_holiday(date, - holidays = timeDate::listHolidays("US"), - keep_original_cols = FALSE) -``` - -What do each of these steps do? - -* With [`step_date()`](https://recipes.tidymodels.org/reference/step_date.html), we created two new factor columns with the appropriate day of the week and the month. - -* With [`step_holiday()`](https://recipes.tidymodels.org/reference/step_holiday.html), we created a binary variable indicating whether the current date is a holiday or not. The argument value of `timeDate::listHolidays("US")` uses the [timeDate package](https://cran.r-project.org/web/packages/timeDate/index.html) to list the 17 standard US holidays. - -* With `keep_original_cols = FALSE`, we remove the original `date` variable since we no longer want it in the model. Many recipe steps that create new variables have this argument. - -Next, we'll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix). - -For factors like `dest` and `origin`, [standard practice](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html) is to convert them into _dummy_ or _indicator_ variables to make them numeric. These are binary values for each level of the factor. For example, our `origin` variable has values of `"EWR"`, `"JFK"`, and `"LGA"`. 
The standard dummy variable encoding, shown below, will create _two_ numeric columns of the data that are 1 when the originating airport is `"JFK"` or `"LGA"` and zero otherwise, respectively. - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    origin origin_JFK origin_LGA
    JFK 1 0
    EWR 0 0
    LGA 0 1
    - - -But, unlike the standard model formula methods in R, a recipe **does not** automatically create these dummy variables for you; you'll need to tell your recipe to add this step. This is for two reasons. First, many models do not require [numeric predictors](https://bookdown.org/max/FES/categorical-trees.html), so dummy variables may not always be preferred. Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. For those reasons, you need to explicitly tell recipes to create dummy variables using `step_dummy()`: - - -```r -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") %>% - step_date(date, features = c("dow", "month")) %>% - step_holiday(date, - holidays = timeDate::listHolidays("US"), - keep_original_cols = FALSE) %>% - step_dummy(all_nominal_predictors()) -``` - -Here, we did something different than before: instead of applying a step to an individual variable, we used [selectors](https://recipes.tidymodels.org/reference/selections.html) to apply this recipe step to several variables at once, `all_nominal_predictors()`. The [selector functions](https://recipes.tidymodels.org/reference/selections.html) can be combined to select intersections of variables. - -At this stage in the recipe, this step selects the `origin`, `dest`, and `carrier` variables. It also includes two new variables, `date_dow` and `date_month`, that were created by the earlier `step_date()`. - -More generally, the recipe selectors mean that you don't always have to apply steps to individual variables one at a time. Since a recipe knows the _variable type_ and _role_ of each column, they can also be selected (or dropped) using this information. - -We need one final step to add to our recipe. 
Since `carrier` and `dest` have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don't exist in the training set. For example, there is one destination that is only in the test set: - - -```r -test_data %>% - distinct(dest) %>% - anti_join(train_data) -#> Joining, by = "dest" -#> # A tibble: 1 × 1 -#> dest -#> -#> 1 LEX -``` - -When the recipe is applied to the training set, a column is made for LEX because the factor levels come from `flight_data` (not the training set), but this column will contain all zeros. This is a "zero-variance predictor" that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. `step_zv()` will remove columns from the data when the training set data have a single value, so it is added to the recipe *after* `step_dummy()`: - - -```r -flights_rec <- - recipe(arr_delay ~ ., data = train_data) %>% - update_role(flight, time_hour, new_role = "ID") %>% - step_date(date, features = c("dow", "month")) %>% - step_holiday(date, - holidays = timeDate::listHolidays("US"), - keep_original_cols = FALSE) %>% - step_dummy(all_nominal_predictors()) %>% - step_zv(all_predictors()) -``` - - -Now we've created a _specification_ of what should be done with the data. How do we use the recipe we made? - -## Fit a model with a recipe {#fit-workflow} - -Let's use logistic regression to model the flight data. As we saw in [*Build a Model*](/start/models/), we start by [building a model specification](/start/models/#build-model) using the parsnip package: - - -```r -lr_mod <- - logistic_reg() %>% - set_engine("glm") -``` - - -We will want to use our recipe across several steps as we train and test our model. We will: - -1. **Process the recipe using the training set**: This involves any estimation or calculations based on the training set. 
For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal. - -1. **Apply the recipe to the training set**: We create the final predictor set on the training set. - -1. **Apply the recipe to the test set**: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set. - -To simplify this process, we can use a _model workflow_, which pairs a model and recipe together. This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test _workflows_. We'll use the [workflows package](https://workflows.tidymodels.org/) from tidymodels to bundle our parsnip model (`lr_mod`) with our recipe (`flights_rec`). - - -```r -flights_wflow <- - workflow() %>% - add_model(lr_mod) %>% - add_recipe(flights_rec) - -flights_wflow -#> ══ Workflow ══════════════════════════════════════════════════════════ -#> Preprocessor: Recipe -#> Model: logistic_reg() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> 4 Recipe Steps -#> -#> • step_date() -#> • step_holiday() -#> • step_dummy() -#> • step_zv() -#> -#> ── Model ───────────────────────────────────────────────────────────── -#> Logistic Regression Model Specification (classification) -#> -#> Computational engine: glm -``` - -Now, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors: - - -```r -flights_fit <- - flights_wflow %>% - fit(data = train_data) -``` - -This object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. 
To do this, you can use the helper functions `extract_fit_parsnip()` and `extract_recipe()`. For example, here we pull the fitted model object then use the `broom::tidy()` function to get a tidy tibble of model coefficients: - - -```r -flights_fit %>% - extract_fit_parsnip() %>% - tidy() -#> # A tibble: 157 × 5 -#> term estimate std.error statistic p.value -#> -#> 1 (Intercept) 7.28 2.73 2.67 7.64e- 3 -#> 2 dep_time -0.00166 0.0000141 -118. 0 -#> 3 air_time -0.0440 0.000563 -78.2 0 -#> 4 distance 0.00507 0.00150 3.38 7.32e- 4 -#> 5 date_USChristmasDay 1.33 0.177 7.49 6.93e-14 -#> # … with 152 more rows -``` - -## Use a trained workflow to predict {#predict-workflow} - -Our goal was to predict whether a plane arrives more than 30 minutes late. We have just: - -1. Built the model (`lr_mod`), - -1. Created a preprocessing recipe (`flights_rec`), - -1. Bundled the model and recipe (`flights_wflow`), and - -1. Trained our workflow using a single call to `fit()`. - -The next step is to use the trained workflow (`flights_fit`) to predict with the unseen test data, which we will do with a single call to `predict()`. The `predict()` method applies the recipe to the new data, then passes them to the fitted model. - - -```r -predict(flights_fit, test_data) -#> # A tibble: 81,455 × 1 -#> .pred_class -#> -#> 1 on_time -#> 2 on_time -#> 3 on_time -#> 4 on_time -#> 5 on_time -#> # … with 81,450 more rows -``` - -Because our outcome variable here is a factor, the output from `predict()` returns the predicted class: `late` versus `on_time`. But, let's say we want the predicted class probabilities for each flight instead. 
To return those, we can specify `type = "prob"` when we use `predict()` or use `augment()` with the model plus test data to save them together: - - -```r -flights_aug <- - augment(flights_fit, test_data) - -# The data look like: -flights_aug %>% - select(arr_delay, time_hour, flight, .pred_class, .pred_on_time) -#> # A tibble: 81,455 × 5 -#> arr_delay time_hour flight .pred_class .pred_on_time -#> -#> 1 on_time 2013-01-01 05:00:00 1545 on_time 0.945 -#> 2 on_time 2013-01-01 05:00:00 1714 on_time 0.949 -#> 3 on_time 2013-01-01 06:00:00 507 on_time 0.964 -#> 4 on_time 2013-01-01 06:00:00 5708 on_time 0.961 -#> 5 on_time 2013-01-01 06:00:00 71 on_time 0.962 -#> # … with 81,450 more rows -``` - -Now that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of `.pred_on_time` are *p* > .50. But we also know that we have 81,455 rows total to predict. We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, `arr_delay`. - -Let's use the area under the [ROC curve](https://bookdown.org/max/FES/measuring-performance.html#class-metrics) as our metric, computed using `roc_curve()` and `roc_auc()` from the [yardstick package](https://yardstick.tidymodels.org/). - -To generate a ROC curve, we need the predicted class probabilities for `late` and `on_time`, which we just calculated in the code chunk above. 
We can create the ROC curve with these values, using `roc_curve()` and then piping to the `autoplot()` method: - - -```r -flights_aug %>% - roc_curve(truth = arr_delay, .pred_late) %>% - autoplot() -``` - - - -Similarly, `roc_auc()` estimates the area under the curve: - - -```r -flights_aug %>% - roc_auc(truth = arr_delay, .pred_late) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 0.764 -``` - -Not too bad! We leave it to the reader to test out this workflow [*without*](https://workflows.tidymodels.org/reference/add_formula.html) this recipe. You can use `workflows::add_formula(arr_delay ~ .)` instead of `add_recipe()` (remember to remove the identification variables first!), and see whether our recipe improved our model's ability to predict late arrivals. - - - - -## Session information {#session-info} - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> nycflights13 * 1.0.2 2021-04-12 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> skimr * 2.1.4 2022-04-15 [1] CRAN (R 4.2.0) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/start/resampling/index.Rmarkdown b/content/start/resampling/index.Rmarkdown deleted file mode 100755 index 27d22994..00000000 --- a/content/start/resampling/index.Rmarkdown +++ /dev/null @@ -1,339 +0,0 @@ ---- -title: "Evaluate your model with resampling" -weight: 3 -tags: [rsample, parsnip, tune, workflows, yardstick] -categories: [resampling] -description: | - Measure model performance by generating different versions of the training data through resampling. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/start/common.R")) -``` - - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(tidymodels) -library(ranger) -library(modeldata) -library(kableExtra) - -data(cells, package = "modeldata") - -pkgs <- c("tidymodels", "ranger", "modeldata") - -theme_set(theme_bw() + theme(legend.position = "top")) -``` - - -## Introduction {#intro} - -So far, we have [built a model](/start/models/) and [preprocessed data with a recipe](/start/recipes/). We also introduced [workflows](/start/recipes/#fit-workflow) as a way to bundle a [parsnip model](https://parsnip.tidymodels.org/) and [recipe](https://recipes.tidymodels.org/) together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on **resampling** statistics. - -`r article_req_pkgs(pkgs)` - -```{r eval=FALSE} -library(tidymodels) # for the rsample package, along with the rest of tidymodels - -# Helper packages -library(modeldata) # for the cells data -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The cell image data {#data} - -Let's use data from [Hill, LaPan, Li, and Haney (2007)](http://www.biomedcentral.com/1471-2105/8/340), available in the [modeldata package](https://cran.r-project.org/web/packages/modeldata/index.html), to predict cell image segmentation quality with resampling. To start, we load this data into R: - -```{r cell-import, R.options = list(tibble.print_min = 5, tibble.max_extra_cols = 5)} -data(cells, package = "modeldata") -cells -``` - -We have data for `r nrow(cells)` cells, with `r ncol(cells)` variables. The main outcome variable of interest for us here is called `class`, which you can see is a factor. But before we jump into predicting the `class` variable, we need to understand it better. Below is a brief primer on cell image segmentation. 
- -### Predicting image segmentation quality - -Some biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined. - -For example, in top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell. - -```{r cell-fig, echo = FALSE, fig.align='center', out.width="70%"} -knitr::include_graphics("img/cells.png") -``` - -Using these colors, the cells in an image can be _segmented_ so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or "oblongness" of the cell. - -The bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion. - -A cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (`PS`) or well-segmented (`WS`). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented. 
- -### Back to the cells data - -The `cells` data has `class` labels for `r nrow(cells)` cells — each cell is labeled as either poorly segmented (`PS`) or well-segmented (`WS`). Each also has a total of `r ncol(cells) - 2` predictors based on automated image analysis measurements. For example, `avg_inten_ch_1` is the mean intensity of the data contained in the nucleus, `area_ch_1` is the total size of the cell, and so on (some predictors are fairly arcane in nature). - -```{r cells-show, R.options = list(tibble.print_min = 5, tibble.max_extra_cols = 5)} -cells -``` - -The rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells: - -```{r rates} -cells %>% - count(class) %>% - mutate(prop = n/sum(n)) -``` - -## Data splitting {#data-split} - -In our previous [*Preprocess your data with recipes*](/start/recipes/#data-split) article, we started by splitting our data. It is common when beginning a modeling project to [separate the data set](https://bookdown.org/max/FES/data-splitting.html) into two partitions: - - * The _training set_ is used to estimate parameters, compare models and feature engineering techniques, tune models, etc. - - * The _test set_ is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance. - -There are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the [rsample](https://rsample.tidymodels.org/) package for this purpose. - -Since random sampling uses random numbers, it is important to set the random number seed. This ensures that the random numbers can be reproduced at a later time (if needed). 
- -The function `rsample::initial_split()` takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column `case`. To demonstrate how to make a split, we'll remove this column before we make our own split: - -```{r cell-split} -set.seed(123) -cell_split <- initial_split(cells %>% select(-case), - strata = class) -``` - -Here we used the [`strata` argument](https://rsample.tidymodels.org/reference/initial_split.html), which conducts a stratified split. This ensures that, despite the imbalance we noticed in our `class` variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the `initial_split`, the `training()` and `testing()` functions return the actual data sets. - -```{r cell-train-test} -cell_train <- training(cell_split) -cell_test <- testing(cell_split) - -nrow(cell_train) -nrow(cell_train)/nrow(cells) - -# training set proportions by class -cell_train %>% - count(class) %>% - mutate(prop = n/sum(n)) - -# test set proportions by class -cell_test %>% - count(class) %>% - mutate(prop = n/sum(n)) -``` - -The majority of the modeling work is then conducted on the training set data. - - -## Modeling - -[Random forest models](https://en.wikipedia.org/wiki/Random_forest) are [ensembles](https://en.wikipedia.org/wiki/Ensemble_learning) of [decision trees](https://en.wikipedia.org/wiki/Decision_tree). A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. 
For categorical outcome variables like `class` in our `cells` data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample. - -One of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won't create a recipe for the `cells` data. - -At the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute. - -To fit a random forest model on the training set, let's use the [parsnip](https://parsnip.tidymodels.org/) package with the [ranger](https://cran.r-project.org/package=ranger) engine. We first define the model that we want to create: - -```{r rf-def} -rf_mod <- - rand_forest(trees = 1000) %>% - set_engine("ranger") %>% - set_mode("classification") -``` - -Starting with this parsnip model object, the `fit()` function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing: - -```{r rf-fit} -set.seed(234) -rf_fit <- - rf_mod %>% - fit(class ~ ., data = cell_train) -rf_fit -``` - -This new `rf_fit` object is our fitted model, trained on our training data set. - - -## Estimating performance {#performance} - -During a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are: - - * the area under the Receiver Operating Characteristic (ROC) curve, and - - * overall classification accuracy. - -The ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. 
The hard class predictions tell us whether our model predicted `PS` or `WS` for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented. - -The [yardstick package](https://yardstick.tidymodels.org/) has functions for computing both of these measures called `roc_auc()` and `accuracy()`. - -At first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let's see what happens if we try this. To evaluate performance based on the training set, we call the `predict()` method to get both types of predictions (i.e. probabilities and hard class predictions). - -```{r rf-train-pred} -rf_training_pred <- - predict(rf_fit, cell_train) %>% - bind_cols(predict(rf_fit, cell_train, type = "prob")) %>% - # Add the true outcome data back in - bind_cols(cell_train %>% - select(class)) -``` - -Using the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious: - -```{r rf-train-perf} -rf_training_pred %>% # training set predictions - roc_auc(truth = class, .pred_PS) -rf_training_pred %>% # training set predictions - accuracy(truth = class, .pred_class) -``` - -Now that we have this model with exceptional performance, we proceed to the test set. Unfortunately, we discover that, although our results aren't bad, they are certainly worse than what we initially thought based on predicting the training set: - -```{r rf-test} -rf_testing_pred <- - predict(rf_fit, cell_test) %>% - bind_cols(predict(rf_fit, cell_test, type = "prob")) %>% - bind_cols(cell_test %>% select(class)) -``` - -```{r rf-test-perf} -rf_testing_pred %>% # test set predictions - roc_auc(truth = class, .pred_PS) -rf_testing_pred %>% # test set predictions - accuracy(truth = class, .pred_class) -``` - -### What happened here? 
- -There are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic: - - * Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results. - -* The training set does not have the capacity to be a good arbiter of performance. It is not an independent piece of information; predicting the training set can only reflect what the model already knows. - -To understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the _second_ test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test. - - - -## Resampling to the rescue {#resampling} - -Resampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the _training set_. This schematic from [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/resampling.html) illustrates data usage for resampling methods: - -```{r resampling-fig, echo = FALSE, fig.align='center', out.width="85%"} -knitr::include_graphics("img/resampling.svg") -``` - -In the first level of this diagram, you see what happens when you use `rsample::initial_split()`, which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out. - -Let's use 10-fold cross-validation (CV) in this example. This method randomly allocates the `r nrow(cell_train)` cells in the training set to 10 groups of roughly equal size, called "folds". 
For the first iteration of resampling, the first fold of about `r floor(nrow(cell_train)/10)` cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the _assessment set_ in the tidymodels framework. - -The other 90% of the data (about `r floor(nrow(cell_train) * .9)` cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the _analysis set_. This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions. - -In this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics. - -```{r rf-rs, include = FALSE} -set.seed(345) -folds <- vfold_cv(cell_train) - -rf_wf <- - workflow() %>% - add_model(rf_mod) %>% - add_formula(class ~ .) 
- -set.seed(456) -rf_fit_rs <- fit_resamples(rf_wf, folds) - -assessment_size <- - folds %>% - tidy() %>% - group_by(Fold, Data) %>% - count() %>% - ungroup() %>% - filter(Data == "Assessment") %>% - select(`assessment size` = n, id = Fold) - -assessment_stats <- - collect_metrics(rf_fit_rs, summarize = FALSE) %>% - select(id, .estimate, .metric) %>% - pivot_wider( - id_cols = c(id), - names_from = c(.metric), - values_from = c(.estimate) - ) %>% - full_join(assessment_size, by = "id") %>% - dplyr::rename(resample = id) - -rs_stats <- collect_metrics(rf_fit_rs) -``` - -The final resampling estimates for the model are the **averages** of the performance statistics replicates. For example, suppose for our data the results were: - -```{r rs-table, echo = FALSE, results = "asis"} -assessment_stats %>% - kable() %>% - kable_styling(full_width = FALSE) -``` - -From these resampling statistics, the final estimate of performance for this random forest model would be `r round(rs_stats$mean[rs_stats$.metric == "roc_auc"], 3)` for the area under the ROC curve and `r round(rs_stats$mean[rs_stats$.metric == "accuracy"], 3)` for accuracy. - -These resampling statistics are an effective method for measuring model performance _without_ predicting the training set directly as a whole. - -## Fit a model with resampling {#fit-resamples} - -To generate these results, the first step is to create a resampling object using rsample. There are [several resampling methods](https://rsample.tidymodels.org/reference/index.html#section-resampling-methods) implemented in rsample; cross-validation folds can be created using `vfold_cv()`: - -```{r folds} -set.seed(345) -folds <- vfold_cv(cell_train, v = 10) -folds -``` - -The list column for `splits` contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called `analysis()` and `assessment()`. 
- -However, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling: - -+ Resample a model specification preprocessed with a formula or [recipe](/start/recipes/), or - -+ Resample a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and formula/recipe. - -For this example, let's use a `workflow()` that bundles together the random forest model and a formula, since we are not using a recipe. Whichever of these options you use, the syntax to `fit_resamples()` is very similar to `fit()`: - -```{r rs, eval = FALSE} -rf_wf <- - workflow() %>% - add_model(rf_mod) %>% - add_formula(class ~ .) - -set.seed(456) -rf_fit_rs <- - rf_wf %>% - fit_resamples(folds) -``` - -```{r rs-show} -rf_fit_rs -``` - -The results are similar to the `folds` results with some extra columns. The column `.metrics` contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data: - -```{r metrics} -collect_metrics(rf_fit_rs) -``` - -Think about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. 
We have looked at this once already before we started using resampling, but let's remind ourselves of the results: - -```{r ref.label='rf-test-perf'} - -``` - -The performance metrics from the test set are much closer to the performance metrics computed using resampling than our first ("bad idea") attempt. Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model's performance. - - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/start/resampling/index.markdown b/content/start/resampling/index.markdown deleted file mode 100644 index 84f17e46..00000000 --- a/content/start/resampling/index.markdown +++ /dev/null @@ -1,527 +0,0 @@ ---- -title: "Evaluate your model with resampling" -weight: 3 -tags: [rsample, parsnip, tune, workflows, yardstick] -categories: [resampling] -description: | - Measure model performance by generating different versions of the training data through resampling. ---- - - - - - - - - - -## Introduction {#intro} - -So far, we have [built a model](/start/models/) and [preprocessed data with a recipe](/start/recipes/). We also introduced [workflows](/start/recipes/#fit-workflow) as a way to bundle a [parsnip model](https://parsnip.tidymodels.org/) and [recipe](https://recipes.tidymodels.org/) together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on **resampling** statistics. - -To use code in this article, you will need to install the following packages: modeldata, ranger, and tidymodels. 
- - -```r -library(tidymodels) # for the rsample package, along with the rest of tidymodels - -# Helper packages -library(modeldata) # for the cells data -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The cell image data {#data} - -Let's use data from [Hill, LaPan, Li, and Haney (2007)](http://www.biomedcentral.com/1471-2105/8/340), available in the [modeldata package](https://cran.r-project.org/web/packages/modeldata/index.html), to predict cell image segmentation quality with resampling. To start, we load this data into R: - - -```r -data(cells, package = "modeldata") -cells -#> # A tibble: 2,019 × 58 -#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3 -#> -#> 1 Test PS 143. 185 15.7 4.95 9.55 -#> 2 Train PS 134. 819 31.9 207. 69.9 -#> 3 Train WS 107. 431 28.0 116. 63.9 -#> 4 Train PS 69.2 298 19.5 102. 28.2 -#> 5 Test PS 2.89 285 24.3 112. 20.5 -#> # … with 2,014 more rows, and 51 more variables: avg_inten_ch_4 , -#> # convex_hull_area_ratio_ch_1 , convex_hull_perim_ratio_ch_1 , -#> # diff_inten_density_ch_1 , diff_inten_density_ch_3 , … -``` - -We have data for 2019 cells, with 58 variables. The main outcome variable of interest for us here is called `class`, which you can see is a factor. But before we jump into predicting the `class` variable, we need to understand it better. Below is a brief primer on cell image segmentation. - -### Predicting image segmentation quality - -Some biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined. 
- -For example, in top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell. - - - -Using these colors, the cells in an image can be _segmented_ so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or "oblongness" of the cell. - -The bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion. - -A cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (`PS`) or well-segmented (`WS`). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented. - -### Back to the cells data - -The `cells` data has `class` labels for 2019 cells — each cell is labeled as either poorly segmented (`PS`) or well-segmented (`WS`). Each also has a total of 56 predictors based on automated image analysis measurements. For example, `avg_inten_ch_1` is the mean intensity of the data contained in the nucleus, `area_ch_1` is the total size of the cell, and so on (some predictors are fairly arcane in nature). - - -```r -cells -#> # A tibble: 2,019 × 58 -#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3 -#> -#> 1 Test PS 143. 185 15.7 4.95 9.55 -#> 2 Train PS 134. 
819 31.9 207. 69.9 -#> 3 Train WS 107. 431 28.0 116. 63.9 -#> 4 Train PS 69.2 298 19.5 102. 28.2 -#> 5 Test PS 2.89 285 24.3 112. 20.5 -#> # … with 2,014 more rows, and 51 more variables: avg_inten_ch_4 , -#> # convex_hull_area_ratio_ch_1 , convex_hull_perim_ratio_ch_1 , -#> # diff_inten_density_ch_1 , diff_inten_density_ch_3 , … -``` - -The rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells: - - -```r -cells %>% - count(class) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> class n prop -#> -#> 1 PS 1300 0.644 -#> 2 WS 719 0.356 -``` - -## Data splitting {#data-split} - -In our previous [*Preprocess your data with recipes*](/start/recipes/#data-split) article, we started by splitting our data. It is common when beginning a modeling project to [separate the data set](https://bookdown.org/max/FES/data-splitting.html) into two partitions: - - * The _training set_ is used to estimate parameters, compare models and feature engineering techniques, tune models, etc. - - * The _test set_ is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance. - -There are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the [rsample](https://rsample.tidymodels.org/) package for this purpose. - -Since random sampling uses random numbers, it is important to set the random number seed. This ensures that the random numbers can be reproduced at a later time (if needed). - -The function `rsample::initial_split()` takes the original data and saves the information on how to make the partitions. 
In the original analysis, the authors made their own training/test set and that information is contained in the column `case`. To demonstrate how to make a split, we'll remove this column before we make our own split: - - -```r -set.seed(123) -cell_split <- initial_split(cells %>% select(-case), - strata = class) -``` - -Here we used the [`strata` argument](https://rsample.tidymodels.org/reference/initial_split.html), which conducts a stratified split. This ensures that, despite the imbalance we noticed in our `class` variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the `initial_split`, the `training()` and `testing()` functions return the actual data sets. - - -```r -cell_train <- training(cell_split) -cell_test <- testing(cell_split) - -nrow(cell_train) -#> [1] 1514 -nrow(cell_train)/nrow(cells) -#> [1] 0.7498762 - -# training set proportions by class -cell_train %>% - count(class) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> class n prop -#> -#> 1 PS 975 0.644 -#> 2 WS 539 0.356 - -# test set proportions by class -cell_test %>% - count(class) %>% - mutate(prop = n/sum(n)) -#> # A tibble: 2 × 3 -#> class n prop -#> -#> 1 PS 325 0.644 -#> 2 WS 180 0.356 -``` - -The majority of the modeling work is then conducted on the training set data. - - -## Modeling - -[Random forest models](https://en.wikipedia.org/wiki/Random_forest) are [ensembles](https://en.wikipedia.org/wiki/Ensemble_learning) of [decision trees](https://en.wikipedia.org/wiki/Decision_tree). A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. 
The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. For categorical outcome variables like `class` in our `cells` data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample. - -One of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won't create a recipe for the `cells` data. - -At the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute. - -To fit a random forest model on the training set, let's use the [parsnip](https://parsnip.tidymodels.org/) package with the [ranger](https://cran.r-project.org/package=ranger) engine. We first define the model that we want to create: - - -```r -rf_mod <- - rand_forest(trees = 1000) %>% - set_engine("ranger") %>% - set_mode("classification") -``` - -Starting with this parsnip model object, the `fit()` function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing: - - -```r -set.seed(234) -rf_fit <- - rf_mod %>% - fit(class ~ ., data = cell_train) -rf_fit -#> parsnip model object -#> -#> Ranger result -#> -#> Call: -#> ranger::ranger(x = maybe_data_frame(x), y = y, num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) -#> -#> Type: Probability estimation -#> Number of trees: 1000 -#> Sample size: 1514 -#> Number of independent variables: 56 -#> Mtry: 7 -#> Target node size: 10 -#> Variable importance mode: none -#> Splitrule: gini -#> OOB prediction error (Brier s.): 0.1189338 -``` - -This new `rf_fit` object is our fitted model, trained on our training data set. 
- - -## Estimating performance {#performance} - -During a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are: - - * the area under the Receiver Operating Characteristic (ROC) curve, and - - * overall classification accuracy. - -The ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. The hard class predictions tell us whether our model predicted `PS` or `WS` for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented. - -The [yardstick package](https://yardstick.tidymodels.org/) has functions for computing both of these measures called `roc_auc()` and `accuracy()`. - -At first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let's see what happens if we try this. To evaluate performance based on the training set, we call the `predict()` method to get both types of predictions (i.e. probabilities and hard class predictions). 
- - -```r -rf_training_pred <- - predict(rf_fit, cell_train) %>% - bind_cols(predict(rf_fit, cell_train, type = "prob")) %>% - # Add the true outcome data back in - bind_cols(cell_train %>% - select(class)) -``` - -Using the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious: - - -```r -rf_training_pred %>% # training set predictions - roc_auc(truth = class, .pred_PS) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 1.00 -rf_training_pred %>% # training set predictions - accuracy(truth = class, .pred_class) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 accuracy binary 0.991 -``` - -Now that we have this model with exceptional performance, we proceed to the test set. Unfortunately, we discover that, although our results aren't bad, they are certainly worse than what we initially thought based on predicting the training set: - - -```r -rf_testing_pred <- - predict(rf_fit, cell_test) %>% - bind_cols(predict(rf_fit, cell_test, type = "prob")) %>% - bind_cols(cell_test %>% select(class)) -``` - - -```r -rf_testing_pred %>% # test set predictions - roc_auc(truth = class, .pred_PS) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 0.891 -rf_testing_pred %>% # test set predictions - accuracy(truth = class, .pred_class) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 accuracy binary 0.816 -``` - -### What happened here? - -There are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic: - - * Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results. - -* The training set does not have the capacity to be a good arbiter of performance. 
It is not an independent piece of information; predicting the training set can only reflect what the model already knows. - -To understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the _second_ test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test. - - - -## Resampling to the rescue {#resampling} - -Resampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the _training set_. This schematic from [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/resampling.html) illustrates data usage for resampling methods: - - - -In the first level of this diagram, you see what happens when you use `rsample::initial_split()`, which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out. - -Let's use 10-fold cross-validation (CV) in this example. This method randomly allocates the 1514 cells in the training set to 10 groups of roughly equal size, called "folds". For the first iteration of resampling, the first fold of about 151 cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the _assessment set_ in the tidymodels framework. - -The other 90% of the data (about 1362 cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the _analysis set_. 
This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions. - -In this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics. - - - -The final resampling estimates for the model are the **averages** of the performance statistics replicates. For example, suppose for our data the results were: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    resample accuracy roc_auc assessment size
    Fold01 0.8289474 0.8937128 152
    Fold02 0.7697368 0.8768989 152
    Fold03 0.8552632 0.9017666 152
    Fold04 0.8552632 0.8928076 152
    Fold05 0.7947020 0.8816342 151
    Fold06 0.8476821 0.9244306 151
    Fold07 0.8145695 0.8960339 151
    Fold08 0.8543046 0.9267677 151
    Fold09 0.8543046 0.9231392 151
    Fold10 0.8476821 0.9266917 151
    - -From these resampling statistics, the final estimate of performance for this random forest model would be 0.904 for the area under the ROC curve and 0.832 for accuracy. - -These resampling statistics are an effective method for measuring model performance _without_ predicting the training set directly as a whole. - -## Fit a model with resampling {#fit-resamples} - -To generate these results, the first step is to create a resampling object using rsample. There are [several resampling methods](https://rsample.tidymodels.org/reference/index.html#section-resampling-methods) implemented in rsample; cross-validation folds can be created using `vfold_cv()`: - - -```r -set.seed(345) -folds <- vfold_cv(cell_train, v = 10) -folds -#> # 10-fold cross-validation -#> # A tibble: 10 × 2 -#> splits id -#> -#> 1 Fold01 -#> 2 Fold02 -#> 3 Fold03 -#> 4 Fold04 -#> 5 Fold05 -#> 6 Fold06 -#> 7 Fold07 -#> 8 Fold08 -#> 9 Fold09 -#> 10 Fold10 -``` - -The list column for `splits` contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called `analysis()` and `assessment()`. - -However, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling: - -+ Resample a model specification preprocessed with a formula or [recipe](/start/recipes/), or - -+ Resample a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and formula/recipe. - -For this example, let's use a `workflow()` that bundles together the random forest model and a formula, since we are not using a recipe. Whichever of these options you use, the syntax to `fit_resamples()` is very similar to `fit()`: - - -```r -rf_wf <- - workflow() %>% - add_model(rf_mod) %>% - add_formula(class ~ .) 
- -set.seed(456) -rf_fit_rs <- - rf_wf %>% - fit_resamples(folds) -``` - - -```r -rf_fit_rs -#> # Resampling results -#> # 10-fold cross-validation -#> # A tibble: 10 × 4 -#> splits id .metrics .notes -#> -#> 1 Fold01 -#> 2 Fold02 -#> 3 Fold03 -#> 4 Fold04 -#> 5 Fold05 -#> 6 Fold06 -#> 7 Fold07 -#> 8 Fold08 -#> 9 Fold09 -#> 10 Fold10 -``` - -The results are similar to the `folds` results with some extra columns. The column `.metrics` contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data: - - -```r -collect_metrics(rf_fit_rs) -#> # A tibble: 2 × 6 -#> .metric .estimator mean n std_err .config -#> -#> 1 accuracy binary 0.832 10 0.00952 Preprocessor1_Model1 -#> 2 roc_auc binary 0.904 10 0.00610 Preprocessor1_Model1 -``` - -Think about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. We have looked at this once already before we started using resampling, but let's remind ourselves of the results: - - -```r -rf_testing_pred %>% # test set predictions - roc_auc(truth = class, .pred_PS) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 roc_auc binary 0.891 -rf_testing_pred %>% # test set predictions - accuracy(truth = class, .pred_class) -#> # A tibble: 1 × 3 -#> .metric .estimator .estimate -#> -#> 1 accuracy binary 0.816 -``` - -The performance metrics from the test set are much closer to the performance metrics computed using resampling than our first ("bad idea") attempt. 
Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model's performance. - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> modeldata * 1.0.1 2022-09-06 [1] CRAN (R 4.2.0) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> ranger * 0.14.1 2022-06-18 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/content/start/resampling/index_files/kePrint/kePrint.js b/content/start/resampling/index_files/kePrint/kePrint.js deleted file mode 100644 index e6fbbfc4..00000000 --- a/content/start/resampling/index_files/kePrint/kePrint.js +++ /dev/null @@ -1,8 +0,0 @@ 
-$(document).ready(function(){ - if (typeof $('[data-toggle="tooltip"]').tooltip === 'function') { - $('[data-toggle="tooltip"]').tooltip(); - } - if ($('[data-toggle="popover"]').popover === 'function') { - $('[data-toggle="popover"]').popover(); - } -}); diff --git a/content/start/resampling/index_files/lightable/lightable.css b/content/start/resampling/index_files/lightable/lightable.css deleted file mode 100644 index 3be3be90..00000000 --- a/content/start/resampling/index_files/lightable/lightable.css +++ /dev/null @@ -1,272 +0,0 @@ -/*! - * lightable v0.0.1 - * Copyright 2020 Hao Zhu - * Licensed under MIT (https://github.com/haozhu233/kableExtra/blob/master/LICENSE) - */ - -.lightable-minimal { - border-collapse: separate; - border-spacing: 16px 1px; - width: 100%; - margin-bottom: 10px; -} - -.lightable-minimal td { - margin-left: 5px; - margin-right: 5px; -} - -.lightable-minimal th { - margin-left: 5px; - margin-right: 5px; -} - -.lightable-minimal thead tr:last-child th { - border-bottom: 2px solid #00000050; - empty-cells: hide; - -} - -.lightable-minimal tbody tr:first-child td { - padding-top: 0.5em; -} - -.lightable-minimal.lightable-hover tbody tr:hover { - background-color: #f5f5f5; -} - -.lightable-minimal.lightable-striped tbody tr:nth-child(even) { - background-color: #f5f5f5; -} - -.lightable-classic { - border-top: 0.16em solid #111111; - border-bottom: 0.16em solid #111111; - width: 100%; - margin-bottom: 10px; - margin: 10px 5px; -} - -.lightable-classic tfoot tr td { - border: 0; -} - -.lightable-classic tfoot tr:first-child td { - border-top: 0.14em solid #111111; -} - -.lightable-classic caption { - color: #222222; -} - -.lightable-classic td { - padding-left: 5px; - padding-right: 5px; - color: #222222; -} - -.lightable-classic th { - padding-left: 5px; - padding-right: 5px; - font-weight: normal; - color: #222222; -} - -.lightable-classic thead tr:last-child th { - border-bottom: 0.10em solid #111111; -} - 
-.lightable-classic.lightable-hover tbody tr:hover { - background-color: #F9EEC1; -} - -.lightable-classic.lightable-striped tbody tr:nth-child(even) { - background-color: #f5f5f5; -} - -.lightable-classic-2 { - border-top: 3px double #111111; - border-bottom: 3px double #111111; - width: 100%; - margin-bottom: 10px; -} - -.lightable-classic-2 tfoot tr td { - border: 0; -} - -.lightable-classic-2 tfoot tr:first-child td { - border-top: 3px double #111111; -} - -.lightable-classic-2 caption { - color: #222222; -} - -.lightable-classic-2 td { - padding-left: 5px; - padding-right: 5px; - color: #222222; -} - -.lightable-classic-2 th { - padding-left: 5px; - padding-right: 5px; - font-weight: normal; - color: #222222; -} - -.lightable-classic-2 tbody tr:last-child td { - border-bottom: 3px double #111111; -} - -.lightable-classic-2 thead tr:last-child th { - border-bottom: 1px solid #111111; -} - -.lightable-classic-2.lightable-hover tbody tr:hover { - background-color: #F9EEC1; -} - -.lightable-classic-2.lightable-striped tbody tr:nth-child(even) { - background-color: #f5f5f5; -} - -.lightable-material { - min-width: 100%; - white-space: nowrap; - table-layout: fixed; - font-family: Roboto, sans-serif; - border: 1px solid #EEE; - border-collapse: collapse; - margin-bottom: 10px; -} - -.lightable-material tfoot tr td { - border: 0; -} - -.lightable-material tfoot tr:first-child td { - border-top: 1px solid #EEE; -} - -.lightable-material th { - height: 56px; - padding-left: 16px; - padding-right: 16px; -} - -.lightable-material td { - height: 52px; - padding-left: 16px; - padding-right: 16px; - border-top: 1px solid #eeeeee; -} - -.lightable-material.lightable-hover tbody tr:hover { - background-color: #f5f5f5; -} - -.lightable-material.lightable-striped tbody tr:nth-child(even) { - background-color: #f5f5f5; -} - -.lightable-material.lightable-striped tbody td { - border: 0; -} - -.lightable-material.lightable-striped thead tr:last-child th { - border-bottom: 1px 
solid #ddd; -} - -.lightable-material-dark { - min-width: 100%; - white-space: nowrap; - table-layout: fixed; - font-family: Roboto, sans-serif; - border: 1px solid #FFFFFF12; - border-collapse: collapse; - margin-bottom: 10px; - background-color: #363640; -} - -.lightable-material-dark tfoot tr td { - border: 0; -} - -.lightable-material-dark tfoot tr:first-child td { - border-top: 1px solid #FFFFFF12; -} - -.lightable-material-dark th { - height: 56px; - padding-left: 16px; - padding-right: 16px; - color: #FFFFFF60; -} - -.lightable-material-dark td { - height: 52px; - padding-left: 16px; - padding-right: 16px; - color: #FFFFFF; - border-top: 1px solid #FFFFFF12; -} - -.lightable-material-dark.lightable-hover tbody tr:hover { - background-color: #FFFFFF12; -} - -.lightable-material-dark.lightable-striped tbody tr:nth-child(even) { - background-color: #FFFFFF12; -} - -.lightable-material-dark.lightable-striped tbody td { - border: 0; -} - -.lightable-material-dark.lightable-striped thead tr:last-child th { - border-bottom: 1px solid #FFFFFF12; -} - -.lightable-paper { - width: 100%; - margin-bottom: 10px; - color: #444; -} - -.lightable-paper tfoot tr td { - border: 0; -} - -.lightable-paper tfoot tr:first-child td { - border-top: 1px solid #00000020; -} - -.lightable-paper thead tr:last-child th { - color: #666; - vertical-align: bottom; - border-bottom: 1px solid #00000020; - line-height: 1.15em; - padding: 10px 5px; -} - -.lightable-paper td { - vertical-align: middle; - border-bottom: 1px solid #00000010; - line-height: 1.15em; - padding: 7px 5px; -} - -.lightable-paper.lightable-hover tbody tr:hover { - background-color: #F9EEC1; -} - -.lightable-paper.lightable-striped tbody tr:nth-child(even) { - background-color: #00000008; -} - -.lightable-paper.lightable-striped tbody td { - border: 0; -} - diff --git a/content/start/tuning/figs/best-rf-1.svg b/content/start/tuning/figs/best-rf-1.svg deleted file mode 100644 index 6dfad785..00000000 --- 
a/content/start/tuning/figs/best-rf-1.svg +++ /dev/null @@ -1,590 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/content/start/tuning/index.Rmarkdown b/content/start/tuning/index.Rmarkdown deleted file mode 100755 index 8af651a9..00000000 --- a/content/start/tuning/index.Rmarkdown +++ /dev/null @@ -1,266 +0,0 @@ ---- -title: "Tune model parameters" -weight: 4 -tags: [rsample, parsnip, tune, dials, workflows, yardstick] -categories: [tuning] -description: | - Estimate the best values for hyperparameters that cannot be learned directly during model training. 
---- - -```{r setup, include = FALSE, message = FALSE, warning = FALSE} -source(here::here("content/start/common.R")) -``` - -```{r load, include = FALSE, message = FALSE, warning = FALSE} -library(tidymodels) -library(rpart) -library(rpart.plot) -library(kableExtra) -library(vip) -theme_set(theme_bw()) -doParallel::registerDoParallel() -pkgs <- c("tidymodels", "rpart", "rpart.plot", "vip") -``` - - -## Introduction {#intro} - -Some model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called **hyperparameters**. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this `mtry` in tidymodels) or the learning rate in a boosted tree model (we call this `learn_rate`). Instead of learning these kinds of hyperparameters during model training, we can _estimate_ the best values for these values by training many models on resampled data sets and exploring how well all these models perform. This process is called **tuning**. - -`r article_req_pkgs(pkgs)` - -```{r eval=FALSE} -library(tidymodels) # for the tune package, along with the rest of tidymodels - -# Helper packages -library(rpart.plot) # for visualizing a decision tree -library(vip) # for variable importance plots -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The cell image data, revisited {#data} - -In our previous [*Evaluate your model with resampling*](/start/resampling/) article, we introduced a data set of images of cells that were labeled by experts as well-segmented (`WS`) or poorly segmented (`PS`). We trained a [random forest model](/start/resampling/#modeling) to predict which images are segmented well vs. poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used [resampling](/start/resampling/#resampling) to estimate the performance of our model on this data. 
- -```{r cell-import, R.options = list(tibble.print_min = 5, tibble.max_extra_cols = 5)} -data(cells, package = "modeldata") -cells -``` - -## Predicting image segmentation, but better {#why-tune} - -Random forest models are a tree-based ensemble method, and typically perform well with [default hyperparameters](https://bradleyboehmke.github.io/HOML/random-forest.html#out-of-the-box-performance). However, the accuracy of some other tree-based models, such as [boosted tree models](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) or [decision tree models](https://en.wikipedia.org/wiki/Decision_tree), can be sensitive to the values of hyperparameters. In this article, we will train a **decision tree** model. There are several hyperparameters for decision tree models that can be tuned for better performance. Let's explore: - -- the complexity parameter (which we call `cost_complexity` in tidymodels) for the tree, and -- the maximum `tree_depth`. - -Tuning these hyperparameters can improve model performance because decision tree models are prone to [overfitting](https://bookdown.org/max/FES/important-concepts.html#overfitting). This happens because single tree models tend to fit the training data _too well_ — so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data. - -We will tune the model hyperparameters to avoid overfitting. Tuning the value of `cost_complexity` helps by [pruning](https://bradleyboehmke.github.io/HOML/DT.html#pruning) back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem—an underfit tree. 
Tuning `tree_depth`, on the other hand, helps by [stopping](https://bradleyboehmke.github.io/HOML/DT.html#early-stopping) our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation. - -Before we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As [before](/start/resampling/), we can use `strata = class` if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation. - -```{r cell-split} -set.seed(123) -cell_split <- initial_split(cells %>% select(-case), - strata = class) -cell_train <- training(cell_split) -cell_test <- testing(cell_split) -``` - -We use the training data for tuning the model. - -## Tuning hyperparameters {#tuning} - -Let’s start with the parsnip package, using a [`decision_tree()`](https://parsnip.tidymodels.org/reference/decision_tree.html) model with the [rpart](https://cran.r-project.org/web/packages/rpart/index.html) engine. To tune the decision tree hyperparameters `cost_complexity` and `tree_depth`, we create a model specification that identifies which hyperparameters we plan to tune. - -```{r tune-spec} -tune_spec <- - decision_tree( - cost_complexity = tune(), - tree_depth = tune() - ) %>% - set_engine("rpart") %>% - set_mode("classification") - -tune_spec -``` - -Think of `tune()` here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will `tune()`. 
- -We can't train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we _can_ train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter: - -```{r tree-grid} -tree_grid <- grid_regular(cost_complexity(), - tree_depth(), - levels = 5) -``` - -The function [`grid_regular()`](https://dials.tidymodels.org/reference/grid_regular.html) is from the [dials](https://dials.tidymodels.org/) package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. Since we have two to tune, `grid_regular()` returns 5 $\times$ 5 = 25 different possible tuning combinations to try in a tidy tibble format. - -```{r tree-grid-tibble} -tree_grid -``` - -Here, you can see all 5 values of `cost_complexity` ranging up to `r max(tree_grid$cost_complexity)`. These values get repeated for each of the 5 values of `tree_depth`: - -```{r} -tree_grid %>% - count(tree_depth) -``` - - -Armed with our grid filled with 25 candidate decision tree models, let's create [cross-validation folds](/start/resampling/) for tuning: - -```{r cell-folds} -set.seed(234) -cell_folds <- vfold_cv(cell_train) -``` - -Tuning in tidymodels requires a resampled object created with the [rsample](https://rsample.tidymodels.org/) package. - -## Model tuning with a grid {#tune-grid} - -We are ready to tune! Let's use [`tune_grid()`](https://tune.tidymodels.org/reference/tune_grid.html) to fit models at all the different values we chose for each tuned hyperparameter. There are several options for building the object for tuning: - -+ Tune a model specification along with a recipe or model, or - -+ Tune a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and a recipe or model preprocessor. 
- -Here we use a `workflow()` with a straightforward formula; if this model required more involved data preprocessing, we could use `add_recipe()` instead of `add_formula()`. - -```{r tree-res, message=FALSE} -set.seed(345) - -tree_wf <- workflow() %>% - add_model(tune_spec) %>% - add_formula(class ~ .) - -tree_res <- - tree_wf %>% - tune_grid( - resamples = cell_folds, - grid = tree_grid - ) - -tree_res -``` - -Once we have our tuning results, we can both explore them through visualization and then select the best result. The function `collect_metrics()` gives us a tidy tibble with all the results. We had 25 candidate models and two metrics, `accuracy` and `roc_auc`, and we get a row for each `.metric` and model. - -```{r collect-trees} -tree_res %>% - collect_metrics() -``` - -We might get more out of plotting these results: - -```{r best-tree, fig.width=8, fig.height=7} -tree_res %>% - collect_metrics() %>% - mutate(tree_depth = factor(tree_depth)) %>% - ggplot(aes(cost_complexity, mean, color = tree_depth)) + - geom_line(linewidth = 1.5, alpha = 0.6) + - geom_point(size = 2) + - facet_wrap(~ .metric, scales = "free", nrow = 2) + - scale_x_log10(labels = scales::label_number()) + - scale_color_viridis_d(option = "plasma", begin = .9, end = 0) -``` - -We can see that our "stubbiest" tree, with a depth of `r min(tree_grid$tree_depth)`, is the worst model according to both metrics and across all candidate values of `cost_complexity`. Our deepest tree, with a depth of `r max(tree_grid$tree_depth)`, did better. However, the best tree seems to be between these values with a tree depth of 4. 
The [`show_best()`](https://tune.tidymodels.org/reference/show_best.html) function shows us the top 5 candidate models by default: - -```{r show-best-tree} -tree_res %>% - show_best("accuracy") -``` - -We can also use the [`select_best()`](https://tune.tidymodels.org/reference/show_best.html) function to pull out the single set of hyperparameter values for our best decision tree model: - -```{r select-best-tree} -best_tree <- tree_res %>% - select_best("accuracy") - -best_tree -``` - -These are the values for `tree_depth` and `cost_complexity` that maximize accuracy in this data set of cell images. - - -## Finalizing our model {#final-model} - -We can update (or "finalize") our workflow object `tree_wf` with the values from `select_best()`. - -```{r final-wf} -final_wf <- - tree_wf %>% - finalize_workflow(best_tree) - -final_wf -``` - -Our tuning is done! - -### The last fit - -Finally, let's fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. We can use the function [`last_fit()`](https://tune.tidymodels.org/reference/last_fit.html) with our finalized model; this function _fits_ the finalized model on the full training data set and _evaluates_ the finalized model on the testing data. - -```{r last-fit} -final_fit <- - final_wf %>% - last_fit(cell_split) - -final_fit %>% - collect_metrics() - -final_fit %>% - collect_predictions() %>% - roc_curve(class, .pred_PS) %>% - autoplot() -``` - -The performance metrics from the test set indicate that we did not overfit during our tuning procedure. - -The `final_fit` object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. You may want to extract this object, using [one of the `extract_` helper functions](https://tune.tidymodels.org/reference/extract-tune.html). 
- -```{r last-fit-wf} -final_tree <- extract_workflow(final_fit) -final_tree -``` - -We can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit. - -```{r rpart-plot, fig.width=8, fig.height=5} -final_tree %>% - extract_fit_engine() %>% - rpart.plot(roundint = FALSE) -``` - -Perhaps we would also like to understand what variables are important in this final model. We can use the [vip](https://koalaverse.github.io/vip/) package to estimate variable importance [based on the model's structure](https://koalaverse.github.io/vip/reference/vi_model.html#details). - -```{r vip, fig.width=6, fig.height=5} -library(vip) - -final_tree %>% - extract_fit_parsnip() %>% - vip() -``` - -These are the automated image analysis measurements that are the most important in driving segmentation quality predictions. - - -We leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the [reference docs](/find/parsnip/#models), or use the `args()` function to see which parsnip object arguments are available: - -```{r} -args(decision_tree) -``` - -You could tune the other hyperparameter we didn't use here, `min_n`, which sets the minimum `n` to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this [searchable table](/find/parsnip/#model-args) to find the original argument for `min_n` in the rpart package ([hint](https://stat.ethz.ch/R-manual/R-devel/library/rpart/html/rpart.control.html)). See whether you can tune a different combination of hyperparameters and/or values to improve a tree's ability to predict cell segmentation quality. 
- - - -## Session information - -```{r si, echo = FALSE} -small_session(pkgs) -``` diff --git a/content/start/tuning/index.markdown b/content/start/tuning/index.markdown deleted file mode 100644 index aefe91d5..00000000 --- a/content/start/tuning/index.markdown +++ /dev/null @@ -1,451 +0,0 @@ ---- -title: "Tune model parameters" -weight: 4 -tags: [rsample, parsnip, tune, dials, workflows, yardstick] -categories: [tuning] -description: | - Estimate the best values for hyperparameters that cannot be learned directly during model training. ---- - - - - - - -## Introduction {#intro} - -Some model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called **hyperparameters**. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this `mtry` in tidymodels) or the learning rate in a boosted tree model (we call this `learn_rate`). Instead of learning these kinds of hyperparameters during model training, we can _estimate_ the best values for these values by training many models on resampled data sets and exploring how well all these models perform. This process is called **tuning**. - -To use code in this article, you will need to install the following packages: rpart, rpart.plot, tidymodels, and vip. - - -```r -library(tidymodels) # for the tune package, along with the rest of tidymodels - -# Helper packages -library(rpart.plot) # for visualizing a decision tree -library(vip) # for variable importance plots -``` - -{{< test-drive url="https://rstudio.cloud/project/2674862" >}} - -## The cell image data, revisited {#data} - -In our previous [*Evaluate your model with resampling*](/start/resampling/) article, we introduced a data set of images of cells that were labeled by experts as well-segmented (`WS`) or poorly segmented (`PS`). We trained a [random forest model](/start/resampling/#modeling) to predict which images are segmented well vs. 
poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used [resampling](/start/resampling/#resampling) to estimate the performance of our model on this data. - - -```r -data(cells, package = "modeldata") -cells -#> # A tibble: 2,019 × 58 -#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3 -#> -#> 1 Test PS 143. 185 15.7 4.95 9.55 -#> 2 Train PS 134. 819 31.9 207. 69.9 -#> 3 Train WS 107. 431 28.0 116. 63.9 -#> 4 Train PS 69.2 298 19.5 102. 28.2 -#> 5 Test PS 2.89 285 24.3 112. 20.5 -#> # … with 2,014 more rows, and 51 more variables: avg_inten_ch_4 , -#> # convex_hull_area_ratio_ch_1 , convex_hull_perim_ratio_ch_1 , -#> # diff_inten_density_ch_1 , diff_inten_density_ch_3 , … -``` - -## Predicting image segmentation, but better {#why-tune} - -Random forest models are a tree-based ensemble method, and typically perform well with [default hyperparameters](https://bradleyboehmke.github.io/HOML/random-forest.html#out-of-the-box-performance). However, the accuracy of some other tree-based models, such as [boosted tree models](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) or [decision tree models](https://en.wikipedia.org/wiki/Decision_tree), can be sensitive to the values of hyperparameters. In this article, we will train a **decision tree** model. There are several hyperparameters for decision tree models that can be tuned for better performance. Let's explore: - -- the complexity parameter (which we call `cost_complexity` in tidymodels) for the tree, and -- the maximum `tree_depth`. - -Tuning these hyperparameters can improve model performance because decision tree models are prone to [overfitting](https://bookdown.org/max/FES/important-concepts.html#overfitting). 
This happens because single tree models tend to fit the training data _too well_ — so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data. - -We will tune the model hyperparameters to avoid overfitting. Tuning the value of `cost_complexity` helps by [pruning](https://bradleyboehmke.github.io/HOML/DT.html#pruning) back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem—an underfit tree. Tuning `tree_depth`, on the other hand, helps by [stopping](https://bradleyboehmke.github.io/HOML/DT.html#early-stopping) our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation. - -Before we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As [before](/start/resampling/), we can use `strata = class` if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation. - - -```r -set.seed(123) -cell_split <- initial_split(cells %>% select(-case), - strata = class) -cell_train <- training(cell_split) -cell_test <- testing(cell_split) -``` - -We use the training data for tuning the model. - -## Tuning hyperparameters {#tuning} - -Let’s start with the parsnip package, using a [`decision_tree()`](https://parsnip.tidymodels.org/reference/decision_tree.html) model with the [rpart](https://cran.r-project.org/web/packages/rpart/index.html) engine. 
To tune the decision tree hyperparameters `cost_complexity` and `tree_depth`, we create a model specification that identifies which hyperparameters we plan to tune. - - -```r -tune_spec <- - decision_tree( - cost_complexity = tune(), - tree_depth = tune() - ) %>% - set_engine("rpart") %>% - set_mode("classification") - -tune_spec -#> Decision Tree Model Specification (classification) -#> -#> Main Arguments: -#> cost_complexity = tune() -#> tree_depth = tune() -#> -#> Computational engine: rpart -``` - -Think of `tune()` here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will `tune()`. - -We can't train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we _can_ train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter: - - -```r -tree_grid <- grid_regular(cost_complexity(), - tree_depth(), - levels = 5) -``` - -The function [`grid_regular()`](https://dials.tidymodels.org/reference/grid_regular.html) is from the [dials](https://dials.tidymodels.org/) package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. Since we have two to tune, `grid_regular()` returns 5 `\(\times\)` 5 = 25 different possible tuning combinations to try in a tidy tibble format. - - -```r -tree_grid -#> # A tibble: 25 × 2 -#> cost_complexity tree_depth -#> -#> 1 0.0000000001 1 -#> 2 0.0000000178 1 -#> 3 0.00000316 1 -#> 4 0.000562 1 -#> 5 0.1 1 -#> 6 0.0000000001 4 -#> 7 0.0000000178 4 -#> 8 0.00000316 4 -#> 9 0.000562 4 -#> 10 0.1 4 -#> # … with 15 more rows -``` - -Here, you can see all 5 values of `cost_complexity` ranging up to 0.1. 
These values get repeated for each of the 5 values of `tree_depth`: - - -```r -tree_grid %>% - count(tree_depth) -#> # A tibble: 5 × 2 -#> tree_depth n -#> -#> 1 1 5 -#> 2 4 5 -#> 3 8 5 -#> 4 11 5 -#> 5 15 5 -``` - - -Armed with our grid filled with 25 candidate decision tree models, let's create [cross-validation folds](/start/resampling/) for tuning: - - -```r -set.seed(234) -cell_folds <- vfold_cv(cell_train) -``` - -Tuning in tidymodels requires a resampled object created with the [rsample](https://rsample.tidymodels.org/) package. - -## Model tuning with a grid {#tune-grid} - -We are ready to tune! Let's use [`tune_grid()`](https://tune.tidymodels.org/reference/tune_grid.html) to fit models at all the different values we chose for each tuned hyperparameter. There are several options for building the object for tuning: - -+ Tune a model specification along with a recipe or model, or - -+ Tune a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and a recipe or model preprocessor. - -Here we use a `workflow()` with a straightforward formula; if this model required more involved data preprocessing, we could use `add_recipe()` instead of `add_formula()`. - - -```r -set.seed(345) - -tree_wf <- workflow() %>% - add_model(tune_spec) %>% - add_formula(class ~ .) - -tree_res <- - tree_wf %>% - tune_grid( - resamples = cell_folds, - grid = tree_grid - ) - -tree_res -#> # Tuning results -#> # 10-fold cross-validation -#> # A tibble: 10 × 4 -#> splits id .metrics .notes -#> -#> 1 Fold01 -#> 2 Fold02 -#> 3 Fold03 -#> 4 Fold04 -#> 5 Fold05 -#> 6 Fold06 -#> 7 Fold07 -#> 8 Fold08 -#> 9 Fold09 -#> 10 Fold10 -``` - -Once we have our tuning results, we can both explore them through visualization and then select the best result. The function `collect_metrics()` gives us a tidy tibble with all the results. We had 25 candidate models and two metrics, `accuracy` and `roc_auc`, and we get a row for each `.metric` and model. 
- - -```r -tree_res %>% - collect_metrics() -#> # A tibble: 50 × 8 -#> cost_complexity tree_depth .metric .estimator mean n std_err .config -#> -#> 1 0.0000000001 1 accuracy binary 0.732 10 0.0148 Preproces… -#> 2 0.0000000001 1 roc_auc binary 0.777 10 0.0107 Preproces… -#> 3 0.0000000178 1 accuracy binary 0.732 10 0.0148 Preproces… -#> 4 0.0000000178 1 roc_auc binary 0.777 10 0.0107 Preproces… -#> 5 0.00000316 1 accuracy binary 0.732 10 0.0148 Preproces… -#> 6 0.00000316 1 roc_auc binary 0.777 10 0.0107 Preproces… -#> 7 0.000562 1 accuracy binary 0.732 10 0.0148 Preproces… -#> 8 0.000562 1 roc_auc binary 0.777 10 0.0107 Preproces… -#> 9 0.1 1 accuracy binary 0.732 10 0.0148 Preproces… -#> 10 0.1 1 roc_auc binary 0.777 10 0.0107 Preproces… -#> # … with 40 more rows -``` - -We might get more out of plotting these results: - - -```r -tree_res %>% - collect_metrics() %>% - mutate(tree_depth = factor(tree_depth)) %>% - ggplot(aes(cost_complexity, mean, color = tree_depth)) + - geom_line(linewidth = 1.5, alpha = 0.6) + - geom_point(size = 2) + - facet_wrap(~ .metric, scales = "free", nrow = 2) + - scale_x_log10(labels = scales::label_number()) + - scale_color_viridis_d(option = "plasma", begin = .9, end = 0) -``` - - - -We can see that our "stubbiest" tree, with a depth of 1, is the worst model according to both metrics and across all candidate values of `cost_complexity`. Our deepest tree, with a depth of 15, did better. However, the best tree seems to be between these values with a tree depth of 4. 
The [`show_best()`](https://tune.tidymodels.org/reference/show_best.html) function shows us the top 5 candidate models by default: - - -```r -tree_res %>% - show_best("accuracy") -#> # A tibble: 5 × 8 -#> cost_complexity tree_depth .metric .estimator mean n std_err .config -#> -#> 1 0.0000000001 4 accuracy binary 0.807 10 0.0119 Preprocess… -#> 2 0.0000000178 4 accuracy binary 0.807 10 0.0119 Preprocess… -#> 3 0.00000316 4 accuracy binary 0.807 10 0.0119 Preprocess… -#> 4 0.000562 4 accuracy binary 0.807 10 0.0119 Preprocess… -#> 5 0.1 4 accuracy binary 0.786 10 0.0124 Preprocess… -``` - -We can also use the [`select_best()`](https://tune.tidymodels.org/reference/show_best.html) function to pull out the single set of hyperparameter values for our best decision tree model: - - -```r -best_tree <- tree_res %>% - select_best("accuracy") - -best_tree -#> # A tibble: 1 × 3 -#> cost_complexity tree_depth .config -#> -#> 1 0.0000000001 4 Preprocessor1_Model06 -``` - -These are the values for `tree_depth` and `cost_complexity` that maximize accuracy in this data set of cell images. - - -## Finalizing our model {#final-model} - -We can update (or "finalize") our workflow object `tree_wf` with the values from `select_best()`. - - -```r -final_wf <- - tree_wf %>% - finalize_workflow(best_tree) - -final_wf -#> ══ Workflow ══════════════════════════════════════════════════════════ -#> Preprocessor: Formula -#> Model: decision_tree() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> class ~ . -#> -#> ── Model ───────────────────────────────────────────────────────────── -#> Decision Tree Model Specification (classification) -#> -#> Main Arguments: -#> cost_complexity = 1e-10 -#> tree_depth = 4 -#> -#> Computational engine: rpart -``` - -Our tuning is done! - -### The last fit - -Finally, let's fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. 
We can use the function [`last_fit()`](https://tune.tidymodels.org/reference/last_fit.html) with our finalized model; this function _fits_ the finalized model on the full training data set and _evaluates_ the finalized model on the testing data. - - -```r -final_fit <- - final_wf %>% - last_fit(cell_split) - -final_fit %>% - collect_metrics() -#> # A tibble: 2 × 4 -#> .metric .estimator .estimate .config -#> -#> 1 accuracy binary 0.802 Preprocessor1_Model1 -#> 2 roc_auc binary 0.840 Preprocessor1_Model1 - -final_fit %>% - collect_predictions() %>% - roc_curve(class, .pred_PS) %>% - autoplot() -``` - - - -The performance metrics from the test set indicate that we did not overfit during our tuning procedure. - -The `final_fit` object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. You may want to extract this object, using [one of the `extract_` helper functions](https://tune.tidymodels.org/reference/extract-tune.html). - - -```r -final_tree <- extract_workflow(final_fit) -final_tree -#> ══ Workflow [trained] ════════════════════════════════════════════════ -#> Preprocessor: Formula -#> Model: decision_tree() -#> -#> ── Preprocessor ────────────────────────────────────────────────────── -#> class ~ . 
-#> -#> ── Model ───────────────────────────────────────────────────────────── -#> n= 1514 -#> -#> node), split, n, loss, yval, (yprob) -#> * denotes terminal node -#> -#> 1) root 1514 539 PS (0.64398943 0.35601057) -#> 2) total_inten_ch_2< 41732.5 642 33 PS (0.94859813 0.05140187) -#> 4) shape_p_2_a_ch_1>=1.251801 631 27 PS (0.95721078 0.04278922) * -#> 5) shape_p_2_a_ch_1< 1.251801 11 5 WS (0.45454545 0.54545455) * -#> 3) total_inten_ch_2>=41732.5 872 366 WS (0.41972477 0.58027523) -#> 6) fiber_width_ch_1< 11.37318 406 160 PS (0.60591133 0.39408867) -#> 12) avg_inten_ch_1< 145.4883 293 85 PS (0.70989761 0.29010239) * -#> 13) avg_inten_ch_1>=145.4883 113 38 WS (0.33628319 0.66371681) -#> 26) total_inten_ch_3>=57919.5 33 10 PS (0.69696970 0.30303030) * -#> 27) total_inten_ch_3< 57919.5 80 15 WS (0.18750000 0.81250000) * -#> 7) fiber_width_ch_1>=11.37318 466 120 WS (0.25751073 0.74248927) -#> 14) eq_ellipse_oblate_vol_ch_1>=1673.942 30 8 PS (0.73333333 0.26666667) -#> 28) var_inten_ch_3>=41.10858 20 2 PS (0.90000000 0.10000000) * -#> 29) var_inten_ch_3< 41.10858 10 4 WS (0.40000000 0.60000000) * -#> 15) eq_ellipse_oblate_vol_ch_1< 1673.942 436 98 WS (0.22477064 0.77522936) * -``` - -We can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit. - - -```r -final_tree %>% - extract_fit_engine() %>% - rpart.plot(roundint = FALSE) -``` - - - -Perhaps we would also like to understand what variables are important in this final model. We can use the [vip](https://koalaverse.github.io/vip/) package to estimate variable importance [based on the model's structure](https://koalaverse.github.io/vip/reference/vi_model.html#details). - - -```r -library(vip) - -final_tree %>% - extract_fit_parsnip() %>% - vip() -``` - - - -These are the automated image analysis measurements that are the most important in driving segmentation quality predictions. 
- - -We leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the [reference docs](/find/parsnip/#models), or use the `args()` function to see which parsnip object arguments are available: - - -```r -args(decision_tree) -#> function (mode = "unknown", engine = "rpart", cost_complexity = NULL, -#> tree_depth = NULL, min_n = NULL) -#> NULL -``` - -You could tune the other hyperparameter we didn't use here, `min_n`, which sets the minimum `n` to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this [searchable table](/find/parsnip/#model-args) to find the original argument for `min_n` in the rpart package ([hint](https://stat.ethz.ch/R-manual/R-devel/library/rpart/html/rpart.control.html)). See whether you can tune a different combination of hyperparameters and/or values to improve a tree's ability to predict cell segmentation quality. - - - -## Session information - - -``` -#> ─ Session info ───────────────────────────────────────────────────── -#> setting value -#> version R version 4.2.1 (2022-06-23) -#> os macOS Big Sur ... 
10.16 -#> system x86_64, darwin17.0 -#> ui X11 -#> language (EN) -#> collate en_US.UTF-8 -#> ctype en_US.UTF-8 -#> tz America/Los_Angeles -#> date 2022-12-07 -#> pandoc 2.19.2 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/tools/ (via rmarkdown) -#> -#> ─ Packages ───────────────────────────────────────────────────────── -#> package * version date (UTC) lib source -#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.2.0) -#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.2.0) -#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.0) -#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.2.0) -#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.2.1) -#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.2.0) -#> purrr * 0.3.5 2022-10-06 [1] CRAN (R 4.2.0) -#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.2.0) -#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.0) -#> rpart * 4.1.19 2022-10-21 [1] CRAN (R 4.2.0) -#> rpart.plot * 3.1.1 2022-05-21 [1] CRAN (R 4.2.0) -#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.2.1) -#> tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.2.0) -#> tidymodels * 1.0.0 2022-07-13 [1] CRAN (R 4.2.0) -#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.2.0) -#> vip * 0.3.2 2020-12-17 [1] CRAN (R 4.2.0) -#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.2.0) -#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.2.0) -#> -#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library -#> -#> ──────────────────────────────────────────────────────────────────── -``` diff --git a/contribute/index.qmd b/contribute/index.qmd new file mode 100644 index 00000000..d7869b92 --- /dev/null +++ b/contribute/index.qmd @@ -0,0 +1,52 @@ +--- +title: How to contribute to tidymodels +toc: true +toc-depth: 0 +include-after-body: ../resources.html +--- + +The ecosystem of tidymodels packages would not be possible without the contributions of the R community. No matter your current skills, it's possible to contribute back to tidymodels. Contributions are guided by our design goals. 
+ +## Design goals + +The goals of tidymodels packages are to: + + * Encourage empirical validation and good statistical practice. + + * Smooth out heterogeneous interfaces. + + * Establish highly reusable infrastructure. + + * Enable a wider variety of methodologies. + + * Help package developers quickly build high quality model packages of their own. + +These goals are guided by our [principles for creating modeling packages](https://tidymodels.github.io/model-implementation-principles/). + +**What are different ways _you_ can contribute?** + +## Answer questions + +You can help others use and learn tidymodels by answering questions on the [RStudio community site](https://community.rstudio.com/tag/tidymodels), [Stack Overflow](https://stackoverflow.com/questions/tagged/tidymodels?sort=newest), and [Twitter](https://twitter.com/search?q=%23tidymodels&f=live). Many people asking for help with tidymodels don't know what a [reprex](https://www.tidyverse.org/help#reprex) is or how to craft one. Acknowledging an individual's problem, showing them how to build a reprex, and pointing them to helpful resources are all enormously beneficial, even if you don't immediately solve their problem. + +Remember that while you might have seen a problem a hundred times before, it's new to the person asking it. Be patient, polite, and empathic. + +## File issues + +If you've found a bug, first create a minimal [reprex](https://www.tidyverse.org/help#reprex). Spend some time working to make it as minimal as possible; the more time you spend doing this, the easier it is to fix the bug. When your reprex is ready, file it on the [GitHub repo](https://github.com/tidymodels/) of the appropriate package. + +The tidymodels team often focuses on one package at a time to reduce context switching and be more efficient. We may not address each issue right away, but we will use the reprex you create to understand your problem when it is time to focus on that package. 
+ +## Contribute documentation + +Documentation is a high priority for tidymodels, and pull requests to correct or improve documentation are welcome. The most important thing to know is that tidymodels packages use [roxygen2](https://roxygen2.r-lib.org/); this means that documentation is found in the R code close to the source of each function. There are some special tags, but most tidymodels packages now use markdown in the documentation. This makes it particularly easy to get started! + + +## Contribute code + +If you are a more experienced R programmer, you may have the inclination, interest, and ability to contribute directly to package development. Before you submit a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal. + +In tidymodels packages, we use the [tidyverse style guide](https://style.tidyverse.org/) which will make sure that your new code and documentation matches the existing style. This makes the review process much smoother. + +The tidymodels packages are explicitly built to support the creation of other modeling packages, and we would love to hear about what you build yourself! Check out our learning resources for [developing custom modeling tools](/learn/develop/). + diff --git a/data/homepage.yaml b/data/homepage.yaml deleted file mode 100644 index 34ec9fe8..00000000 --- a/data/homepage.yaml +++ /dev/null @@ -1,14 +0,0 @@ -bandtwo: -- url: /start/ - title: Get Started - icon: fa-flag-checkered - icon_pack: fas - what: | - What do you need to know to start using tidymodels? Learn what you need in 5 articles, starting with how to create a model and ending with a beginning-to-end modeling case study. - -- url: /learn/ - title: Learn - icon: fa-lightbulb - icon_pack: far - what: | - After you are comfortable with the basics, you can learn how to go farther with tidymodels in your modeling and machine learning projects. 
diff --git a/data/resources.yaml b/data/resources.yaml deleted file mode 100644 index b0822695..00000000 --- a/data/resources.yaml +++ /dev/null @@ -1,21 +0,0 @@ -resources: -- url: /find/ - title: Find - icon: fa-search - icon_pack: fas - what: | - Explore searchable tables of all tidymodels packages and functions. - -- url: /books/ - title: Books - icon: fa-book-open - icon_pack: fas - what: | - Study up on statistics and modeling with our comprehensive books. - -- url: https://www.tidyverse.org/tags/tidymodels/ - title: News - icon: fa-bullhorn - icon_pack: fas - what: | - Hear the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/). diff --git a/docs/about/davis/index.html b/docs/about/davis/index.html new file mode 100644 index 00000000..e5c185af --- /dev/null +++ b/docs/about/davis/index.html @@ -0,0 +1,387 @@ + + + + + + + + + + +tidymodels – index + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + + + +

    Davis Vaughan is a Software Engineer at RStudio. He is the author or maintainer of several R packages for finance and data analytics, including tidyquant, timetk, tibbletime, sweep, rray and hardhat. He is well-known for his work around creating modeling packages in R.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/about/index.html b/docs/about/index.html new file mode 100644 index 00000000..a2a73809 --- /dev/null +++ b/docs/about/index.html @@ -0,0 +1,428 @@ + + + + + + + + + +tidymodels - Meet the team + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Meet the team

    +
    + + + +
    + + + + +
    + + +
    + + + + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/about/max/index.html b/docs/about/max/index.html new file mode 100644 index 00000000..1822b76e --- /dev/null +++ b/docs/about/max/index.html @@ -0,0 +1,387 @@ + + + + + + + + + + +tidymodels – index + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + + + +

    Dr. Max Kuhn is a Software Engineer at RStudio. He is the author or maintainer of several R packages for predictive modeling including caret, AppliedPredictiveModeling, Cubist, C50 and SparseLDA. He routinely teaches classes in predictive modeling at Predictive Analytics World and UseR! and his publications include work on neuroscience biomarkers, drug discovery, molecular diagnostics and response surface methodology.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/books/fes/cover.jpg b/docs/books/fes/cover.jpg new file mode 100644 index 00000000..07067b22 Binary files /dev/null and b/docs/books/fes/cover.jpg differ diff --git a/docs/books/fes/index.html b/docs/books/fes/index.html new file mode 100644 index 00000000..a41f9e0f --- /dev/null +++ b/docs/books/fes/index.html @@ -0,0 +1,418 @@ + + + + + + + + + + +tidymodels - Feature Engineering & Selection + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Feature Engineering & Selection

    +
    + +
    +
    + A Practical Approach for Predictive Models +
    +
    + + +
    + +
    +
    Authors
    +
    +

    Max Kuhn

    +

    Kjell Johnson

    +
    +
    + + + +
    + + +
    + +

    +

    A primary goal of predictive modeling is to find a reliable and effective predictive relationship between an available set of features and an outcome. This book provides an extensive set of techniques for uncovering effective representations of the features for modeling the outcome and for finding an optimal subset of features to improve a model’s predictive performance.

    +

    An HTML version of this text can be found at https://bookdown.org/max/FES.

    +

    The data sets and R code are available in the GitHub repository https://github.com/topepo/FES.

    +

    The physical copies are sold by Amazon and Taylor & Francis.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/books/index.html b/docs/books/index.html new file mode 100644 index 00000000..b7077d23 --- /dev/null +++ b/docs/books/index.html @@ -0,0 +1,550 @@ + + + + + + + + + + +tidymodels - Books + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Books

    +
    + +
    +
    + Study up on statistics and modeling with our comprehensive books. +
    +
    + + +
    + + + + +
    + + +
    + + + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/books/moderndive/cover.png b/docs/books/moderndive/cover.png new file mode 100644 index 00000000..a1952163 Binary files /dev/null and b/docs/books/moderndive/cover.png differ diff --git a/docs/books/moderndive/index.html b/docs/books/moderndive/index.html new file mode 100644 index 00000000..24e206f4 --- /dev/null +++ b/docs/books/moderndive/index.html @@ -0,0 +1,418 @@ + + + + + + + + + + +tidymodels - Statistical Inference via Data Science + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Statistical Inference via Data Science

    +
    + +
    +
    + A ModernDive into R and the Tidyverse +
    +
    + + +
    + +
    +
    Authors
    +
    +

    Chester Ismay

    +

    Albert Y. Kim

    +
    +
    + + + +
    + + +
    + +

    +

    This book is intended to be a gentle introduction to the practice of analyzing data and answering questions using data the way data scientists, statisticians, data journalists, and other researchers would. Over the course of this book, you will develop your “data science toolbox,” equipping yourself with tools such as data visualization, data formatting, data wrangling, data modeling using regression, and statistical inference via hypothesis testing and confidence intervals.

    +

    An HTML version of this text can be found at https://moderndive.com/.

    +

    The data sets and R code are available in the GitHub repository https://github.com/moderndive/ModernDive_book and also the accompanying moderndive R package on CRAN.

    +

    The physical copies are sold by Amazon and CRC Press.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/books/smltar/cover.png b/docs/books/smltar/cover.png new file mode 100644 index 00000000..2dfdd429 Binary files /dev/null and b/docs/books/smltar/cover.png differ diff --git a/docs/books/smltar/index.html b/docs/books/smltar/index.html new file mode 100644 index 00000000..4386fae8 --- /dev/null +++ b/docs/books/smltar/index.html @@ -0,0 +1,413 @@ + + + + + + + + + +tidymodels - Supervised Machine Learning for Text Analysis in R + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Supervised Machine Learning for Text Analysis in R

    +
    + + + +
    + +
    +
    Authors
    +
    +

    Emil Hvitfeldt

    +

    Julia Silge

    +
    +
    + + + +
    + + +
    + +

    +

    This book explains how to preprocess text data for modeling, train models, and evaluate model performance using tools from the tidyverse and tidymodels ecosystem. Models like these can be used to make predictions for new observations, to understand what natural language features or characteristics contribute to differences in the output, and more. If you are already familiar with the basics of predictive modeling, use the comprehensive, detailed examples in this book to extend your skills to the domain of natural language processing.

    +

    This book provides practical guidance and directly applicable knowledge for data scientists and analysts who want to integrate unstructured text data into their modeling pipelines. Learn how to use text data for both regression and classification tasks, and how to apply more straightforward algorithms like regularized regression or support vector machines as well as deep learning approaches. Natural language must be dramatically transformed to be ready for computation, so we explore typical text preprocessing and feature engineering steps like tokenization and word embeddings from the ground up. These steps influence model results in ways we can measure, both in terms of model metrics and other tangible consequences such as how fair or appropriate model results are.

    +

    An HTML version of this text can be found at https://smltar.com/.

    +

    The data sets and R code are available in the GitHub repository https://github.com/EmilHvitfeldt/smltar.

    +

    The physical copies are sold by Amazon and CRC Press.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/books/tidytext/cover.png b/docs/books/tidytext/cover.png new file mode 100644 index 00000000..0497b756 Binary files /dev/null and b/docs/books/tidytext/cover.png differ diff --git a/docs/books/tidytext/index.html b/docs/books/tidytext/index.html new file mode 100644 index 00000000..d21d19a1 --- /dev/null +++ b/docs/books/tidytext/index.html @@ -0,0 +1,418 @@ + + + + + + + + + + +tidymodels - Text Mining with R + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Text Mining with R

    +
    + +
    +
    + A Tidy Approach +
    +
    + + +
    + +
    +
    Authors
    +
    +

    Julia Silge

    +

    David Robinson

    +
    +
    + + + +
    + + +
    + +

    +

    This practical book provides an introduction to text mining using tidy data principles in R, focusing on exploratory data analysis for text. Using tidy data principles can make text mining tasks easier and more effective; in this book, learn how to manipulate, summarize, and visualize characteristics of text using these methods and R packages from the tidy tool ecosystem.

    +

    An HTML version of this text can be found at https://www.tidytextmining.com/.

    +

    The data sets and R code are available in the GitHub repository https://github.com/dgrtwo/tidy-text-mining.

    +

    The physical copies are sold by Amazon and O’Reilly.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/books/tmwr/cover.png b/docs/books/tmwr/cover.png new file mode 100644 index 00000000..13460651 Binary files /dev/null and b/docs/books/tmwr/cover.png differ diff --git a/docs/books/tmwr/index.html b/docs/books/tmwr/index.html new file mode 100644 index 00000000..fb92d176 --- /dev/null +++ b/docs/books/tmwr/index.html @@ -0,0 +1,415 @@ + + + + + + + + + +tidymodels - Tidy Modeling with R + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Tidy Modeling with R

    +
    + + + +
    + +
    +
    Authors
    +
    +

    Max Kuhn

    +

    Julia Silge

    +
    +
    + + + +
    + + +
    + +

    +

    Modeling of data is integral to science, business, politics, and many other aspects of our lives. The goals of this book are to:

    +
      +
    • introduce and demonstrate how to use the tidymodels packages, and
    • +
    • outline good practices for the phases of the modeling process.
    • +
    +

    An HTML version of this text can be found at https://tmwr.org.

    +

    The sources to create the book are available in the GitHub repository https://github.com/tidymodels/TMwR.

    + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/docs/contribute/index.html b/docs/contribute/index.html new file mode 100644 index 00000000..a39ca95b --- /dev/null +++ b/docs/contribute/index.html @@ -0,0 +1,462 @@ + + + + + + + + + +tidymodels - How to contribute to tidymodels + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    How to contribute to tidymodels

    +
    + + + +
    + + + + +
    + + +
    + +

    The ecosystem of tidymodels packages would not be possible without the contributions of the R community. No matter your current skills, it’s possible to contribute back to tidymodels. Contributions are guided by our design goals.

    +
    +

    Design goals

    +

    The goals of tidymodels packages are to:

    +
      +
    • Encourage empirical validation and good statistical practice.

    • +
    • Smooth out heterogeneous interfaces.

    • +
    • Establish highly reusable infrastructure.

    • +
    • Enable a wider variety of methodologies.

    • +
    • Help package developers quickly build high quality model packages of their own.

    • +
    +

    These goals are guided by our principles for creating modeling packages.

    +

    What are different ways you can contribute?

    +
    +
    +

    Answer questions

    +

    You can help others use and learn tidymodels by answering questions on the RStudio community site, Stack Overflow, and Twitter. Many people asking for help with tidymodels don’t know what a reprex is or how to craft one. Acknowledging an individual’s problem, showing them how to build a reprex, and pointing them to helpful resources are all enormously beneficial, even if you don’t immediately solve their problem.

    +

    Remember that while you might have seen a problem a hundred times before, it’s new to the person asking it. Be patient, polite, and empathic.

    +
    +
    +

    File issues

    +

    If you’ve found a bug, first create a minimal reprex. Spend some time working to make it as minimal as possible; the more time you spend doing this, the easier it is to fix the bug. When your reprex is ready, file it on the GitHub repo of the appropriate package.

    +

    The tidymodels team often focuses on one package at a time to reduce context switching and be more efficient. We may not address each issue right away, but we will use the reprex you create to understand your problem when it is time to focus on that package.

    +
    +
    +

    Contribute documentation

    +

    Documentation is a high priority for tidymodels, and pull requests to correct or improve documentation are welcome. The most important thing to know is that tidymodels packages use roxygen2; this means that documentation is found in the R code close to the source of each function. There are some special tags, but most tidymodels packages now use markdown in the documentation. This makes it particularly easy to get started!

    +
    +
    +

    Contribute code

    +

    If you are a more experienced R programmer, you may have the inclination, interest, and ability to contribute directly to package development. Before you submit a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal.

    +

    In tidymodels packages, we use the tidyverse style guide which will make sure that your new code and documentation matches the existing style. This makes the review process much smoother.

    +

    The tidymodels packages are explicitly built to support the creation of other modeling packages, and we would love to hear about what you build yourself! Check out our learning resources for developing custom modeling tools.

    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/find/all/index.html b/docs/find/all/index.html new file mode 100644 index 00000000..860ae5dc --- /dev/null +++ b/docs/find/all/index.html @@ -0,0 +1,450 @@ + + + + + + + + + +tidymodels - Search all of tidymodels + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Search all of tidymodels

    +
    + + + +
    + + + + +
    + + +
    + +

    Here are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation.

    +
    +
    + +
    + +
    +
    + + + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/find/broom/index.html b/docs/find/broom/index.html new file mode 100644 index 00000000..3d238f08 --- /dev/null +++ b/docs/find/broom/index.html @@ -0,0 +1,457 @@ + + + + + + + + + + +tidymodels - Search broom methods + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Search broom methods

    +

    Broom

    +
    + +
    +
    +

    Find tidy(), augment(), and glance() methods for different objects.

    +
    +
    + + +
    + + + + +
    + + +
    + +

    Here are all the broom functions available across CRAN packages. Click on the link in the topic column to find more information.

    +
    +
    + +
    + +
    +
    + + + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/find/index.html b/docs/find/index.html new file mode 100644 index 00000000..212b39c0 --- /dev/null +++ b/docs/find/index.html @@ -0,0 +1,448 @@ + + + + + + + + + + +tidymodels - Explore tidymodels + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Explore tidymodels

    +
    + +
    +
    + Explore searchable tables of all tidymodels packages and functions. +
    +
    + + +
    + + + + +
    + + +
    + +

    Below you’ll find searchable tables to help you explore the tidymodels packages and functions. The tables also include links to the relevant reference page to help you navigate the package documentation. Use the following categories to guide you:

    + +
    +

    Books

    +

    If you want to read more long form, there are a number of books written about and using tidymodels.

    + + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/find/parsnip/index.html b/docs/find/parsnip/index.html new file mode 100644 index 00000000..d6a92551 --- /dev/null +++ b/docs/find/parsnip/index.html @@ -0,0 +1,456 @@ + + + + + + + + + + +tidymodels - Search parsnip models + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Search parsnip models

    +
    + +
    +
    +

    Find model types, engines, and arguments to fit and predict in the tidymodels framework.

    +
    +
    + + +
    + + + + +
    + + +
    + +

    To learn about the parsnip package, see Get Started: Build a Model. Use the tables below to find model types and engines.

    +
    +
    + +
    + +
    +
    + + + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/find/recipes/index.html b/docs/find/recipes/index.html new file mode 100644 index 00000000..1c6fc6bc --- /dev/null +++ b/docs/find/recipes/index.html @@ -0,0 +1,457 @@ + + + + + + + + + + +tidymodels - Search recipe steps + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Search recipe steps

    +

    Recipes

    +
    + +
    +
    +

    Find recipe steps in the tidymodels framework to help you prep your data for modeling.

    +
    +
    + + +
    + + + + +
    + + +
    + +

    To learn about the recipes package, see Get Started: Preprocess your data with recipes. The table below allows you to search for recipe steps across tidymodels packages.

    +
    +
    + +
    + +
    +
    + + + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/help/index.html b/docs/help/index.html new file mode 100644 index 00000000..1bca64c1 --- /dev/null +++ b/docs/help/index.html @@ -0,0 +1,452 @@ + + + + + + + + + +tidymodels - Get Help + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Get Help

    +
    + + + +
    + + + + +
    + + +
    + +
    +

    Asking for help

    +

    If you’re asking for R help, reporting a bug, or requesting a new feature, you’re more likely to succeed if you include a good reproducible example, which is precisely what the reprex package is built for. You can learn more about reprex, along with other tips on how to help others help you in the tidyverse.org help section.

    +
    +
    +

    Where to ask

    +

    +

    Now that you’ve made a reprex, you need to share it in an appropriate forum. Here are some options:

    +
      +
    • community.rstudio.com: This is a warm and welcoming place to ask any questions you might have about tidymodels or more generally about modeling, machine learning, and deep learning. (You can also ask questions about the tidyverse and RStudio there, too!)

    • +
    • Stack Overflow. You’re probably already familiar with Stack Overflow from googling; it’s a frequent source of answers to coding related questions. Asking a question on Stack Overflow can be intimidating, but if you’ve taken the time to create a reprex, you’re much more likely to get a useful answer. Make sure to tag your question with r and tidymodels so that the right people are more likely to see it.

    • +
    • Twitter and Mastodon. These sites are great places to share a link to your reprex that’s hosted elsewhere! The #rstats twitter and #rstats fosstodon communities are extremely friendly and active, and have great crowds to be a part of. Make sure you tag your tweet with #rstats and #tidymodels.

    • +
    • If you think you’ve found a bug, please follow the instructions for filing an issue on contributing to tidymodels.

    • +
    +
    +
    +

    Resources

    + + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/static/images/broom.png b/docs/images/broom.png similarity index 100% rename from static/images/broom.png rename to docs/images/broom.png diff --git a/static/images/cover.png b/docs/images/cover.png similarity index 100% rename from static/images/cover.png rename to docs/images/cover.png diff --git a/static/images/dials.png b/docs/images/dials.png similarity index 100% rename from static/images/dials.png rename to docs/images/dials.png diff --git a/static/images/parsnip-flagger.jpg b/docs/images/parsnip-flagger.jpg similarity index 100% rename from static/images/parsnip-flagger.jpg rename to docs/images/parsnip-flagger.jpg diff --git a/static/images/parsnip.png b/docs/images/parsnip.png similarity index 100% rename from static/images/parsnip.png rename to docs/images/parsnip.png diff --git a/static/images/recipes.png b/docs/images/recipes.png similarity index 100% rename from static/images/recipes.png rename to docs/images/recipes.png diff --git a/static/images/rsample.png b/docs/images/rsample.png similarity index 100% rename from static/images/rsample.png rename to docs/images/rsample.png diff --git a/static/images/tidymodels.png b/docs/images/tidymodels.png similarity index 100% rename from static/images/tidymodels.png rename to docs/images/tidymodels.png diff --git a/static/images/tune.png b/docs/images/tune.png similarity index 100% rename from static/images/tune.png rename to docs/images/tune.png diff --git a/static/images/workflows.png b/docs/images/workflows.png similarity index 100% rename from static/images/workflows.png rename to docs/images/workflows.png diff --git a/static/images/yardstick.png b/docs/images/yardstick.png similarity index 100% rename from static/images/yardstick.png rename to docs/images/yardstick.png diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000..02830929 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,497 @@ + + + + + + + + + +tidymodels + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + + + + + + + \ No newline at end of file diff --git a/docs/learn/develop/broom/index.html b/docs/learn/develop/broom/index.html new file mode 100644 index 00000000..46d2e75c --- /dev/null +++ b/docs/learn/develop/broom/index.html @@ -0,0 +1,1127 @@ + + + + + + + + + + +tidymodels - Create your own broom tidier methods + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Create your own broom tidier methods

    +
    +
    developer tools
    +
    +
    + +
    +
    +

    Write tidy(), glance(), and augment() methods for new model objects.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: generics, tidymodels, tidyverse, and usethis.

    +

    The broom package provides tools to summarize key information about models in tidy tibble()s. The package provides three verbs, or “tidiers,” to help make model objects easier to work with:

    +
      +
    • tidy() summarizes information about model components
    • +
    • glance() reports information about the entire model
    • +
    • augment() adds information about observations to a dataset
    • +
    +

    Each of the three verbs above are generic, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant method implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. the package that supplies the model object) rather than to broom. New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package.

    +

    We’d like to make implementing external tidier methods as painless as possible. The general process for doing so is:

    +
      +
    • re-export the tidier generics
    • +
    • implement tidying methods
    • +
    • document the new methods
    • +
    +

    In this article, we’ll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible.

    +
    +
    +

    Re-export the tidier generics

    +

    The first step is to re-export the generic functions for tidy(), glance(), and/or augment(). You could do so from broom itself, but we’ve provided an alternative, much lighter dependency called generics.

    +

    First you’ll need to add the generics package to Imports. We recommend using the usethis package for this:

    +
    +
    usethis::use_package("generics", "Imports")
    +
    +

    Next, you’ll need to re-export the appropriate tidying methods. If you plan to implement a glance() method, for example, you can re-export the glance() generic by adding the following somewhere inside the /R folder of your package:

    +
    +
    #' @importFrom generics glance
    +#' @export
    +generics::glance
    +
    +

    Oftentimes it doesn’t make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense.

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    Please do not define tidy(), glance(), or augment() generics in your package. This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods.

    +
    +
    +
    +
    +

    Implement tidying methods

    +

    You’ll now need to implement specific tidying methods for each of the generics you’ve re-exported in the above step. For each of tidy(), glance(), and augment(), we’ll walk through the big picture, an example, and helpful resources.

    +

    In this article, we’ll use the base R dataset trees, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R lm() function.

    +
    +
    # load in the trees dataset
    +data(trees)
    +
    +# take a look!
    +str(trees)
    +#> 'data.frame':    31 obs. of  3 variables:
    +#>  $ Girth : num  8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
    +#>  $ Height: num  70 65 63 72 81 83 66 75 80 75 ...
    +#>  $ Volume: num  10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
    +
    +# fit the timber volume as a function of girth and height
    +trees_model <- lm(Volume ~ Girth + Height, data = trees)
    +
    +

    Let’s take a look at the summary() of our trees_model fit.

    +
    +
    summary(trees_model)
    +#> 
    +#> Call:
    +#> lm(formula = Volume ~ Girth + Height, data = trees)
    +#> 
    +#> Residuals:
    +#>     Min      1Q  Median      3Q     Max 
    +#> -6.4065 -2.6493 -0.2876  2.2003  8.4847 
    +#> 
    +#> Coefficients:
    +#>             Estimate Std. Error t value Pr(>|t|)    
    +#> (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
    +#> Girth         4.7082     0.2643  17.816  < 2e-16 ***
    +#> Height        0.3393     0.1302   2.607   0.0145 *  
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> Residual standard error: 3.882 on 28 degrees of freedom
    +#> Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442 
    +#> F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16
    +
    +

    This output gives some summary statistics on the residuals (which would be described more fully in an augment() output), model coefficients (which, in this case, make up the tidy() output), and some model-level summarizations such as RSE, \(R^2\), etc. (which make up the glance() output.)

    +
    +

    Implementing the tidy() method

    +

    The tidy(x, ...) method will return a tibble where each row contains information about a component of the model. The x input is a model object, and the dots (...) are an optional argument to supply additional information to any calls inside your method. New tidy() methods can take additional arguments, but must include the x and ... arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see the end of this article.) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These tidy() methods are useful for inspecting model details and creating custom model visualizations.

    +

    Returning to the example of our linear model on timber volume, we’d like to extract information on the model components. In this example, the components are the regression coefficients. After taking a look at the model object and its summary(), you might notice that you can extract the regression coefficients as follows:

    +
    +
    summary(trees_model)$coefficients
    +#>                Estimate Std. Error   t value     Pr(>|t|)
    +#> (Intercept) -57.9876589  8.6382259 -6.712913 2.749507e-07
    +#> Girth         4.7081605  0.2642646 17.816084 8.223304e-17
    +#> Height        0.3392512  0.1301512  2.606594 1.449097e-02
    +
    +

    This object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. Converting to a tibble where the row names are contained in a column, you might write:

    +
    +
    trees_model_tidy <- summary(trees_model)$coefficients %>% 
    +  as_tibble(rownames = "term")
    +
    +trees_model_tidy
    +#> # A tibble: 3 × 5
    +#>   term        Estimate `Std. Error` `t value` `Pr(>|t|)`
    +#>   <chr>          <dbl>        <dbl>     <dbl>      <dbl>
    +#> 1 (Intercept)  -58.0          8.64      -6.71   2.75e- 7
    +#> 2 Girth          4.71         0.264     17.8    8.22e-17
    +#> 3 Height         0.339        0.130      2.61   1.45e- 2
    +
    +

    The broom package standardizes common column names used to describe coefficients. In this case, the column names are:

    +
    +
    colnames(trees_model_tidy) <- c("term", "estimate", "std.error", "statistic", "p.value")
    +
    +

    A glossary giving the currently acceptable column names outputted by tidy() methods can be found at the end of this article. As a rule of thumb, column names resulting from tidy() methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions).

    +

    Finally, it is common for tidy() methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. In this example, the confint() function can be used to calculate confidence intervals from a model object resulting from lm():

    +
    +
    confint(trees_model)
    +#>                    2.5 %      97.5 %
    +#> (Intercept) -75.68226247 -40.2930554
    +#> Girth         4.16683899   5.2494820
    +#> Height        0.07264863   0.6058538
    +
    +

    With these considerations in mind, a reasonable tidy() method for lm() might look something like:

    +
    +
    tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {
    +  
    +  result <- summary(x)$coefficients %>%
    +    tibble::as_tibble(rownames = "term") %>%
    +    dplyr::rename(estimate = Estimate,
    +                  std.error = `Std. Error`,
    +                  statistic = `t value`,
    +                  p.value = `Pr(>|t|)`)
    +  
    +  if (conf.int) {
    +    ci <- confint(x, level = conf.level)
    +    result <- dplyr::left_join(result, ci, by = "term")
    +  }
    +  
    +  result
    +}
    +
    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    If you’re interested, the actual tidy.lm() source can be found here! It’s not too different from the version above except for some argument checking and additional columns.

    +
    +
    +

    With this method exported, then, if a user calls tidy(fit), where fit is an output from lm(), the tidy() generic would “redirect” the call to the tidy.lm() function above.

    +

    Some things to keep in mind while writing your tidy() method:

    +
      +
    • Sometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn’t have the same interpretation, it doesn’t make sense to summarize the fixed and random effects in the same table. In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of:
    • +
    +
    +
    model <- mixed_model(...)
    +tidy(model, effects = "fixed")
    +tidy(model, effects = "random")
    +
    +
      +
    • How are missing values encoded in the model object and its summary()? Ensure that rows are included even when the associated model component is missing or rank deficient.
    • +
    • Are there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to tidy() methods include: +
        +
      • conf.int: A logical indicating whether or not to calculate confidence/credible intervals. This should default to FALSE.
      • +
      • conf.level: The confidence level to use for the interval when conf.int = TRUE. Typically defaults to .95.
      • +
      • exponentiate: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression).
      • +
    • +
    +
    +
    +

    Implementing the glance() method

    +

    glance() returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the x input is a model object, and the ... is an optional argument to supply additional information to any calls inside your method. New glance() methods can also take additional arguments and must include the x and ... arguments. (For a glossary of currently acceptable additional arguments, see the end of this article.)

    +

    Returning to the trees_model example, we could pull out the \(R^2\) value with the following code:

    +
    +
    summary(trees_model)$r.squared
    +#> [1] 0.94795
    +
    +

    Similarly, for the adjusted \(R^2\):

    +
    +
    summary(trees_model)$adj.r.squared
    +#> [1] 0.9442322
    +
    +

    Unfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a tibble() element-by-element by subsetting the summary() object repeatedly. The with() function, however, can help make this process a bit less tedious by evaluating expressions inside of the summary(trees_model) environment. To grab those same two model elements from above using with():

    +
    +
    with(summary(trees_model),
    +     tibble::tibble(r.squared = r.squared,
    +                    adj.r.squared = adj.r.squared))
    +#> # A tibble: 1 × 2
    +#>   r.squared adj.r.squared
    +#>       <dbl>         <dbl>
    +#> 1     0.948         0.944
    +
    +

    A reasonable glance() method for lm(), then, might look something like:

    +
    +
    glance.lm <- function(x, ...) {
    +  with(
    +    summary(x),
    +    tibble::tibble(
    +      r.squared = r.squared,
    +      adj.r.squared = adj.r.squared,
    +      sigma = sigma,
    +      statistic = fstatistic["value"],
    +      p.value = pf(
    +        fstatistic["value"],
    +        fstatistic["numdf"],
    +        fstatistic["dendf"],
    +        lower.tail = FALSE
    +      ),
    +      df = fstatistic["numdf"],
    +      logLik = as.numeric(stats::logLik(x)),
    +      AIC = stats::AIC(x),
    +      BIC = stats::BIC(x),
    +      deviance = stats::deviance(x),
    +      df.residual = df.residual(x),
    +      nobs = stats::nobs(x)
    +    )
    +  )
    +}
    +
    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    This is the actual definition of glance.lm() provided by broom!

    +
    +
    +

    Some things to keep in mind while writing glance() methods: * Output should not include the name of the modeling function or any arguments given to the modeling function. * In some cases, you may wish to provide model-level diagnostics not returned by the original object. For example, the above glance.lm() calculates AIC and BIC from the model fit. If these are easy to compute, feel free to add them. However, tidier methods are generally not an appropriate place to implement complex or time consuming calculations. * The glance method should always return the same columns in the same order when given an object of a given model class. If a summary metric (such as AIC) is not defined in certain circumstances, use NA.

    +
    +
    +

    Implementing the augment() method

    +

    augment() methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a . prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in the glossary.) The x and ... arguments share their meaning with the two functions described above. augment methods also optionally accept a data argument that is a data.frame (or tibble) to add observation-level information to, returning a tibble object with the same number of rows as data. Many augment() methods also accept a newdata argument, following the same conventions as the data argument, except with the underlying assumption that the model has not “seen” the data yet. As a result, newdata arguments need not contain the response columns in data. Only one of data or newdata should be supplied. A full glossary of acceptable arguments to augment() methods can be found at the end of this article.

    +

    If a data argument is not specified, augment() should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model.

    +

    With this in mind, we can look back to our trees_model example. For one, the model element inside of the trees_model object will allow us to recover the original data:

    +
    +
    trees_model$model
    +#>    Volume Girth Height
    +#> 1    10.3   8.3     70
    +#> 2    10.3   8.6     65
    +#> 3    10.2   8.8     63
    +#> 4    16.4  10.5     72
    +#> 5    18.8  10.7     81
    +#> 6    19.7  10.8     83
    +#> 7    15.6  11.0     66
    +#> 8    18.2  11.0     75
    +#> 9    22.6  11.1     80
    +#> 10   19.9  11.2     75
    +#> 11   24.2  11.3     79
    +#> 12   21.0  11.4     76
    +#> 13   21.4  11.4     76
    +#> 14   21.3  11.7     69
    +#> 15   19.1  12.0     75
    +#> 16   22.2  12.9     74
    +#> 17   33.8  12.9     85
    +#> 18   27.4  13.3     86
    +#> 19   25.7  13.7     71
    +#> 20   24.9  13.8     64
    +#> 21   34.5  14.0     78
    +#> 22   31.7  14.2     80
    +#> 23   36.3  14.5     74
    +#> 24   38.3  16.0     72
    +#> 25   42.6  16.3     77
    +#> 26   55.4  17.3     81
    +#> 27   55.7  17.5     82
    +#> 28   58.3  17.9     80
    +#> 29   51.5  18.0     80
    +#> 30   51.0  18.0     80
    +#> 31   77.0  20.6     87
    +
    +

    Similarly, the fitted values and residuals can be accessed with the following code:

    +
    +
    head(trees_model$fitted.values)
    +#>         1         2         3         4         5         6 
    +#>  4.837660  4.553852  4.816981 15.874115 19.869008 21.018327
    +head(trees_model$residuals)
    +#>          1          2          3          4          5          6 
    +#>  5.4623403  5.7461484  5.3830187  0.5258848 -1.0690084 -1.3183270
    +
    +

    As with glance() methods, it’s fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. A common metric associated with linear models, for example, is the standard error of fitted values:

    +
    +
    se.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>%
    +  unname()
    +
    +head(se.fit)
    +#> [1] 1.3211285 1.4893775 1.6325024 0.9444212 1.3484251 1.5319772
    +
    +

    Thus, a reasonable augment() method for lm might look something like this:

    +
    +
    augment.lm <- function(x, data = x$model, newdata = NULL, ...) {
    +  if (is.null(newdata)) {
    +    dplyr::bind_cols(tibble::as_tibble(data),
    +                     tibble::tibble(.fitted = x$fitted.values,
    +                                    .se.fit = predict(x, 
    +                                                      newdata = data, 
    +                                                      se.fit = TRUE)$se.fit,
    +                                   .resid =  x$residuals))
    +  } else {
    +    predictions <- predict(x, newdata = newdata, se.fit = TRUE)
    +    dplyr::bind_cols(tibble::as_tibble(newdata),
    +                     tibble::tibble(.fitted = predictions$fit,
    +                                    .se.fit = predictions$se.fit))
    +  }
    +}
    +
    +

    Some other things to keep in mind while writing augment() methods: * The newdata argument should default to NULL. Users should only ever specify one of data or newdata. Providing both data and newdata should result in an error. The newdata argument should accept both data.frames and tibbles. * Data given to the data argument must have both the original predictors and the original response. Data given to the newdata argument only needs to have the original predictors. This is important because there may be important information associated with training data that is not associated with test data. This means that the original_data object in augment(model, data = original_data) should provide .fitted and .resid columns (in most cases), whereas test_data in augment(model, data = test_data) only needs a .fitted column, even if the response is present in test_data. * If the data or newdata is specified as a data.frame with rownames, augment should return them in a column called .rownames. * For observations where no fitted values or summaries are available (where there’s missing data, for example), return NA. * The augment() method should always return as many rows as were in data or newdata, depending on which is supplied

    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    The recommended interface and functionality for augment() methods may change soon.

    +
    +
    +
    +
    +
    +

    Document the new methods

    +

    The only remaining step is to integrate the new methods into the parent package! To do so, just drop the methods into a .R file inside of the /R folder and document them using roxygen2. If you’re unfamiliar with the process of documenting objects, you can read more about it here. Here’s an example of how our tidy.lm() method might be documented:

    +
    +
    #' Tidy a(n) lm object
    +#'
    +#' @param x A `lm` object.
    +#' @param conf.int Logical indicating whether or not to include 
    +#'   a confidence interval in the tidied output. Defaults to FALSE.
    +#' @param conf.level The confidence level to use for the confidence 
    +#'   interval if conf.int = TRUE. Must be strictly greater than 0 
    +#'   and less than 1. Defaults to 0.95, which corresponds to a 
    +#'   95 percent confidence interval.
    +#' @param ... Unused, included for generic consistency only.
    +#' @return A tidy [tibble::tibble()] summarizing component-level
    +#'   information about the model
    +#'
    +#' @examples
    +#' # load the trees dataset
    +#' data(trees)
    +#' 
    +#' # fit a linear model on timber volume
    +#' trees_model <- lm(Volume ~ Girth + Height, data = trees)
    +#'
    +#' # summarize model coefficients in a tidy tibble!
    +#' tidy(trees_model)
    +#'
    +#' @export
    +tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {
    +
    +  # ... the rest of the function definition goes here!
    +
    +

    Once you’ve documented each of your new methods and executed devtools::document(), you’re done! Congrats on implementing your own broom tidier methods for a new model object!

    +
    +
    +

    Glossaries

    +
    +

    Arguments

    +

    Tidier methods have a standardized set of acceptable argument and output column names. The currently acceptable argument names by tidier method are:

    +
    +
    + +
    + +
    +
    +
    +
    +

    Column Names

    +

    The currently acceptable column names by tidier method are:

    +
    +
    + +
    + +
    +
    +

    The alexpghayes/modeltests package provides unit testing infrastructure to check your new tidier methods. Please file an issue there to request new arguments/columns to be added to the glossaries!

    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  generics   * 0.1.3   2022-07-05 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tidyverse  * 2.0.0   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/develop/metrics/index.html b/docs/learn/develop/metrics/index.html new file mode 100644 index 00000000..6460b986 --- /dev/null +++ b/docs/learn/develop/metrics/index.html @@ -0,0 +1,1161 @@ + + + + + + + + + + +tidymodels - Custom performance metrics + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Custom performance metrics

    +
    +
    developer tools
    +
    +
    + +
    +
    +

    Create a new performance metric and integrate it with yardstick functions.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: rlang and tidymodels.

    +

    The yardstick package already includes a large number of metrics, but there’s obviously a chance that you might have a custom metric that hasn’t been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics.

    +

    Why create custom metrics? With the infrastructure yardstick provides, you get:

    +
      +
    • Standardization between your metric and other preexisting metrics
    • +
    • Automatic error handling for types and lengths
    • +
    • Automatic selection of binary / multiclass metric implementations
    • +
    • Automatic NA handling
    • +
    • Support for grouped data frames
    • +
    • Support for use alongside other metrics in metric_set()
    • +
    +

    The implementation for metrics differs slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of roc_auc() after reading this vignette if you want to work on a class probability metric.

    +
    +
    +

    Numeric example: MSE

    +

    Mean squared error (sometimes MSE or from here on, mse()) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for mse() is:

    +

    \[ MSE = \frac{1}{N} \sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) \]

    +

    All metrics should have a data frame version, and a vector version. The data frame version here will be named mse(), and the vector version will be mse_vec().

    +
    +

    Vector implementation

    +

    To start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as beta in f_meas()). To create the vector function, you need to do two things:

    +
      +
    1. Create an internal implementation function, mse_impl().
    2. +
    3. Pass on that implementation function to metric_vec_template().
    4. +
    +

    Below, mse_impl() contains the actual implementation of the metric, and takes truth and estimate as arguments along with any metric specific arguments.

    +

    The yardstick function metric_vec_template() accepts the implementation function along with the other arguments to mse_vec() and actually executes mse_impl(). Additionally, it has a cls argument to specify the allowed class type of truth and estimate. If the classes are the same, a single character class can be passed, and if they are different a character vector of length 2 can be supplied.

    +

    The metric_vec_template() helper handles the removal of NA values in your metric, so your implementation function does not have to worry about them. It performs type checking using cls and also checks that the estimator is valid, the second of which is covered in the classification example. This way, all you have to worry about is the core implementation.

    +
    +
    library(tidymodels)
    +
    +mse_vec <- function(truth, estimate, na_rm = TRUE, ...) {
    +  
    +  # Core computation only: mean of squared differences. NA removal and
    +  # type checking are delegated to metric_vec_template() below.
    +  mse_impl <- function(truth, estimate) {
    +    mean((truth - estimate) ^ 2)
    +  }
    +  
    +  # metric_vec_template() validates that both inputs match `cls`
    +  # ("numeric"), strips NA values when na_rm = TRUE, then runs mse_impl().
    +  metric_vec_template(
    +    metric_impl = mse_impl,
    +    truth = truth, 
    +    estimate = estimate,
    +    na_rm = na_rm,
    +    cls = "numeric",
    +    ...
    +  )
    +  
    +}
    +
    +

    At this point, you’ve created the vector version of the mean squared error metric.

    +
    +
    data("solubility_test")
    +
    +mse_vec(
    +  truth = solubility_test$solubility, 
    +  estimate = solubility_test$prediction
    +)
    +#> Warning: `metric_vec_template()` was deprecated in yardstick 1.2.0.
    +#> ℹ Please use `check_numeric_metric()`, `check_class_metric()`,
    +#>   `check_class_metric()`, `yardstick_remove_missing()`, and
    +#>   `yardstick_any_missing()` instead.
    +#> [1] 0.5214438
    +
    +

    Intelligent error handling is immediately available.

    +
    +
    mse_vec(truth = "apple", estimate = 1)
    +#> Error in `validate_class()`:
    +#> ! `truth` should be a numeric but a character was supplied.
    +
    +mse_vec(truth = 1, estimate = factor("xyz"))
    +#> Error in `validate_class()`:
    +#> ! `estimate` should be a numeric but a factor was supplied.
    +
    +

    NA values are removed if na_rm = TRUE (the default). If na_rm = FALSE and any NA values are detected, then the metric automatically returns NA.

    +
    +
    # NA values removed
    +mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5))
    +#> [1] 0.01
    +
    +# NA returned
    +mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE)
    +#> [1] NA
    +
    +
    +
    +

    Data frame implementation

    +

    The data frame version of the metric should be fairly simple. It is a generic function with a data.frame method that calls the yardstick helper, metric_summarizer(), and passes along the mse_vec() function to it along with versions of truth and estimate that have been wrapped in rlang::enquo() and then unquoted with !! so that non-standard evaluation can be supported.

    +
    +
    library(rlang)
    +
    +mse <- function(data, ...) {
    +  UseMethod("mse")
    +}
    +
    +# Register mse() as a numeric metric; lower values indicate a better fit.
    +mse <- new_numeric_metric(mse, direction = "minimize")
    +
    +# Data frame method: quote the bare column names with enquo() and unquote
    +# them with !! so metric_summarizer() can evaluate them inside `data`.
    +mse.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) {
    +  
    +  metric_summarizer(
    +    metric_nm = "mse",
    +    metric_fn = mse_vec,
    +    data = data,
    +    truth = !! enquo(truth),
    +    estimate = !! enquo(estimate), 
    +    na_rm = na_rm,
    +    ...
    +  )
    +  
    +}
    +
    +

    And that’s it. The yardstick package handles the rest with an internal call to summarise().

    +
    +
    mse(solubility_test, truth = solubility, estimate = prediction)
    +
    +# Error handling
    +mse(solubility_test, truth = solubility, estimate = factor("xyz"))
    +
    +

    Let’s test it out on a grouped data frame.

    +
    +
    library(dplyr)
    +
    +set.seed(1234)
    +size <- 100
    +times <- 10
    +
    +# create 10 resamples
    +solubility_resampled <- bind_rows(
    +  replicate(
    +    n = times,
    +    expr = sample_n(solubility_test, size, replace = TRUE),
    +    simplify = FALSE
    +  ),
    +  .id = "resample"
    +)
    +
    +solubility_resampled %>%
    +  group_by(resample) %>%
    +  mse(solubility, prediction)
    +#> Warning: `metric_summarizer()` was deprecated in yardstick 1.2.0.
    +#> ℹ Please use `numeric_metric_summarizer()`,
    +#>   `class_metric_summarizer()`, `prob_metric_summarizer()`, or
    +#>   `curve_metric_summarizer()` instead.
    +#> # A tibble: 10 × 4
    +#>    resample .metric .estimator .estimate
    +#>    <chr>    <chr>   <chr>          <dbl>
    +#>  1 1        mse     standard       0.512
    +#>  2 10       mse     standard       0.454
    +#>  3 2        mse     standard       0.513
    +#>  4 3        mse     standard       0.414
    +#>  5 4        mse     standard       0.543
    +#>  6 5        mse     standard       0.456
    +#>  7 6        mse     standard       0.652
    +#>  8 7        mse     standard       0.642
    +#>  9 8        mse     standard       0.404
    +#> 10 9        mse     standard       0.479
    +
    +
    +
    +
    +

    Class example: miss rate

    +

    Miss rate is another name for the false negative rate, and is a classification metric in the same family as sens() and spec(). It follows the formula:

    +

    \[ miss\_rate = \frac{FN}{FN + TP} \]

    +

    This metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. As you will see in the example, you can achieve this with a call to base::table(estimate, truth) which correctly puts the “correct” result in the columns of the confusion matrix.

    +

    Classification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let’s start with the binary case.

    +
    +

    Vector implementation

    +

    The vector implementation for classification metrics initially has the same setup as numeric metrics, but has an additional argument, estimator that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to NULL. Additionally, pass it along to metric_vec_template() so that it can check the provided estimator against the classes of truth and estimate to see if they are allowed.

    +
    +
    # Resolve which confusion-matrix column holds the event of interest,
    # based on the `event_level` convention ("first" or anything else).
    +event_col <- function(xtab, event_level) {
    +  level_names <- colnames(xtab)
    +  if (identical(event_level, "first")) level_names[[1]] else level_names[[2]]
    +}
    +
    +miss_rate_vec <- function(truth, 
    +                          estimate, 
    +                          estimator = NULL, 
    +                          na_rm = TRUE, 
    +                          event_level = "first",
    +                          ...) {
    +  # Auto-select the estimator type (binary vs. a multiclass variant).
    +  estimator <- finalize_estimator(truth, estimator)
    +  
    +  miss_rate_impl <- function(truth, estimate) {
    +    # Create the confusion matrix (predictions in rows, truth in columns)
    +    xtab <- table(estimate, truth)
    +    col <- event_col(xtab, event_level)
    +    col2 <- setdiff(colnames(xtab), col)
    +    
    +    tp <- xtab[col, col]
    +    fn <- xtab[col2, col]
    +    
    +    # Miss rate (false negative rate): FN / (FN + TP)
    +    fn / (fn + tp)
    +  }
    +  
    +  # Type/NA/estimator validation is handled by the template helper.
    +  metric_vec_template(
    +    metric_impl = miss_rate_impl,
    +    truth = truth,
    +    estimate = estimate,
    +    na_rm = na_rm,
    +    cls = "factor",
    +    estimator = estimator,
    +    ...
    +  )
    +}
    +
    +

    Another change from the numeric metric is that a call to finalize_estimator() is made. This is the infrastructure that auto-selects the type of estimator to use.

    +
    +
    data("two_class_example")
    +miss_rate_vec(two_class_example$truth, two_class_example$predicted)
    +#> [1] 0.120155
    +
    +

    What happens if you try and pass in a multiclass result?

    +
    +
    data("hpc_cv")
    +fold1 <- filter(hpc_cv, Resample == "Fold01")
    +miss_rate_vec(fold1$obs, fold1$pred)
    +#>          F          M          L 
    +#> 0.06214689 0.00000000 0.00000000
    +
    +

    This isn’t great, as currently multiclass miss_rate() isn’t supported and it would have been better to throw an error if the estimator was not "binary". Currently, finalize_estimator() uses its default implementation which selected "macro" as the estimator since truth was a factor with more than 2 classes. When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this.

    +

    To fix this, a generic counterpart to finalize_estimator(), called finalize_estimator_internal(), exists that helps you restrict the input types. If you provide a method to finalize_estimator_internal() where the method name is the same as your metric name, and then set the metric_class argument in finalize_estimator() to be the same thing, you can control how the auto-selection of the estimator is handled.

    +

    Don’t worry about the metric_dispatcher argument. This is handled for you and just exists as a dummy argument to dispatch off of.

    +

    It is also good practice to call validate_estimator() which handles the case where a user passed in the estimator themselves. This validates that the supplied estimator is one of the allowed types and error otherwise.

    +
    +
    finalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator) {
    +  # A user-supplied estimator must be one of the allowed types ("binary").
    +  validate_estimator(estimator, estimator_override = "binary")
    +
    +  # Honor an explicitly supplied (now validated) estimator.
    +  if (!is.null(estimator)) {
    +    return(estimator)
    +  }
    +
    +  # Otherwise auto-select: anything beyond two classes is unsupported.
    +  if (length(levels(x)) > 2) {
    +    stop("A multiclass `truth` input was provided, but only `binary` is supported.")
    +  }
    +  "binary"
    +}
    +
    +miss_rate_vec <- function(truth, 
    +                          estimate, 
    +                          estimator = NULL, 
    +                          na_rm = TRUE, 
    +                          event_level = "first",
    +                          ...) {
    +  # calls finalize_estimator_internal() internally; metric_class dispatches
    +  # to our finalize_estimator_internal.miss_rate() restriction above
    +  estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate")
    +  
    +  miss_rate_impl <- function(truth, estimate) {
    +    # Create the confusion matrix (predictions in rows, truth in columns)
    +    xtab <- table(estimate, truth)
    +    col <- event_col(xtab, event_level)
    +    col2 <- setdiff(colnames(xtab), col)
    +    
    +    tp <- xtab[col, col]
    +    fn <- xtab[col2, col]
    +    
    +    # Miss rate (false negative rate): FN / (FN + TP)
    +    fn / (fn + tp)
    +    
    +  }
    +  
    +  metric_vec_template(
    +    metric_impl = miss_rate_impl,
    +    truth = truth,
    +    estimate = estimate,
    +    na_rm = na_rm,
    +    cls = "factor",
    +    estimator = estimator,
    +    ...
    +  )
    +}
    +
    +# Error thrown by our custom handler
    +# miss_rate_vec(fold1$obs, fold1$pred)
    +
    +# Error thrown by validate_estimator()
    +# miss_rate_vec(fold1$obs, fold1$pred, estimator = "macro")
    +
    +
    +
    +

    Supporting multiclass miss rate

    +

    Like many other classification metrics such as precision() or recall(), miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read vignette("multiclass", "yardstick") for more information about how these methods work.

    +

    Generally, they require more effort to get right than the binary case, especially if you want to have a performant version. Luckily, a somewhat standard template is used in yardstick and can be used here as well.

    +

    Let’s first remove the “binary” restriction we created earlier.

    +
    +
    rm(finalize_estimator_internal.miss_rate)
    +
    +

    The main changes below are:

    +
      +
    • The binary implementation is moved to miss_rate_binary().

    • +
    • miss_rate_estimator_impl() is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from miss_rate_impl() and also accepts the estimator argument using R’s function scoping rules.

    • +
    • miss_rate_multiclass() provides the implementation for the multiclass case. It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in miss_rate_estimator_impl() depending on whether or not it was macro or weighted macro.

    • +
    +
    +
    miss_rate_vec <- function(truth, 
    +                          estimate, 
    +                          estimator = NULL, 
    +                          na_rm = TRUE, 
    +                          event_level = "first",
    +                          ...) {
    +  # calls finalize_estimator_internal() internally
    +  estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate")
    +  
    +  miss_rate_impl <- function(truth, estimate) {
    +    xtab <- table(estimate, truth)
    +    # Rather than implement the actual method here, we rely on
    +    # an *_estimator_impl() function that can handle binary
    +    # and multiclass cases
    +    miss_rate_estimator_impl(xtab, estimator, event_level)
    +  }
    +  
    +  # Shared validation (types, NA removal, estimator checks) as before.
    +  metric_vec_template(
    +    metric_impl = miss_rate_impl,
    +    truth = truth,
    +    estimate = estimate,
    +    na_rm = na_rm,
    +    cls = "factor",
    +    estimator = estimator,
    +    ...
    +  )
    +}
    +
    +
    +# Dispatch to the binary or multiclass miss rate implementation
    +miss_rate_estimator_impl <- function(data, estimator, event_level) {
    +  if (identical(estimator, "binary")) {
    +    return(miss_rate_binary(data, event_level))
    +  }
    +  # Macro, macro-weighted, and micro all reduce to a weighted mean of the
    +  # per-class (or, for micro, already-summed) results.
    +  res <- miss_rate_multiclass(data, estimator)
    +  wt <- get_weights(data, estimator)
    +  weighted.mean(res, wt)
    +}
    +
    +
    +# Binary miss rate (false negative rate) from a confusion matrix
    +miss_rate_binary <- function(data, event_level) {
    +  # Identify the event column and its complement.
    +  event <- event_col(data, event_level)
    +  non_event <- setdiff(colnames(data), event)
    +
    +  false_neg <- data[non_event, event]
    +  true_pos <- data[event, event]
    +
    +  false_neg / (false_neg + true_pos)
    +}
    +
    miss_rate_multiclass <- function(data, estimator) {
    +  
    +  # We need tp and fn for all classes individually
    +  # we can get this by taking advantage of the fact
    +  # that tp + fn = colSums(data)
    +  tp <- diag(data)
    +  tpfn <- colSums(data)
    +  fn <- tpfn - tp
    +  
    +  # If using a micro estimator, we sum the individual
    +  # pieces before performing the miss rate calculation
    +  if (estimator == "micro") {
    +    tp <- sum(tp)
    +    fn <- sum(fn)
    +  }
    +  
    +  # BUG FIX: miss rate is FN / (FN + TP); the previous expression computed
    +  # tp / (tp + fn), i.e. sensitivity (1 - miss rate). This now matches
    +  # miss_rate_binary(), the formula stated earlier in the article, and the
    +  # multiclass output shown below (0.5483506).
    +  fn / (fn + tp)
    +}
    +
    +

    For the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. For example, f_meas() is a combination of recall() and precision(). When calculating a macro averaged f_meas(), the weighting must be applied 1 time, at the very end of the calculation. recall_multiclass() and precision_multiclass() are defined similarly to how miss_rate_multiclass() is defined and returns the unweighted vector of calculations. This means we can directly use this in f_meas(), and then weight everything once at the end of that calculation.

    +

    Let’s try it out now:

    +
    +
    # two class
    +miss_rate_vec(two_class_example$truth, two_class_example$predicted)
    +#> [1] 0.120155
    +
    +# multiclass
    +miss_rate_vec(fold1$obs, fold1$pred)
    +#> [1] 0.5483506
    +
    +
    +

    Data frame implementation

    +

    Luckily, the data frame implementation is as simple as the numeric case, we just need to add an extra estimator argument and pass that through.

    +
    +
    miss_rate <- function(data, ...) {
    +  UseMethod("miss_rate")
    +}
    +
    +# Register miss_rate() as a class metric; lower values are better.
    +miss_rate <- new_class_metric(miss_rate, direction = "minimize")
    +
    +# Data frame method: same shape as mse.data.frame(), plus the extra
    +# `estimator` and `event_level` arguments passed through to miss_rate_vec().
    +miss_rate.data.frame <- function(data, 
    +                                 truth, 
    +                                 estimate, 
    +                                 estimator = NULL, 
    +                                 na_rm = TRUE, 
    +                                 event_level = "first",
    +                                 ...) {
    +  metric_summarizer(
    +    metric_nm = "miss_rate",
    +    metric_fn = miss_rate_vec,
    +    data = data,
    +    truth = !! enquo(truth),
    +    estimate = !! enquo(estimate), 
    +    estimator = estimator,
    +    na_rm = na_rm,
    +    event_level = event_level,
    +    ...
    +  )
    +}
    +
    +
    +
    # Macro weighted automatically selected
    +fold1 %>%
    +  miss_rate(obs, pred)
    +
    +# Switch to micro
    +fold1 %>%
    +  miss_rate(obs, pred, estimator = "micro")
    +
    +# Macro weighted by resample
    +hpc_cv %>%
    +  group_by(Resample) %>%
    +  miss_rate(obs, pred, estimator = "macro_weighted")
    +
    +# Error handling
    +miss_rate(hpc_cv, obs, VF)
    +
    +
    +
    +
    +
    +

    Using custom metrics

    +

    The metric_set() function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, metric_set() fails. By using new_numeric_metric() and new_class_metric() in the above custom metrics, they work out of the box without any additional adjustments.

    +
    +
    numeric_mets <- metric_set(mse, rmse)
    +
    +numeric_mets(solubility_test, solubility, prediction)
    +#> # A tibble: 2 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 mse     standard       0.521
    +#> 2 rmse    standard       0.722
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang      * 1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/develop/models/index.html b/docs/learn/develop/models/index.html new file mode 100644 index 00000000..28bd8962 --- /dev/null +++ b/docs/learn/develop/models/index.html @@ -0,0 +1,1365 @@ + + + + + + + + + + +tidymodels - How to build a parsnip model + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    How to build a parsnip model

    +
    +
    developer tools
    +
    +
    + +
    +
    +

    Create a parsnip model function from an existing model implementation.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: mda, modeldata, and tidymodels.

    +

    The parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this:

    +
      +
    • It eliminates a lot of duplicate code.
    • +
    • Since the expressions are not evaluated until fitting, it eliminates many package dependencies.
    • +
    +

    A parsnip model function is itself very general. For example, the logistic_reg() function itself doesn’t have any model code within it. Instead, each model function is associated with one or more computational engines. These might be different R packages or some function in another language (that can be evaluated by R).

    +

    This article describes the process of creating a new model function. Before proceeding, take a minute and read our guidelines on creating modeling packages to understand the general themes and conventions that we use.

    +
    +
    +

    An example model

    +

    As an example, we’ll create a function for mixture discriminant analysis. There are a few packages that implement this but we’ll focus on mda::mda:

    +
    +
    str(mda::mda)
    +#> function (formula = formula(data), data = sys.frame(sys.parent()), subclasses = 3, 
    +#>     sub.df = NULL, tot.df = NULL, dimension = sum(subclasses) - 1, eps = 100 * 
    +#>         .Machine$double.eps, iter = 5, weights = mda.start(x, g, subclasses, 
    +#>         trace, ...), method = polyreg, keep.fitted = (n * dimension < 5000), 
    +#>     trace = FALSE, ...)
    +
    +

    The main hyperparameter is the number of subclasses. We’ll name our function discrim_mixture.

    +
    +
    +

    Aspects of models

    +

    Before proceeding, it helps to review how parsnip categorizes models:

    +
      +
    • The model type is related to the structural aspect of the model. For example, the model type linear_reg represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are nearest_neighbor, decision_tree, and so on.

    • +
    • Within a model type is the mode, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both modes (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression).

    • +
    • The computation engine is a combination of the estimation method and the implementation. For example, for linear regression, one engine is "lm" which uses ordinary least squares analysis via the lm() function. Another engine is "stan" which uses the Stan infrastructure to estimate parameters using Bayes rule.

    • +
    +

    When adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model.

    +
    +
    +

    The general process

    +

    The parsnip package stores information about the models in an internal environment object. The environment can be accessed via the function get_model_env(). The package includes a variety of functions that can get or set the different aspects of the models.

    +

    If you are adding a new model from your own package, you can use these functions to add new entries into the model environment.

    +
    +

    Step 1. Register the model, modes, and arguments

    +

    We will add the MDA model using the model type discrim_mixture. Since this is a classification method, we only have to register a single mode:

    +
    +
    library(tidymodels)
    +set_new_model("discrim_mixture")
    +set_model_mode(model = "discrim_mixture", mode = "classification")
    +set_model_engine(
    +  "discrim_mixture", 
    +  mode = "classification", 
    +  eng = "mda"
    +)
    +set_dependency("discrim_mixture", eng = "mda", pkg = "mda")
    +
    +

    These functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip:

    +
    +
    show_model_info("discrim_mixture")
    +#> Information for `discrim_mixture`
    +#>  modes: unknown, classification 
    +#> 
    +#>  engines: 
    +#>    classification: mdaNA
    +#> 
    +#> ¹The model can use case weights.
    +#> 
    +#>  no registered arguments.
    +#> 
    +#>  no registered fit modules.
    +#> 
    +#>  no registered prediction modules.
    +
    +

    The next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in:

    +
      +
    • The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. “penalty” instead of “lambda” for regularized regression). We recommend consulting the model argument table available here to see if an existing argument name can be used before creating a new one.

    • +
    • The argument name that is used by the underlying modeling function.

    • +
    • A function reference for a constructor that will be used to generate tuning parameter values. This should be a character vector with a named element called fun that is the constructor function. There is an optional element pkg that can be used to call the function using its namespace. If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for trans and range while qualitative parameters can pass values via this list.

    • +
    • A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. In general this is not the case so one would use has_submodels = FALSE.

    • +
    +

    For mda::mda(), the main tuning parameter is subclasses which we will rewrite as sub_classes.

    +
    +
    set_model_arg(
    +  model = "discrim_mixture",
    +  eng = "mda",
    +  parsnip = "sub_classes",
    +  original = "subclasses",
    +  func = list(pkg = "foo", fun = "bar"),
    +  has_submodel = FALSE
    +)
    +show_model_info("discrim_mixture")
    +#> Information for `discrim_mixture`
    +#>  modes: unknown, classification 
    +#> 
    +#>  engines: 
    +#>    classification: mdaNA
    +#> 
    +#> ¹The model can use case weights.
    +#> 
    +#>  arguments: 
    +#>    mda: 
    +#>       sub_classes --> subclasses
    +#> 
    +#>  no registered fit modules.
    +#> 
    +#>  no registered prediction modules.
    +
    +
    +
    +

    Step 2. Create the model function

    +

    This is a fairly simple function that can follow a basic template. The main arguments to our function will be:

    +
      +
    • The mode. If the model can do more than one mode, you might default this to “unknown”. In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won’t have to specify it.

    • +
    • The argument names (sub_classes here). These should be defaulted to NULL.

    • +
    +

    A basic version of the function is:

    +
    +
    discrim_mixture <-
    +  function(mode = "classification",  sub_classes = NULL) {
    +    # Check for correct mode
    +    if (mode  != "classification") {
    +      rlang::abort("`mode` should be 'classification'")
    +    }
    +    
    +    # Capture the arguments in quosures
    +    args <- list(sub_classes = rlang::enquo(sub_classes))
    +    
    +    # Save some empty slots for future parts of the specification
    +    new_model_spec(
    +      "discrim_mixture",
    +      args = args,
    +      eng_args = NULL,
    +      mode = mode,
    +      method = NULL,
    +      engine = NULL
    +    )
    +  }
    +
    +

    This is pretty simple since the data are not exposed to this function.

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    We strongly suggest favoring rlang::abort() and rlang::warn() over stop() and warning(). The former return better traceback results and have safer defaults for handling call objects.

    +
    +
    +
    +
    +

    Step 3. Add a fit module

    +

    Now that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are:

    +
      +
    • interface is a single character value that could be “formula”, “data.frame”, or “matrix”. This defines the type of interface used by the underlying fit function (mda::mda, in this case). This helps the translation of the data to be in an appropriate format for that function.

    • +
    • protect is an optional list of function arguments that should not be changeable by the user. In this case, we probably don’t want users to pass data values to these arguments (until the fit() function is called).

    • +
    • func is the package and name of the function that will be called. If you are using a locally defined function, only fun is required.

    • +
    • defaults is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. This isn’t needed in this case, but is described later in this document.

    • +
    +

    For the first engine:

    +
    +
    set_fit(
    +  model = "discrim_mixture",
    +  eng = "mda",
    +  mode = "classification",
    +  value = list(
    +    interface = "formula",
    +    protect = c("formula", "data"),
    +    func = c(pkg = "mda", fun = "mda"),
    +    defaults = list()
    +  )
    +)
    +
    +show_model_info("discrim_mixture")
    +#> Information for `discrim_mixture`
    +#>  modes: unknown, classification 
    +#> 
    +#>  engines: 
    +#>    classification: mda
    +#> 
    +#> ¹The model can use case weights.
    +#> 
    +#>  arguments: 
    +#>    mda: 
    +#>       sub_classes --> subclasses
    +#> 
    +#>  fit modules:
    +#>  engine           mode
    +#>     mda classification
    +#> 
    +#>  no registered prediction modules.
    +
    +

    We also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly.

    +
      +
    • predictor_indicators describes whether and how to create indicator/dummy variables from factor predictors. There are three options: "none" (do not expand factor predictors), "traditional" (apply the standard model.matrix() encodings), and "one_hot" (create the complete set including the baseline level for all factors).

    • +
    • compute_intercept controls whether model.matrix() should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, model.matrix() computes dummy variables for all but one factor level. Without an intercept, model.matrix() computes a full set of indicators for the first factor variable, but an incomplete set for the remainder.

    • +
    • remove_intercept removes the intercept column after model.matrix() is finished. This can be useful if the model function (e.g. lm()) automatically generates an intercept.

    • +
    • allow_sparse_x specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning.

    • +
    +
    +
    set_encoding(
    +  model = "discrim_mixture",
    +  eng = "mda",
    +  mode = "classification",
    +  options = list(
    +    predictor_indicators = "traditional",
    +    compute_intercept = TRUE,
    +    remove_intercept = TRUE,
    +    allow_sparse_x = FALSE
    +  )
    +)
    +
    +
    +
    +

    Step 4. Add modules for prediction

    +

    Similar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the class object contains the details. The elements of the list are:

    +
      +
    • pre and post are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won’t be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a pre function with the args below.
    • +
    • func is the prediction function (in the same format as above). In many cases, packages have a predict method for their model’s class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to predict() with no associated package.
    • +
    • args is a list of arguments to pass to the prediction function. These will most likely be wrapped in rlang::expr() so that they are not evaluated when defining the method. For mda, the code would be predict(object, newdata, type = "class"). What is actually given to the function is the parsnip model fit object, which includes a sub-object called fit() that houses the mda model object. If the data need to be a matrix or data frame, you could also use newdata = quote(as.data.frame(newdata)) or similar.
    • +
    +

    The parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data.

    +

    To add this method to the model environment, a similar set() function is used:

    +
    +
    class_info <- 
    +  list(
    +    pre = NULL,
    +    post = NULL,
    +    func = c(fun = "predict"),
    +    args =
    +      # These lists should be of the form:
    +      # {predict.mda argument name} = {values provided from parsnip objects}
    +      list(
    +        # We don't want the first two arguments evaluated right now
    +        # since they don't exist yet. `type` is a simple object that
    +        # doesn't need to have its evaluation deferred. 
    +        object = quote(object$fit),
    +        newdata = quote(new_data),
    +        type = "class"
    +      )
    +  )
    +
    +set_pred(
    +  model = "discrim_mixture",
    +  eng = "mda",
    +  mode = "classification",
    +  type = "class",
    +  value = class_info
    +)
    +
    +

    A similar call can be used to define the class probability module (if they can be computed). The format is identical to the class module but the output is expected to be a tibble with columns for each factor level.

    +

    As an example of the post function, the data frame created by mda:::predict.mda() will be converted to a tibble. The arguments are x (the raw results coming from the predict method) and object (the parsnip model fit object). The latter has a sub-object called lvl which is a character string of the outcome’s factor levels (if any).

    +

    We register the probability module. There is a template function that makes this slightly easier to format the objects:

    +
    +
    prob_info <-
    +  pred_value_template(
    +    post = function(x, object) {
    +      tibble::as_tibble(x)
    +    },
    +    func = c(fun = "predict"),
    +    # Now everything else is put into the `args` slot
    +    object = quote(object$fit),
    +    newdata = quote(new_data),
    +    type = "posterior"
    +  )
    +
    +set_pred(
    +  model = "discrim_mixture",
    +  eng = "mda",
    +  mode = "classification",
    +  type = "prob",
    +  value = prob_info
    +)
    +
    +show_model_info("discrim_mixture")
    +#> Information for `discrim_mixture`
    +#>  modes: unknown, classification 
    +#> 
    +#>  engines: 
    +#>    classification: mda
    +#> 
    +#> ¹The model can use case weights.
    +#> 
    +#>  arguments: 
    +#>    mda: 
    +#>       sub_classes --> subclasses
    +#> 
    +#>  fit modules:
    +#>  engine           mode
    +#>     mda classification
    +#> 
    +#>  prediction modules:
    +#>              mode engine     methods
    +#>    classification    mda class, prob
    +
    +

    If this model could be used for regression situations, we could also add a “numeric” module. For pred, the model requires an unnamed numeric vector output (usually).

    +

    Examples are here and here.

    +
    +
    +

    Does it work?

    +

    As a developer, one thing that may come in handy is the translate() function. This will tell you what the model’s eventual syntax will be.

    +

    For example:

    +
    +
    discrim_mixture(sub_classes = 2) %>%
    +  translate(engine = "mda")
    +#> discrim mixture Model Specification (classification)
    +#> 
    +#> Main Arguments:
    +#>   sub_classes = 2
    +#> 
    +#> Computational engine: mda 
    +#> 
    +#> Model fit template:
    +#> mda::mda(formula = missing_arg(), data = missing_arg(), subclasses = 2)
    +
    +

    Let’s try it on a data set from the modeldata package:

    +
    +
    data("two_class_dat", package = "modeldata")
    +set.seed(4622)
    +example_split <- initial_split(two_class_dat, prop = 0.99)
    +example_train <- training(example_split)
    +example_test  <-  testing(example_split)
    +
    +mda_spec <- discrim_mixture(sub_classes = 2) %>% 
    +  set_engine("mda")
    +
    +mda_fit <- mda_spec %>%
    +  fit(Class ~ ., data = example_train, engine = "mda")
    +mda_fit
    +#> parsnip model object
    +#> 
    +#> Call:
    +#> mda::mda(formula = Class ~ ., data = data, subclasses = ~2)
    +#> 
    +#> Dimension: 2 
    +#> 
    +#> Percent Between-Group Variance Explained:
    +#>     v1     v2 
    +#>  82.63 100.00 
    +#> 
    +#> Degrees of Freedom (per dimension): 3 
    +#> 
    +#> Training Misclassification Error: 0.17241 ( N = 783 )
    +#> 
    +#> Deviance: 671.391
    +
    +predict(mda_fit, new_data = example_test, type = "prob") %>%
    +  bind_cols(example_test %>% select(Class))
    +#> # A tibble: 8 × 3
    +#>   .pred_Class1 .pred_Class2 Class 
    +#>          <dbl>        <dbl> <fct> 
    +#> 1       0.679         0.321 Class1
    +#> 2       0.690         0.310 Class1
    +#> 3       0.384         0.616 Class2
    +#> 4       0.300         0.700 Class1
    +#> 5       0.0262        0.974 Class2
    +#> 6       0.405         0.595 Class2
    +#> 7       0.793         0.207 Class1
    +#> 8       0.0949        0.905 Class2
    +
    +predict(mda_fit, new_data = example_test) %>% 
    + bind_cols(example_test %>% select(Class))
    +#> # A tibble: 8 × 2
    +#>   .pred_class Class 
    +#>   <fct>       <fct> 
    +#> 1 Class1      Class1
    +#> 2 Class1      Class1
    +#> 3 Class2      Class2
    +#> 4 Class2      Class1
    +#> 5 Class2      Class2
    +#> 6 Class2      Class2
    +#> 7 Class1      Class1
    +#> 8 Class2      Class2
    +
    +
    +
    +
    +

    Add an engine

    +

    The process for adding an engine to an existing model is almost the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. The code for the rlm() function in MASS is pretty similar to lm(), so we can copy that code and change the package/function names:

    +
    +
    set_model_engine("linear_reg", "regression", eng = "rlm")
    +set_dependency("linear_reg", eng = "rlm", pkg = "MASS")
    +
    +set_fit(
    +  model = "linear_reg",
    +  eng = "rlm",
    +  mode = "regression",
    +  value = list(
    +    interface = "formula",
    +    protect = c("formula", "data", "weights"),
    +    func = c(pkg = "MASS", fun = "rlm"),
    +    defaults = list()
    +  )
    +)
    +
    +set_encoding(
    +  model = "linear_reg",
    +  eng = "rlm",
    +  mode = "regression",
    +  options = list(
    +    predictor_indicators = "traditional",
    +    compute_intercept = TRUE,
    +    remove_intercept = TRUE,
    +    allow_sparse_x = FALSE
    +  )
    +)
    +
    +set_pred(
    +  model = "linear_reg",
    +  eng = "rlm",
    +  mode = "regression",
    +  type = "numeric",
    +  value = list(
    +    pre = NULL,
    +    post = NULL,
    +    func = c(fun = "predict"),
    +    args =
    +      list(
    +        object = expr(object$fit),
    +        newdata = expr(new_data),
    +        type = "response"
    +      )
    +  )
    +)
    +
    +# testing:
    +linear_reg() %>% 
    +  set_engine("rlm") %>% 
    +  fit(mpg ~ ., data = mtcars)
    +#> parsnip model object
    +#> 
    +#> Call:
    +#> rlm(formula = mpg ~ ., data = data)
    +#> Converged in 8 iterations
    +#> 
    +#> Coefficients:
    +#> (Intercept)         cyl        disp          hp        drat          wt 
    +#> 17.82250038 -0.27878615  0.01593890 -0.02536343  0.46391132 -4.14355431 
    +#>        qsec          vs          am        gear        carb 
    +#>  0.65307203  0.24975463  1.43412689  0.85943158 -0.01078897 
    +#> 
    +#> Degrees of freedom: 32 total; 21 residual
    +#> Scale estimate: 2.15
    +
    +
    +
    +

    Add parsnip models to another package

    +

    The process here is almost the same. All of the previous functions are still required but their execution is a little different.

    +

    For parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the “Depends” category.

    +

    The first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. For our example here, this might look like:

    +
    +
    make_discrim_mixture_mda <- function() {
    +  parsnip::set_new_model("discrim_mixture")
    +
    +  parsnip::set_model_mode("discrim_mixture", "classification")
    +
    +  # and so one...
    +}
    +
    +

    This function is then executed when your package is loaded:

    +
    +
    .onLoad <- function(libname, pkgname) {
    +  # This defines discrim_mixture in the model database
    +  make_discrim_mixture_mda()
    +}
    +
    +

    For an example package that uses parsnip definitions, take a look at the discrim package.

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. set_new_model() and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing.

    +
    +
    +

    It is also important for parallel processing support to list the home package as a dependency. If the discrim_mixture() function lived in a package called mixedup, include the line:

    +
    set_dependency("discrim_mixture", eng = "mda", pkg = "mixedup")
    +

    Parallel processing requires this explicit dependency setting. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip’s internal database (and produce an error).

    +
    +
    +

    Your model, tuning parameters, and you

    +

    The tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. discrim_mixture() has one main tuning parameter: sub_classes. To work with tune it is helpful (but not required) to use an S3 method called tunable() to define which arguments should be tuned and how values of those arguments should be generated.

    +

    tunable() takes the model specification as its argument and returns a tibble with columns:

    +
      +
    • name: The name of the argument.

    • +
    • call_info: A list that describes how to call a function that returns a dials parameter object.

    • +
    • source: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just "model_spec".

    • +
    • component: A character string with more information about the source. For models, this is just the name of the function (e.g. "discrim_mixture").

    • +
    • component_id: A character string to indicate where a unique identifier is for the object. For a model, this indicates the type of model argument (e.g. “main”).

    • +
    +

    The main piece of information that requires some detail is call_info. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object.

    +

    For example, for a nearest-neighbors neighbors parameter, this value is just:

    +
    +
    info <- list(pkg = "dials", fun = "neighbors")
    +
    +# FYI: how it is used under-the-hood: 
    +new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)
    +rlang::eval_tidy(new_param_call)
    +#> # Nearest Neighbors (quantitative)
    +#> Range: [1, 10]
    +
    +

    For discrim_mixture(), a dials object is needed that returns an integer that is the number of sub-classes that should be created. We can create a dials parameter function for this:

    +
    +
    sub_classes <- function(range = c(1L, 10L), trans = NULL) {
    +  new_quant_param(
    +    type = "integer",
    +    range = range,
    +    inclusive = c(TRUE, TRUE),
    +    trans = trans,
    +    label = c(sub_classes = "# Sub-Classes"),
    +    finalize = NULL
    +  )
    +}
    +
    +

    If this were in the dials package, we could use:

    +
    +
    tunable.discrim_mixture <- function(x, ...) {
    +  tibble::tibble(
    +    name = c("sub_classes"),
    +    call_info = list(list(pkg = NULL, fun = "sub_classes")),
    +    source = "model_spec",
    +    component = "discrim_mixture",
    +    component_id = "main"
    +  )
    +}
    +
    +

    Once this method is in place, the tuning functions can be used:

    +
    +
    mda_spec <- 
    +  discrim_mixture(sub_classes = tune()) %>% 
    +  set_engine("mda")
    +
    +set.seed(452)
    +cv <- vfold_cv(example_train)
    +mda_tune_res <- mda_spec %>%
    +  tune_grid(Class ~ ., cv, grid = 4)
    +show_best(mda_tune_res, metric = "roc_auc")
    +#> # A tibble: 4 × 7
    +#>   sub_classes .metric .estimator  mean     n std_err .config             
    +#>         <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>               
    +#> 1           2 roc_auc binary     0.890    10  0.0143 Preprocessor1_Model3
    +#> 2           3 roc_auc binary     0.889    10  0.0142 Preprocessor1_Model4
    +#> 3           6 roc_auc binary     0.884    10  0.0147 Preprocessor1_Model2
    +#> 4           8 roc_auc binary     0.881    10  0.0146 Preprocessor1_Model1
    +
    +
    +
    +

    Pro-tips, what-ifs, exceptions, FAQ, and minutiae

    +

    There are various things that came to mind while developing this resource.

    +

    Do I have to return a simple vector for predict and predict_class?

    +

    Previously, when discussing the pred information:

    +
    +

    For pred, the model requires an unnamed numeric vector output (usually).

    +
    +

    There are some models (e.g. glmnet, plsr, Cubist, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called values and have extra columns for any parameters that define the different sub-models.

    +

    For example, if I fit a linear regression model via glmnet and get four values of the regularization parameter (lambda):

    +
    +
    linear_reg() %>%
    +  set_engine("glmnet", nlambda = 4) %>% 
    +  fit(mpg ~ ., data = mtcars) %>%
    +  multi_predict(new_data = mtcars[1:3, -1])
    +
    +

    However, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (lambda, in this case).

    +

    What do I do about how my model handles factors or categorical data?

    +

    Some modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using model.matrix()), and some do not. Some examples of models that do not create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a model.matrix()-like default encoding for categorical data used in a model formula, but you can change this encoding using set_encoding(). For example, you can set predictor encodings that say, “leave my data alone,” and keep factors as is:

    +
    +
    set_encoding(
    +  model = "decision_tree",
    +  eng = "rpart",
    +  mode = "regression",
    +  options = list(
    +    predictor_indicators = "none",
    +    compute_intercept = FALSE,
    +    remove_intercept = FALSE
    +  )
    +)
    +
    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    There are three options for predictor_indicators: - “none” (do not expand factor predictors) - “traditional” (apply the standard model.matrix() encoding) - “one_hot” (create the complete set including the baseline level for all factors)

    +
    +
    +

    To learn more about encoding categorical predictors, check out this blog post.

    +

    What is the defaults slot and why do I need it?

    +

    You might want to set defaults that can be overridden by the user. For example, for logistic regression with glm, it makes sense to default family = binomial. However, if someone wants to use a different link function, they should be able to do that. For that model/engine definition, it has:

    +
    +
    defaults = list(family = expr(binomial))
    +
    +

    So that is the default:

    +
    +
    logistic_reg() %>% translate(engine = "glm")
    +
    +# but you can change it:
    +
    +logistic_reg() %>%
    +  set_engine("glm", family = expr(binomial(link = "probit"))) %>% 
    +  translate()
    +
    +

    That’s what defaults are for.

    +

    Note that we wrapped binomial inside of expr(). If we didn’t, it would substitute the results of executing binomial() inside of the expression (and that’s a mess).

    +

    What if I want more complex defaults?

    +

    The translate function can be used to check values or set defaults once the model’s mode is known. To do this, you can create a model-specific S3 method that first calls the general method (translate.model_spec()) and then makes modifications or conducts error traps.

    +

    For example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check:

    +
    +
    # Simplified version
    +translate.rand_forest <- function (x, engine, ...){
    +  # Run the general method to get the real arguments in place
    +  x <- translate.default(x, engine, ...)
    +  
    +  # Check and see if they make sense for the engine and/or mode:
    +  if (x$engine == "ranger") {
    +    if (any(names(x$method$fit$args) == "importance")) 
    +      if (is.logical(x$method$fit$args$importance)) 
    +        rlang::abort("`importance` should be a character value. See ?ranger::ranger.")
    +  }
    +  x
    +}
    +
    +

    As another example, nnet::nnet() has an option for the final layer to be linear (called linout). If mode = "regression", that should probably be set to TRUE. You couldn’t do this with the args (described above) since you need the function translated first.

    +

    My model fit requires more than one function call. So….?

    +

    The best course of action is to write a wrapper so that it can be one call. This was the case with xgboost and keras.

    +

    Why would I preprocess my data?

    +

    There might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.)

    +

    This would not include making dummy variables and model.matrix stuff. The parsnip infrastructure already does that for you.

    +

    Why would I post-process my predictions?

    +

    What comes back from some R functions may be somewhat… arcane or problematic. As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (narrator: they don’t). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set.

    +

    Another example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure.

    +

    These are the types of problems that the post-processor will solve.

    +

    Are there other modes?

    +

    Not yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don’t make direct predictions of the outcome.

    +

    If you have a suggestion, please add a GitHub issue to discuss it.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  mda        * 0.5-3   2022-05-05 [1] CRAN (R 4.3.0)
    +#>  modeldata  * 1.1.0   2023-01-25 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/develop/parameters/index.html b/docs/learn/develop/parameters/index.html new file mode 100644 index 00000000..8b517bcb --- /dev/null +++ b/docs/learn/develop/parameters/index.html @@ -0,0 +1,905 @@ + + + + + + + + + + +tidymodels - How to create a tuning parameter function + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    How to create a tuning parameter function

    +
    +
    developer tools
    +
    +
    + +
    +
    +

    Build functions to use in tuning both quantitative and qualitative parameters.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: dials and scales.

    +

    Some models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using new_quant_param() or new_qual_param(), respectively. This article is a guide to creating new parameters.

    +
    +
    +

    Quantitative parameters

    +

    As an example, let’s consider the multivariate adaptive regression spline (MARS) model, which creates nonlinear features from predictors and adds them to a linear regression model. The earth package is an excellent implementation of this method.

    +

    MARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by earth::earth() or the number of final terms can be set by the user. The parsnip function mars() has a parameter called num_terms that defines this.

    +

    What if we want to create a parameter for the number of initial terms included in the model? There is no argument in parsnip::mars() for this but we will make one now. The argument name in earth::earth() is nk, which is not very descriptive. Our parameter will be called num_initial_terms.

    +

    We use the new_quant_param() function since this is a numeric parameter. The main two arguments to a numeric parameter function are range and trans.

    +

    The range specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is

    +
    +
    min(200, max(20, 2 * ncol(x))) + 1
    +
    +

    where x is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let’s specify an upper limit of 10 but this will be discussed more in the next section.

    +

    The other argument is trans, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a penalty parameter that tends to range between zero and some upper bound (let’s say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units.

    +

    The trans parameter accepts a transformation object from the scales package. For example:

    +
    +
    library(scales)
    +lsf.str("package:scales", pattern = "_trans$")
    +#> asn_trans : function ()  
    +#> atanh_trans : function ()  
    +#> boxcox_trans : function (p, offset = 0)  
    +#> compose_trans : function (...)  
    +#> date_trans : function ()  
    +#> exp_trans : function (base = exp(1))  
    +#> hms_trans : function ()  
    +#> identity_trans : function ()  
    +#> log_trans : function (base = exp(1))  
    +#> log10_trans : function ()  
    +#> log1p_trans : function ()  
    +#> log2_trans : function ()  
    +#> logit_trans : function ()  
    +#> modulus_trans : function (p, offset = 1)  
    +#> probability_trans : function (distribution, ...)  
    +#> probit_trans : function ()  
    +#> pseudo_log_trans : function (sigma = 1, base = exp(1))  
    +#> reciprocal_trans : function ()  
    +#> reverse_trans : function ()  
    +#> sqrt_trans : function ()  
    +#> time_trans : function (tz = NULL)  
    +#> yj_trans : function (p)
    +scales::log10_trans()
    +#> Transformer: log-10 [1e-100, Inf]
    +
    +

    A value of NULL means that no transformation should be used.

    +

    A quantitative parameter function should have these two arguments and, in the function body, a call new_quant_param(). There are a few arguments to this function:

    +
    +
    library(tidymodels)
    +args(new_quant_param)
    +#> function (type = c("double", "integer"), range = NULL, inclusive = NULL, 
    +#>     default = deprecated(), trans = NULL, values = NULL, label = NULL, 
    +#>     finalize = NULL, ..., call = caller_env()) 
    +#> NULL
    +
    +
      +
    • Possible types are double precision and integers. The value of type should agree with the values of range in the function definition.

    • +
    • It’s OK for our tuning to include the minimum or maximum, so we’ll use c(TRUE, TRUE) for inclusive. If the value cannot include one end of the range, set one or both of these values to FALSE.

    • +
    • The label should be a named character string where the name is the parameter name and the value represents what will be printed automatically.

    • +
    • finalize is an argument that can set parts of the range. This is discussed more below.

    • +
    +

    Here’s an example of a basic quantitative parameter object:

    +
    +
    num_initial_terms <- function(range = c(1L, 10L), trans = NULL) {
    +  new_quant_param(
    +    type = "integer",
    +    range = range,
    +    inclusive = c(TRUE, TRUE),
    +    trans = trans,
    +    label = c(num_initial_terms = "# Initial MARS Terms"),
    +    finalize = NULL
    +  )
    +}
    +
    +num_initial_terms()
    +#> # Initial MARS Terms (quantitative)
    +#> Range: [1, 10]
    +
    +# Sample from the parameter:
    +set.seed(4832856)
    +num_initial_terms() %>% value_sample(5)
    +#> [1]  6  4  9 10  4
    +
    +
    +

    Finalizing parameters

    +

    It might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of unknown() can be added. This will force the user to “finalize” the parameter object for their particular data set. Let’s redefine our function with an unknown() value:

    +
    +
    num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {
    +  new_quant_param(
    +    type = "integer",
    +    range = range,
    +    inclusive = c(TRUE, TRUE),
    +    trans = trans,
    +    label = c(num_initial_terms = "# Initial MARS Terms"),
    +    finalize = NULL
    +  )
    +}
    +num_initial_terms()
    +
    +# Can we sample? 
    +num_initial_terms() %>% value_sample(5)
    +
    +

    The finalize argument of num_initial_terms() can take a function that uses data to set the range. For example, the package already includes a few functions for finalization:

    +
    +
    lsf.str("package:dials", pattern = "^get_")
    +#> get_batch_sizes : function (object, x, frac = c(1/10, 1/3), ...)  
    +#> get_log_p : function (object, x, ...)  
    +#> get_n : function (object, x, log_vals = FALSE, ...)  
    +#> get_n_frac : function (object, x, log_vals = FALSE, frac = 1/3, ...)  
    +#> get_n_frac_range : function (object, x, log_vals = FALSE, frac = c(1/10, 5/10), ...)  
    +#> get_p : function (object, x, log_vals = FALSE, ...)  
    +#> get_rbf_range : function (object, x, seed = sample.int(10^5, 1), ...)
    +
    +

    These functions generally take a data frame of predictors (in an argument called x) and add the range of the parameter object. Using the formula in the earth package, we might use:

    +
    +
    get_initial_mars_terms <- function(object, x) {
    +  upper_bound <- min(200, max(20, 2 * ncol(x))) + 1
    +  upper_bound <- as.integer(upper_bound)
    +  bounds <- range_get(object)
    +  bounds$upper <- upper_bound
    +  range_set(object, bounds)
    +}
    +
    +# Use the mtcars data to finalize the upper bound: 
    +num_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1])
    +#> # Initial MARS Terms (quantitative)
    +#> Range: [1, 21]
    +
    +

    Once we add this function to the object, the general finalize() method can be used:

    +
    +
    num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {
    +  new_quant_param(
    +    type = "integer",
    +    range = range,
    +    inclusive = c(TRUE, TRUE),
    +    trans = trans,
    +    label = c(num_initial_terms = "# Initial MARS Terms"),
    +    finalize = get_initial_mars_terms
    +  )
    +}
    +
    +num_initial_terms() %>% finalize(x = mtcars[, -1])
    +#> # Initial MARS Terms (quantitative)
    +#> Range: [1, 21]
    +
    +
    +
    +
    +

    Qualitative parameters

    +

    Now let’s look at an example of a qualitative parameter. If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be min, max, mean, sum, or to not aggregate at all (“none”). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We’ll take “character” input (not “logical”), and we must specify the allowed values. By default we won’t aggregate.

    +
    +
    aggregation <- function(values = c("none", "min", "max", "mean", "sum")) {
    +  new_qual_param(
    +    type = "character",
    +    values = values,
    +    # By default, the first value is selected as default. We'll specify that to
    +    # make it clear.
    +    default = "none",
    +    label = c(aggregation = "Aggregation Method")
    +  )
    +}
    +
    +

    Within the dials package, the convention is to have the values contained in a separate vector whose name starts with values_. For example:

    +
    +
    values_aggregation <- c("none", "min", "max", "mean", "sum")
    +aggregation <- function(values = values_aggregation) {
    +  new_qual_param(
    +    type = "character",
    +    values = values,
    +    # By default, the first value is selected as default. We'll specify that to
    +    # make it clear.
    +    default = "none",
    +    label = c(aggregation = "Aggregation Method")
    +  )
    +}
    +
    +

    This step may not make sense if you are using the function in a script and not keeping it within a package.

    +

    We can use our aggregation parameters with dials functions.

    +
    +
    aggregation()
    +#> Warning: The `default` argument of `new_qual_param()` is deprecated as of
    +#> dials 1.1.0.
    +#> Aggregation Method  (qualitative)
    +#> 5 possible values include:
    +#> 'none', 'min', 'max', 'mean' and 'sum'
    +aggregation() %>% value_sample(3)
    +#> [1] "min"  "sum"  "mean"
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  scales     * 1.2.1   2022-08-20 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/develop/recipes/figs/carbon_dist-1.svg b/docs/learn/develop/recipes/figs/carbon_dist-1.svg similarity index 100% rename from content/learn/develop/recipes/figs/carbon_dist-1.svg rename to docs/learn/develop/recipes/figs/carbon_dist-1.svg diff --git a/docs/learn/develop/recipes/index.html b/docs/learn/develop/recipes/index.html new file mode 100644 index 00000000..24995020 --- /dev/null +++ b/docs/learn/develop/recipes/index.html @@ -0,0 +1,1159 @@ + + + + + + + + + + +tidymodels - Create your own recipe step function + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Create your own recipe step function

    +
    +
    developer tools
    +
    +
    + +
    +
    +

    Write a new recipe step for data preprocessing.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: modeldata and tidymodels.

    +

    There are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages can be found here. However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for centering or PCA to start.

    +

    For check operations (e.g. check_class()), the process is very similar. Notes on this are available at the end of this article.

    +

    The general process to follow is to:

    +
      +
    1. Define a step constructor function.

    2. +
    3. Create the minimal S3 methods for prep(), bake(), and print().

    4. +
    5. Optionally add some extra methods to work with other tidymodels packages, such as tunable() and tidy().

    6. +
    +

    As an example, we will create a step for converting data into percentiles.

    +
    +
    +

    A new step definition

    +

    Let’s create a step that replaces the value of a variable with its percentile from the training set. The example data we’ll use is from the modeldata package:

    +
    +
    library(modeldata)
    +data(biomass)
    +str(biomass)
    +#> 'data.frame':    536 obs. of  8 variables:
    +#>  $ sample  : chr  "Akhrot Shell" "Alabama Oak Wood Waste" "Alder" "Alfalfa" ...
    +#>  $ dataset : chr  "Training" "Training" "Training" "Training" ...
    +#>  $ carbon  : num  49.8 49.5 47.8 45.1 46.8 ...
    +#>  $ hydrogen: num  5.64 5.7 5.8 4.97 5.4 5.75 5.99 5.7 5.5 5.9 ...
    +#>  $ oxygen  : num  42.9 41.3 46.2 35.6 40.7 ...
    +#>  $ nitrogen: num  0.41 0.2 0.11 3.3 1 2.04 2.68 1.7 0.8 1.2 ...
    +#>  $ sulfur  : num  0 0 0.02 0.16 0.02 0.1 0.2 0.2 0 0.1 ...
    +#>  $ HHV     : num  20 19.2 18.3 18.2 18.4 ...
    +
    +biomass_tr <- biomass[biomass$dataset == "Training",]
    +biomass_te <- biomass[biomass$dataset == "Testing",]
    +
    +

    To illustrate the transformation with the carbon variable, note the training set distribution of this variable with a vertical line below for the first value of the test set.

    +
    +
    library(ggplot2)
    +theme_set(theme_bw())
    +ggplot(biomass_tr, aes(x = carbon)) + 
    +  geom_histogram(binwidth = 5, col = "blue", fill = "blue", alpha = .5) + 
    +  geom_vline(xintercept = biomass_te$carbon[1], lty = 2)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Based on the training set, 42.1% of the data are less than a value of 46.35. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values.

    +

    Our new step will do this computation for any numeric variables of interest. We will call this new recipe step step_percentile(). The code below is designed for illustration and not speed or best practices. We’ve left out a lot of error trapping that we would want in a real implementation.

    +
    +
    +

    Create the function

    +

    To start, there is a user-facing function. Let’s call that step_percentile(). This is just a simple wrapper around a constructor function, which defines the rules for any step object that defines a percentile transformation. We’ll call this constructor step_percentile_new().

    +

    The function step_percentile() takes the same arguments as your function and simply adds it to a new recipe. The ... signifies the variable selectors that can be used.

    +
    +
    step_percentile <- function(
    +  recipe, 
    +  ..., 
    +  role = NA, 
    +  trained = FALSE, 
    +  ref_dist = NULL,
    +  options = list(probs = (0:100)/100, names = TRUE),
    +  skip = FALSE,
    +  id = rand_id("percentile")
    +  ) {
    +
    +  ## The variable selectors are not immediately evaluated by using
    +  ##  the `quos()` function in `rlang`. `ellipse_check()` captures 
    +  ##  the values and also checks to make sure that they are not empty.  
    +  terms <- ellipse_check(...) 
    +
    +  add_step(
    +    recipe, 
    +    step_percentile_new(
    +      terms = terms, 
    +      trained = trained,
    +      role = role, 
    +      ref_dist = ref_dist,
    +      options = options,
    +      skip = skip,
    +      id = id
    +    )
    +  )
    +}
    +
    +

    You should always keep the first four arguments (recipe through trained) the same as listed above. Some notes:

    +
      +
    • the role argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using role = NA will leave the existing role intact.
    • +
    • trained is set by the package when the estimation step has been run. You should default your function definition’s argument to FALSE.
    • +
    • skip is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to bake() is used. For example, if a step is applied to the variables with roles of “outcomes”, these data would not be available for new samples.
    • +
    • id is a character string that can be used to identify steps in package code. rand_id() will create an ID that has the prefix and a random character sequence.
    • +
    +

    We can estimate the percentiles of new data points based on the percentiles from the training set with approx(). Our step_percentile contains a ref_dist object to store these percentiles (pre-computed from the training set in prep()) for later use in bake().

    +

    We will use stats::quantile() to compute the grid. However, we might also want to have control over the granularity of this grid, so the options argument will be used to define how that calculation is done. We could use the ellipses (aka ...) so that any options passed to step_percentile() that are not one of its arguments will then be passed to stats::quantile(). However, we recommend making a separate list object with the options and use these inside the function because ... is already used to define the variable selection.

    +

    It is also important to consider if there are any main arguments to the step. For example, for spline-related steps such as step_ns(), users typically want to adjust the argument for the degrees of freedom in the spline (e.g. splines::ns(x, df)). Rather than letting users add df to the options argument:

    +
      +
    • Allow the important arguments to be main arguments to the step function.

    • +
    • Follow the tidymodels conventions for naming arguments. Whenever possible, avoid jargon and keep common argument names.

    • +
    +

    There are benefits to following these principles (as shown below).

    +
    +
    +

    Initialize a new object

    +

    Now, the constructor function can be created.

    +

    The function cascade is:

    +
    step_percentile() calls recipes::add_step()
    +└──> recipes::add_step() calls step_percentile_new()
    +    └──> step_percentile_new() calls recipes::step()
    +

    step() is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using subclass = "percentile" will set the class of new objects to "step_percentile".

    +
    +
    step_percentile_new <- 
    +  function(terms, role, trained, ref_dist, options, skip, id) {
    +    step(
    +      subclass = "percentile", 
    +      terms = terms,
    +      role = role,
    +      trained = trained,
    +      ref_dist = ref_dist,
    +      options = options,
    +      skip = skip,
    +      id = id
    +    )
    +  }
    +
    +

    This constructor function should have no default argument values. Defaults should be set in the user-facing step object.

    +
    +
    +

    Create the prep method

    +

    You will need to create a new prep() method for your step’s class. To do this, three arguments that the method should have are:

    +
    function(x, training, info = NULL)
    +

    where

    +
      +
    • x will be the step_percentile object,
    • +
    • training will be a tibble that has the training set data, and
    • +
    • info will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific prep() method so it may not have the variables from the original data. The columns in this tibble are variable (the variable name), type (currently either “numeric” or “nominal”), role (defining the variable’s role), and source (either “original” or “derived” depending on where it originated).
    • +
    +

    You can define other arguments as well.

    +

    The first thing that you might want to do in the prep() function is to translate the specification listed in the terms argument to column names in the current data. There is a function called recipes_eval_select() that can be used to obtain this.

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    The recipes_eval_select() function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps.

    +
    +
    +
    +
    prep.step_percentile <- function(x, training, info = NULL, ...) {
    +  col_names <- recipes_eval_select(x$terms, training, info) 
    +  # TODO finish the rest of the function
    +}
    +
    +

    After this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See recipes::check_type() to do this for basic types.

    +

    Once we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run rlang::exec() to splice in any extra arguments contained in the options list to the call to quantile():

    +
    +
    get_train_pctl <- function(x, args = NULL) {
    +  res <- rlang::exec("quantile", x = x, !!!args)
    +  # Remove duplicate percentile values
    +  res[!duplicated(res)]
    +}
    +
    +# For example:
    +get_train_pctl(biomass_tr$carbon, list(probs = 0:1))
    +#>    0%  100% 
    +#> 14.61 97.18
    +get_train_pctl(biomass_tr$carbon)
    +#>     0%    25%    50%    75%   100% 
    +#> 14.610 44.715 47.100 49.725 97.180
    +
    +

    Now, the prep() method can be created:

    +
    +
    prep.step_percentile <- function(x, training, info = NULL, ...) {
    +  col_names <- recipes_eval_select(x$terms, training, info)
    +  ## You can add error trapping for non-numeric data here and so on. 
    +  
    +  ## We'll use the names later so make sure they are available
    +  if (x$options$names == FALSE) {
    +    rlang::abort("`names` should be set to TRUE")
    +  }
    +  
    +  if (!any(names(x$options) == "probs")) {
    +    x$options$probs <- (0:100)/100
    +  } else {
    +    x$options$probs <- sort(unique(x$options$probs))
    +  }
    +  
    +  # Compute percentile grid
    +  ref_dist <- purrr::map(training[, col_names],  get_train_pctl, args = x$options)
    +
    +  ## Use the constructor function to return the updated object. 
    +  ## Note that `trained` is now set to TRUE
    +  
    +  step_percentile_new(
    +    terms = x$terms, 
    +    trained = TRUE,
    +    role = x$role, 
    +    ref_dist = ref_dist,
    +    options = x$options,
    +    skip = x$skip,
    +    id = x$id
    +  )
    +}
    +
    +

    We suggest favoring rlang::abort() and rlang::warn() over stop() and warning(). The former can be used for better traceback results.

    +
    +
    +

    Create the bake method

    +

    Remember that the prep() function does not apply the step to the data; it only estimates any required values such as ref_dist. We will need to create a new method for our step_percentile() class. The minimum arguments for this are

    +
    function(object, new_data, ...)
    +

    where object is the updated step function that has been through the corresponding prep() code and new_data is a tibble of data to be processed.

    +

    Here is the code to convert the new data to percentiles. The input data (x below) comes in as a numeric vector and the output is a vector of approximate percentiles:

    +
    +
    pctl_by_approx <- function(x, ref) {
    +  # In case duplicates were removed, get the percentiles from
    +  # the names of the reference object
    +  grid <- as.numeric(gsub("%$", "", names(ref))) 
    +  approx(x = ref, y = grid, xout = x)$y/100
    +}
    +
    +

    These computations are done column-wise using purrr::map2_dfc() to modify the new data in-place:

    +
    +
    bake.step_percentile <- function(object, new_data, ...) {
    +  ## For illustration (and not speed), we will loop through the affected variables
    +  ## and do the computations
    +  vars <- names(object$ref_dist)
    +  
    +  new_data[, vars] <-
    +    purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx)
    +  
    +  ## Always convert to tibbles on the way out
    +  tibble::as_tibble(new_data)
    +}
    +
    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    You need to import recipes::prep() and recipes::bake() to create your own step function in a package.

    +
    +
    +
    +
    +

    Run the example

    +

    Let’s use the example data to make sure that it works:

    +
    +
    rec_obj <- 
    +  recipe(HHV ~ ., data = biomass_tr) %>%
    +  step_percentile(ends_with("gen")) %>%
    +  prep(training = biomass_tr)
    +
    +biomass_te %>% select(ends_with("gen")) %>% slice(1:2)
    +bake(rec_obj, biomass_te %>% slice(1:2), ends_with("gen"))
    +
    +# Checking to get approximate result: 
    +mean(biomass_tr$hydrogen <= biomass_te$hydrogen[1])
    +mean(biomass_tr$oxygen   <= biomass_te$oxygen[1])
    +
    +

    The plot below shows how the original hydrogen percentiles line up with the estimated values:

    +
    +
    hydrogen_values <- 
    +  bake(rec_obj, biomass_te, hydrogen) %>% 
    +  bind_cols(biomass_te %>% select(original = hydrogen))
    +
    +ggplot(biomass_tr, aes(x = hydrogen)) + 
    +  # Plot the empirical distribution function of the 
    +  # hydrogen training set values as a black line
    +  stat_ecdf() + 
    +  # Overlay the estimated percentiles for the new data: 
    +  geom_point(data = hydrogen_values, 
    +             aes(x = original, y = hydrogen), 
    +             col = "red", alpha = .5, cex = 2) + 
    +  labs(x = "New Hydrogen Values", y = "Percentile Based on Training Set")
    +
    +

    These line up very nicely!

    +
    +
    +

    Custom check operations

    +

    The process here is exactly the same as steps; the internal functions have a similar naming convention:

    +
      +
    • add_check() instead of add_step()
    • +
    • check() instead of step(), and so on.
    • +
    +

    It is strongly recommended that:

    +
      +
    1. The operations start with check_ (i.e. check_range() and check_range_new())
    2. +
    3. The check uses rlang::abort(paste0(...)) when the conditions are not met
    4. +
    5. The original data are returned (unaltered) by the check when the conditions are satisfied.
    6. +
    +
    +
    +

    Other step methods

    +

    There are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set.

    +
    +

    A print method

    +

    If you don’t add a print method for step_percentile, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called printer() that should be useful in most cases. We are using it here for the custom print method for step_percentile. It requires the original terms specification and the column names this specification is evaluated to by prep(). For the former, our step object is structured so that the list object ref_dist has the names of the selected variables:

    +
    +
    print.step_percentile <-
    +  function(x, width = max(20, options()$width - 35), ...) {
    +    cat("Percentile transformation on ", sep = "")
    +    printer(
    +      # Names before prep (could be selectors)
    +      untr_obj = x$terms,
    +      # Names after prep:
    +      tr_obj = names(x$ref_dist),
    +      # Has it been prepped? 
    +      trained = x$trained,
    +      # An estimate of how many characters to print on a line: 
    +      width = width
    +    )
    +    invisible(x)
    +  }
    +
    +# Results before `prep()`:
    +recipe(HHV ~ ., data = biomass_tr) %>%
    +  step_percentile(ends_with("gen"))
    +
    +# Results after `prep()`: 
    +rec_obj
    +
    +
    +
    +

    Methods for declaring required packages

    +

    Some recipe steps use functions from other packages. When this is the case, the step_*() function should check to see if the package is installed. The function recipes::recipes_pkg_check() will do this. For example:

    +
    > recipes::recipes_pkg_check("some_package")
    +1 package is needed for this step and is not installed. (some_package). Start 
    +a clean R session then run: install.packages("some_package")
    +

    There is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the hypothetical package, this might look like:

    +
    +
    required_pkgs.step_hypothetical <- function(x, ...) {
    +  c("hypothetical", "myrecipespkg")
    +}
    +
    +

    In this example, myrecipespkg is the package where the step resides (if it is in a package).

    +

    The reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the prep() methods cannot be found and an error occurs.

    +

    If this S3 method is used for your step, you can rely on this for checking the installation:

    +
    +
    recipes::recipes_pkg_check(required_pkgs.step_hypothetical())
    +
    +

    If you’d like an example of this in a package, please take a look at the embed or themis package.

    +
    +
    +

    A tidy method

    +

    The broom::tidy() method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values.

    +

    When the recipe has been prepped, those data are in the list ref_dist. A small function can be used to reformat that data into a tibble. It is customary to return the main values as value:

    +
    +
    format_pctl <- function(x) {
    +  tibble::tibble(
    +    value = unname(x),
    +    percentile = as.numeric(gsub("%$", "", names(x))) 
    +  )
    +}
    +
    +# For example: 
    +pctl_step_object <- rec_obj$steps[[1]]
    +pctl_step_object
    +format_pctl(pctl_step_object$ref_dist[["hydrogen"]])
    +
    +

    The tidy method could return these values for each selected column. Before prep(), missing values can be used as placeholders.

    +
    +
    tidy.step_percentile <- function(x, ...) {
    +  if (is_trained(x)) {
    +    res <- map_dfr(x$ref_dist, format_pctl, .id = "term")
    +  }
    +  else {
    +    term_names <- sel2char(x$terms)
    +    res <-
    +      tibble(
    +        terms = term_names,
    +        value = rlang::na_dbl,
    +        percentile = rlang::na_dbl
    +      )
    +  }
    +  # Always return the step id: 
    +  res$id <- x$id
    +  res
    +}
    +
    +tidy(rec_obj, number = 1)
    +
    +
    +
    +

    Methods for tuning parameters

    +

    The tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn’t really have any tunable parameters, so we will demonstrate using step_poly(), which returns a polynomial expansion of selected columns. Its function definition has the arguments:

    +
    +
    args(step_poly)
    +
    +

    The argument degree is tunable.

    +

    To work with tune it is helpful (but not required) to use an S3 method called tunable() to define which arguments should be tuned and how values of those arguments should be generated.

    +

    tunable() takes the step object as its argument and returns a tibble with columns:

    +
      +
    • name: The name of the argument.

    • +
    • call_info: A list that describes how to call a function that returns a dials parameter object.

    • +
    • source: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just "recipe".

    • +
    • component: A character string with more information about the source. For recipes, this is just the name of the step (e.g. "step_poly").

    • +
    • component_id: A character string to indicate where a unique identifier is for the object. For recipes, this is just the id value of the step object.

    • +
    +

    The main piece of information that requires some detail is call_info. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object.

    +

    For example, for a nearest-neighbors neighbors parameter, this value is just:

    +
    +
    info <- list(pkg = "dials", fun = "neighbors")
    +
    +# FYI: how it is used under-the-hood: 
    +new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)
    +rlang::eval_tidy(new_param_call)
    +
    +

    For step_poly(), a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree:

    +
    > lsf.str("package:dials", pattern = "degree")
    +degree : function (range = c(1, 3), trans = NULL)  
    +degree_int : function (range = c(1L, 3L), trans = NULL)  
    +prod_degree : function (range = c(1L, 2L), trans = NULL)  
    +spline_degree : function (range = c(3L, 10L), trans = NULL)  
    +

    Looking at the range values, some return doubles and others return integers. For our problem, degree_int() would be a good choice.

    +

    For step_poly() the tunable() S3 method could be:

    +
    +
    tunable.step_poly <- function (x, ...) {
    +  tibble::tibble(
    +    name = c("degree"),
    +    call_info = list(list(pkg = "dials", fun = "degree_int")),
    +    source = "recipe",
    +    component = "step_poly",
    +    component_id = x$id
    +  )
    +}
    +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  modeldata  * 1.1.0   2023-01-25 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/index.html b/docs/learn/index.html new file mode 100644 index 00000000..d9099d7e --- /dev/null +++ b/docs/learn/index.html @@ -0,0 +1,1465 @@ + + + + + + + + + + +tidymodels - Learn + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Learn

    +
    + +
    +
    + Learn how to go farther with tidymodels in your modeling and machine learning projects. +
    +
    + + +
    + + + + +
    + + +
    + +

    After you know what you need to get started with tidymodels, you can learn more and go further. Find articles here to help you solve specific problems using the tidymodels framework.

    + + + + +
    + +
    + + + + + \ No newline at end of file diff --git a/content/learn/models/coefficients/figs/glmnet-plot-1.svg b/docs/learn/models/coefficients/figs/glmnet-plot-1.svg similarity index 100% rename from content/learn/models/coefficients/figs/glmnet-plot-1.svg rename to docs/learn/models/coefficients/figs/glmnet-plot-1.svg diff --git a/content/learn/models/coefficients/figs/lm-plot-1.svg b/docs/learn/models/coefficients/figs/lm-plot-1.svg similarity index 100% rename from content/learn/models/coefficients/figs/lm-plot-1.svg rename to docs/learn/models/coefficients/figs/lm-plot-1.svg diff --git a/docs/learn/models/coefficients/index.html b/docs/learn/models/coefficients/index.html new file mode 100644 index 00000000..0979240e --- /dev/null +++ b/docs/learn/models/coefficients/index.html @@ -0,0 +1,1231 @@ + + + + + + + + + + +tidymodels - Working with model coefficients + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Working with model coefficients

    +
    +
    model fitting
    +
    tidying results
    +
    linear regression
    +
    model tuning
    +
    +
    + +
    +
    +

    Create models that use coefficients, extract them from fitted models, and visualize them.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    There are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients.

    +

    This article describes how to retrieve the estimated coefficients from models fit using tidymodels. To use code in this article, you will need to install the following packages: glmnet and tidymodels.

    +
    +
    +

    Linear regression

    +

    Let’s start with a linear regression model:

    +

    \[\hat{y} = \hat{\beta}_0 + \hat{\beta}_1x_1 + \ldots + \hat{\beta}_px_p\]

    +

    The \(\beta\) values are the coefficients and the \(x_j\) are model predictors, or features.

    +

    Let’s use the Chicago train data where we predict the ridership at the Clark and Lake station (column name: ridership) with the previous ridership data 14 days prior at three of the stations.

    +

    The data are in the modeldata package:

    +
    +
    library(tidymodels)
    +tidymodels_prefer()
    +theme_set(theme_bw())
    +
    +data(Chicago)
    +
    +Chicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem)
    +
    +
    +

    A single model

    +

    Let’s start by fitting only a single parsnip model object. We’ll create a model specification using linear_reg().

    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    The default engine is "lm" so no call to set_engine() is required.

    +
    +
    +

    The fit() function estimates the model coefficients, given a formula and data set.

    +
    +
    lm_spec <- linear_reg()
    +lm_fit <- fit(lm_spec, ridership ~ ., data = Chicago)
    +lm_fit
    +#> parsnip model object
    +#> 
    +#> 
    +#> Call:
    +#> stats::lm(formula = ridership ~ ., data = data)
    +#> 
    +#> Coefficients:
    +#> (Intercept)   Clark_Lake       Austin       Harlem  
    +#>      1.6778       0.9035       0.6123      -0.5550
    +
    +

    The best way to retrieve the fitted parameters is to use the tidy() method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names:

    +
    +
    tidy(lm_fit)
    +#> # A tibble: 4 × 5
    +#>   term        estimate std.error statistic   p.value
    +#>   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
    +#> 1 (Intercept)    1.68     0.156      10.7  1.11e- 26
    +#> 2 Clark_Lake     0.904    0.0280     32.3  5.14e-210
    +#> 3 Austin         0.612    0.320       1.91 5.59e-  2
    +#> 4 Harlem        -0.555    0.165      -3.36 7.85e-  4
    +
    +

    We’ll use this function in subsequent sections.

    +
    +
    +

    Resampled or tuned models

    +

    The tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the bootstrap to resample the data. This is a standard resampling approach when evaluating the uncertainty in statistical estimates.

    +

    We’ll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates).

    +
    +
    set.seed(123)
    +bt <- bootstraps(Chicago, times = 5)
    +
    +

    With resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function fit_resamples() is the recommended approach for doing so.

    +
    +
    +
    + +
    +
    +Warning +
    +
    +
    +

    The fit_resamples() function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to fit_resamples() that can save the model object or any other aspect of the fit.

    +
    +
    +

    This function takes a single argument that represents the fitted workflow object (even if you don’t give fit_resamples() a workflow).

    +

    From this, we can extract the model fit. There are two “levels” of model objects that are available:

    +
      +
    • The parsnip model object, which wraps the underlying model object. We retrieve this using the extract_fit_parsnip() function.

    • +
    • The underlying model object (a.k.a. the engine fit) via the extract_fit_engine().

    • +
    +

    We’ll use the latter option and then tidy this model object as we did in the previous section. Let’s add this to the control function so that we can re-use it.

    +
    +
    get_lm_coefs <- function(x) {
    +  x %>% 
    +    # get the lm model object
    +    extract_fit_engine() %>% 
    +    # transform its format
    +    tidy()
    +}
    +tidy_ctrl <- control_grid(extract = get_lm_coefs)
    +
    +

    This argument is then passed to fit_resamples():

    +
    +
    lm_res <- 
    +  lm_spec %>% 
    +  fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl)
    +lm_res
    +#> # Resampling results
    +#> # Bootstrap sampling 
    +#> # A tibble: 5 × 5
    +#>   splits              id         .metrics         .notes           .extracts
    +#>   <list>              <chr>      <list>           <list>           <list>   
    +#> 1 <split [5698/2076]> Bootstrap1 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> 
    +#> 2 <split [5698/2098]> Bootstrap2 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> 
    +#> 3 <split [5698/2064]> Bootstrap3 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> 
    +#> 4 <split [5698/2082]> Bootstrap4 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> 
    +#> 5 <split [5698/2088]> Bootstrap5 <tibble [2 × 4]> <tibble [0 × 3]> <tibble>
    +
    +

    Note that there is a .extracts column in our resampling results. This object contains the output of our get_lm_coefs() function for each resample. The structure of the elements of this column is a little complex. Let’s start by looking at the first element (which corresponds to the first resample):

    +
    +
    lm_res$.extracts[[1]]
    +#> # A tibble: 1 × 2
    +#>   .extracts        .config             
    +#>   <list>           <chr>               
    +#> 1 <tibble [4 × 5]> Preprocessor1_Model1
    +
    +

    There is another column in this element called .extracts that has the results of the tidy() function call:

    +
    +
    lm_res$.extracts[[1]]$.extracts[[1]]
    +#> # A tibble: 4 × 5
    +#>   term        estimate std.error statistic   p.value
    +#>   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
    +#> 1 (Intercept)    1.40     0.157       8.90 7.23e- 19
    +#> 2 Clark_Lake     0.842    0.0280     30.1  2.39e-184
    +#> 3 Austin         1.46     0.320       4.54 5.70e-  6
    +#> 4 Harlem        -0.637    0.163      -3.92 9.01e-  5
    +
    +

    These nested columns can be flattened via the purrr unnest() function:

    +
    +
    lm_res %>% 
    +  select(id, .extracts) %>% 
    +  unnest(.extracts) 
    +#> # A tibble: 5 × 3
    +#>   id         .extracts        .config             
    +#>   <chr>      <list>           <chr>               
    +#> 1 Bootstrap1 <tibble [4 × 5]> Preprocessor1_Model1
    +#> 2 Bootstrap2 <tibble [4 × 5]> Preprocessor1_Model1
    +#> 3 Bootstrap3 <tibble [4 × 5]> Preprocessor1_Model1
    +#> 4 Bootstrap4 <tibble [4 × 5]> Preprocessor1_Model1
    +#> 5 Bootstrap5 <tibble [4 × 5]> Preprocessor1_Model1
    +
    +

    We still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format:

    +
    +
    lm_coefs <- 
    +  lm_res %>% 
    +  select(id, .extracts) %>% 
    +  unnest(.extracts) %>% 
    +  unnest(.extracts)
    +
    +lm_coefs %>% select(id, term, estimate, p.value)
    +#> # A tibble: 20 × 4
    +#>    id         term        estimate   p.value
    +#>    <chr>      <chr>          <dbl>     <dbl>
    +#>  1 Bootstrap1 (Intercept)    1.40  7.23e- 19
    +#>  2 Bootstrap1 Clark_Lake     0.842 2.39e-184
    +#>  3 Bootstrap1 Austin         1.46  5.70e-  6
    +#>  4 Bootstrap1 Harlem        -0.637 9.01e-  5
    +#>  5 Bootstrap2 (Intercept)    1.69  2.87e- 28
    +#>  6 Bootstrap2 Clark_Lake     0.911 1.06e-219
    +#>  7 Bootstrap2 Austin         0.595 5.93e-  2
    +#>  8 Bootstrap2 Harlem        -0.580 3.88e-  4
    +#>  9 Bootstrap3 (Intercept)    1.27  3.43e- 16
    +#> 10 Bootstrap3 Clark_Lake     0.859 5.03e-194
    +#> 11 Bootstrap3 Austin         1.09  6.77e-  4
    +#> 12 Bootstrap3 Harlem        -0.470 4.34e-  3
    +#> 13 Bootstrap4 (Intercept)    1.95  2.91e- 34
    +#> 14 Bootstrap4 Clark_Lake     0.974 1.47e-233
    +#> 15 Bootstrap4 Austin        -0.116 7.21e-  1
    +#> 16 Bootstrap4 Harlem        -0.620 2.11e-  4
    +#> 17 Bootstrap5 (Intercept)    1.87  1.98e- 33
    +#> 18 Bootstrap5 Clark_Lake     0.901 1.16e-210
    +#> 19 Bootstrap5 Austin         0.494 1.15e-  1
    +#> 20 Bootstrap5 Harlem        -0.512 1.73e-  3
    +
    +

    That’s better! Now, let’s plot the model coefficients for each resample:

    +
    +
    lm_coefs %>%
    +  filter(term != "(Intercept)") %>% 
    +  ggplot(aes(x = term, y = estimate, group = id, col = id)) +  
    +  geom_hline(yintercept = 0, lty = 3) + 
    +  geom_line(alpha = 0.3, lwd = 1.2) + 
    +  labs(y = "Coefficient", x = NULL) +
    +  theme(legend.position = "top")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    There seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two.

    +

    Looking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases.

    +
    +
    +
    +

    More complex: a glmnet model

    +

    The glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a. penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set.

    +

    There are two types of penalization that this model uses:

    +
      +
    • Lasso (a.k.a. \(L_1\)) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model).

    • +
    • Weight decay (a.k.a. ridge regression or \(L_2\)) uses a different type of penalty that is most useful for highly correlated predictors.

    • +
    +

    The glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification:

    +
    +
    glmnet_spec <- 
    +  linear_reg(penalty = 0.1, mixture = 0.95) %>% 
    +  set_engine("glmnet")
    +
    +

    has a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high).

    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    Models with regularization require that predictors are all on the same scale. The ridership values at our three stations are very different, but glmnet automatically centers and scales the data. You can use recipes to center and scale your data yourself.

    +
    +
    +

    Let’s combine the model specification with a formula in a model workflow() and then fit the model to the data:

    +
    +
    glmnet_wflow <- 
    +  workflow() %>% 
    +  add_model(glmnet_spec) %>% 
    +  add_formula(ridership ~ .)
    +
    +glmnet_fit <- fit(glmnet_wflow, Chicago)
    +glmnet_fit
    +#> ══ Workflow [trained] ════════════════════════════════════════════════
    +#> Preprocessor: Formula
    +#> Model: linear_reg()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> ridership ~ .
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> 
    +#> Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~0.95) 
    +#> 
    +#>    Df  %Dev Lambda
    +#> 1   0  0.00 6.1040
    +#> 2   1 12.75 5.5620
    +#> 3   1 23.45 5.0680
    +#> 4   1 32.43 4.6180
    +#> 5   1 39.95 4.2070
    +#> 6   1 46.25 3.8340
    +#> 7   1 51.53 3.4930
    +#> 8   1 55.94 3.1830
    +#> 9   1 59.62 2.9000
    +#> 10  1 62.70 2.6420
    +#> 11  2 65.28 2.4080
    +#> 12  2 67.44 2.1940
    +#> 13  2 69.23 1.9990
    +#> 14  2 70.72 1.8210
    +#> 15  2 71.96 1.6600
    +#> 16  2 73.00 1.5120
    +#> 17  2 73.86 1.3780
    +#> 18  2 74.57 1.2550
    +#> 19  2 75.17 1.1440
    +#> 20  2 75.66 1.0420
    +#> 21  2 76.07 0.9496
    +#> 22  2 76.42 0.8653
    +#> 23  2 76.70 0.7884
    +#> 24  2 76.94 0.7184
    +#> 25  2 77.13 0.6545
    +#> 26  2 77.30 0.5964
    +#> 27  2 77.43 0.5434
    +#> 28  2 77.55 0.4951
    +#> 29  2 77.64 0.4512
    +#> 30  2 77.72 0.4111
    +#> 31  2 77.78 0.3746
    +#> 32  2 77.84 0.3413
    +#> 33  2 77.88 0.3110
    +#> 34  2 77.92 0.2833
    +#> 35  2 77.95 0.2582
    +#> 36  2 77.98 0.2352
    +#> 37  2 78.00 0.2143
    +#> 38  2 78.01 0.1953
    +#> 39  2 78.03 0.1779
    +#> 40  2 78.04 0.1621
    +#> 41  2 78.05 0.1477
    +#> 42  2 78.06 0.1346
    +#> 43  2 78.07 0.1226
    +#> 44  2 78.07 0.1118
    +#> 45  2 78.08 0.1018
    +#> 46  2 78.08 0.0928
    +#> 
    +#> ...
    +#> and 9 more lines.
    +
    +

    In this output, the term lambda is used to represent the penalty.

    +

    Note that the output shows many values of the penalty despite our specification of penalty = 0.1. It turns out that this model fits a “path” of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object.

    +

    Let’s look at two different approaches to obtaining the coefficients. Both will use the tidy() method. One will tidy a glmnet object and the other will tidy a tidymodels object.

    +
    +

    Using glmnet penalty values

    +

    This glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are 55 penalties available. To get the set of penalties produced for this data set, we can extract the engine fit and tidy:

    +
    +
    glmnet_fit %>% 
    +  extract_fit_engine() %>% 
    +  tidy() %>% 
    +  rename(penalty = lambda) %>%   # <- for consistent naming
    +  filter(term != "(Intercept)")
    +#> # A tibble: 99 × 5
    +#>    term        step estimate penalty dev.ratio
    +#>    <chr>      <dbl>    <dbl>   <dbl>     <dbl>
    +#>  1 Clark_Lake     2   0.0753    5.56     0.127
    +#>  2 Clark_Lake     3   0.145     5.07     0.234
    +#>  3 Clark_Lake     4   0.208     4.62     0.324
    +#>  4 Clark_Lake     5   0.266     4.21     0.400
    +#>  5 Clark_Lake     6   0.319     3.83     0.463
    +#>  6 Clark_Lake     7   0.368     3.49     0.515
    +#>  7 Clark_Lake     8   0.413     3.18     0.559
    +#>  8 Clark_Lake     9   0.454     2.90     0.596
    +#>  9 Clark_Lake    10   0.491     2.64     0.627
    +#> 10 Clark_Lake    11   0.526     2.41     0.653
    +#> # ℹ 89 more rows
    +
    +

    This works well, but it turns out that our penalty value (0.1) is not in the list produced by the model! The underlying package has functions that use interpolation to produce coefficients for this specific value, but the tidy() method for glmnet objects does not use it.

    +
    +
    +

    Using specific penalty values

    +

    If we run the tidy() method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified:

    +
    +
    tidy(glmnet_fit)
    +#> # A tibble: 4 × 3
    +#>   term        estimate penalty
    +#>   <chr>          <dbl>   <dbl>
    +#> 1 (Intercept)    1.69      0.1
    +#> 2 Clark_Lake     0.846     0.1
    +#> 3 Austin         0.271     0.1
    +#> 4 Harlem         0         0.1
    +
    +

    For any other (single) penalty, we can use an additional argument:

    +
    +
    tidy(glmnet_fit, penalty = 5.5620)  # A value from above
    +#> # A tibble: 4 × 3
    +#>   term        estimate penalty
    +#>   <chr>          <dbl>   <dbl>
    +#> 1 (Intercept)  12.6       5.56
    +#> 2 Clark_Lake    0.0753    5.56
    +#> 3 Austin        0         5.56
    +#> 4 Harlem        0         5.56
    +
    +

    The reason for having two tidy() methods is that, with tidymodels, the focus is on using a specific penalty value.

    +
    +
    +

    Tuning a glmnet model

    +

    If we know a priori acceptable values for penalty and mixture, we can use the fit_resamples() function as we did before with linear regression. Otherwise, we can tune those parameters with the tidymodels tune_*() functions.

    +

    Let’s tune our glmnet model over both parameters with this grid:

    +
    +
    pen_vals <- 10^seq(-3, 0, length.out = 10)
    +grid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0))
    +
    +

    Here is where more glmnet-related complexity comes in: we know that each resample and each value of mixture will probably produce a different set of penalty values contained in the model object. How can we look at the coefficients at the specific penalty values that we are using to tune?

    +

    The approach that we suggest is to use the special path_values option for glmnet. Details are described in the technical documentation about glmnet and tidymodels but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture).

    +

    We can pass these as an engine argument and then update our previous workflow object:

    +
    +
    glmnet_tune_spec <- 
    +  linear_reg(penalty = tune(), mixture = tune()) %>% 
    +  set_engine("glmnet", path_values = pen_vals)
    +
    +glmnet_wflow <- 
    +  glmnet_wflow %>% 
    +  update_model(glmnet_tune_spec)
    +
    +

    Now we will use an extraction function similar to when we used ordinary least squares. We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty:

    +
    +
    get_glmnet_coefs <- function(x) {
    +  x %>% 
    +    extract_fit_engine() %>% 
    +    tidy(return_zeros = TRUE) %>% 
    +    rename(penalty = lambda)
    +}
    +parsnip_ctrl <- control_grid(extract = get_glmnet_coefs)
    +
    +glmnet_res <- 
    +  glmnet_wflow %>% 
    +  tune_grid(
    +    resamples = bt,
    +    grid = grid,
    +    control = parsnip_ctrl
    +  )
    +glmnet_res
    +#> # Tuning results
    +#> # Bootstrap sampling 
    +#> # A tibble: 5 × 5
    +#>   splits              id         .metrics          .notes           .extracts
    +#>   <list>              <chr>      <list>            <list>           <list>   
    +#> 1 <split [5698/2076]> Bootstrap1 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> 
    +#> 2 <split [5698/2098]> Bootstrap2 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> 
    +#> 3 <split [5698/2064]> Bootstrap3 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> 
    +#> 4 <split [5698/2082]> Bootstrap4 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> 
    +#> 5 <split [5698/2088]> Bootstrap5 <tibble [40 × 6]> <tibble [0 × 3]> <tibble>
    +
    +

    As noted before, the elements of the main .extracts column have an embedded list column with the results of get_glmnet_coefs():

    +
    +
    glmnet_res$.extracts[[1]] %>% head()
    +#> # A tibble: 6 × 4
    +#>   penalty mixture .extracts         .config              
    +#>     <dbl>   <dbl> <list>            <chr>                
    +#> 1       1     0.1 <tibble [40 × 5]> Preprocessor1_Model01
    +#> 2       1     0.1 <tibble [40 × 5]> Preprocessor1_Model02
    +#> 3       1     0.1 <tibble [40 × 5]> Preprocessor1_Model03
    +#> 4       1     0.1 <tibble [40 × 5]> Preprocessor1_Model04
    +#> 5       1     0.1 <tibble [40 × 5]> Preprocessor1_Model05
    +#> 6       1     0.1 <tibble [40 × 5]> Preprocessor1_Model06
    +
    +glmnet_res$.extracts[[1]]$.extracts[[1]] %>% head()
    +#> # A tibble: 6 × 5
    +#>   term         step estimate penalty dev.ratio
    +#>   <chr>       <dbl>    <dbl>   <dbl>     <dbl>
    +#> 1 (Intercept)     1    0.568  1          0.769
    +#> 2 (Intercept)     2    0.432  0.464      0.775
    +#> 3 (Intercept)     3    0.607  0.215      0.779
    +#> 4 (Intercept)     4    0.846  0.1        0.781
    +#> 5 (Intercept)     5    1.06   0.0464     0.782
    +#> 6 (Intercept)     6    1.22   0.0215     0.783
    +
    +

    As before, we’ll have to use a double unnest(). Since the penalty value is in both the top-level and lower-level .extracts, we’ll use select() to get rid of the first version (but keep mixture):

    +
    +
    glmnet_res %>% 
    +  select(id, .extracts) %>% 
    +  unnest(.extracts) %>% 
    +  select(id, mixture, .extracts) %>%  # <- removes the first penalty column
    +  unnest(.extracts)
    +
    +

    But wait! We know that each glmnet fit contains all of the coefficients. This means, for a specific resample and value of mixture, the results are the same:

    +
    +
    all.equal(
    +  # First bootstrap, first `mixture`, first `penalty`
    +  glmnet_res$.extracts[[1]]$.extracts[[1]],
    +  # First bootstrap, first `mixture`, second `penalty`
    +  glmnet_res$.extracts[[1]]$.extracts[[2]]
    +)
    +#> [1] TRUE
    +
    +

    For this reason, we’ll add a slice(1) when grouping by id and mixture. This will get rid of the replicated results.

    +
    +
    glmnet_coefs <- 
    +  glmnet_res %>% 
    +  select(id, .extracts) %>% 
    +  unnest(.extracts) %>% 
    +  select(id, mixture, .extracts) %>% 
    +  group_by(id, mixture) %>%          # ┐
    +  slice(1) %>%                       # │ Remove the redundant results
    +  ungroup() %>%                      # ┘
    +  unnest(.extracts)
    +
    +glmnet_coefs %>% 
    +  select(id, penalty, mixture, term, estimate) %>% 
    +  filter(term != "(Intercept)")
    +#> # A tibble: 300 × 5
    +#>    id         penalty mixture term       estimate
    +#>    <chr>        <dbl>   <dbl> <chr>         <dbl>
    +#>  1 Bootstrap1 1           0.1 Clark_Lake    0.391
    +#>  2 Bootstrap1 0.464       0.1 Clark_Lake    0.485
    +#>  3 Bootstrap1 0.215       0.1 Clark_Lake    0.590
    +#>  4 Bootstrap1 0.1         0.1 Clark_Lake    0.680
    +#>  5 Bootstrap1 0.0464      0.1 Clark_Lake    0.746
    +#>  6 Bootstrap1 0.0215      0.1 Clark_Lake    0.793
    +#>  7 Bootstrap1 0.01        0.1 Clark_Lake    0.817
    +#>  8 Bootstrap1 0.00464     0.1 Clark_Lake    0.828
    +#>  9 Bootstrap1 0.00215     0.1 Clark_Lake    0.834
    +#> 10 Bootstrap1 0.001       0.1 Clark_Lake    0.837
    +#> # ℹ 290 more rows
    +
    +

    Now we have the coefficients. Let’s look at how they behave as more regularization is used:

    +
    +
    glmnet_coefs %>% 
    +  filter(term != "(Intercept)") %>% 
    +  mutate(mixture = format(mixture)) %>% 
    +  ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + 
    +  geom_hline(yintercept = 0, lty = 3) +
    +  geom_line(alpha = 0.5, lwd = 1.2) + 
    +  facet_wrap(~ term) + 
    +  scale_x_log10() +
    +  scale_color_brewer(palette = "Accent") +
    +  labs(y = "coefficient") +
    +  theme(legend.position = "top")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Notice a couple of things:

    +
      +
    • With a pure lasso model (i.e., mixture = 1), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases.

    • +
    • The Harlem predictor is either quickly selected out of the model or goes from negative to positive.

    • +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  glmnet     * 4.1-7   2023-03-23 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/models/parsnip-nnet/figs/biv-boundary-1.svg b/docs/learn/models/parsnip-nnet/figs/biv-boundary-1.svg new file mode 100644 index 00000000..34fda99a --- /dev/null +++ b/docs/learn/models/parsnip-nnet/figs/biv-boundary-1.svg @@ -0,0 +1,587 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-2.5 +0.0 +2.5 +5.0 + + + + + + + + +-2 +0 +2 +4 +A +B + +class + + + + +Class1 +Class2 + + diff --git a/docs/learn/models/parsnip-nnet/figs/biv-plot-1.svg b/docs/learn/models/parsnip-nnet/figs/biv-plot-1.svg new file mode 100644 index 00000000..24c122c9 --- /dev/null +++ b/docs/learn/models/parsnip-nnet/figs/biv-plot-1.svg @@ -0,0 +1,2089 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-5.0 +-2.5 +0.0 +2.5 +5.0 + + + + + + + + + +-2 +0 +2 +4 +A +B + +class + + + + +Class1 +Class2 + + diff --git a/docs/learn/models/parsnip-nnet/index.html 
b/docs/learn/models/parsnip-nnet/index.html new file mode 100644 index 00000000..fb56df31 --- /dev/null +++ b/docs/learn/models/parsnip-nnet/index.html @@ -0,0 +1,845 @@ + + + + + + + + + + +tidymodels - Classification models using a neural network + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Classification models using a neural network

    +
    +
    model fitting
    +
    torch
    +
    neural networks
    +
    +
    + +
    +
    +

    Train a classification model and evaluate its performance.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: AppliedPredictiveModeling, brulee, and tidymodels. You will also need the python torch library installed (see ?torch::install_torch()).

    +

    We can create classification models with the tidymodels package parsnip to predict categorical quantities or class labels. Here, let’s fit a single classification model using a neural network and evaluate using a validation set. While the tune package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage.

    +
    +
    +

    Fitting a neural network

    +

    Let’s fit a model to a small, two predictor classification data set. The data are simulated with the quadBoundaryFunc() function from the AppliedPredictiveModeling package and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered.

    +
    +
    library(AppliedPredictiveModeling)
    +
    +set.seed(321)
    +cls_train <- quadBoundaryFunc(2000) %>% select(A = X1, B = X2, class)
    +cls_val   <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)
    +cls_test  <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)
    +
    +

    A plot of the data shows two right-skewed predictors:

    +
    +
    ggplot(cls_train, aes(x = A, y = B, col = class)) + 
    +  geom_point(alpha = 1 / 4, cex = 3) + 
    +  coord_fixed()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Let’s use a single hidden layer neural network to predict the outcome. To do this, we put the predictor columns on a common scale (using step_normalize()). We can use recipes to do so:

    +
    +
    biv_rec <- 
    +  recipe(class ~ ., data = cls_train) %>%
    +  step_normalize(all_predictors())
    +
    +

    This recipe is not directly executed; the steps will be estimated when the model is fit.

    +

    We can use the brulee package to fit a model with 10 hidden units and a small amount of weight decay (penalty = 0.01), to regularize the model:

    +
    +
    nnet_spec <- 
    +  mlp(epochs = 1000, hidden_units = 10, penalty = 0.01, learn_rate = 0.1) %>% 
    +  set_engine("brulee", validation = 0) %>% 
    +  set_mode("classification")
    +
    +nnet_wflow <- 
    +  biv_rec %>% 
    +  workflow(nnet_spec)
    +
    +set.seed(987)
    +nnet_fit <- fit(nnet_wflow, cls_train)
    +nnet_fit %>% extract_fit_engine()
    +#> Multilayer perceptron
    +#> 
    +#> relu activation
    +#> 10 hidden units,  52 model parameters
    +#> 2,000 samples, 2 features, 2 classes 
    +#> class weights Class1=1, Class2=1 
    +#> weight decay: 0.01 
    +#> dropout proportion: 0 
    +#> batch size: 2000 
    +#> learn rate: 0.1 
    +#> training set loss after 1000 epochs: 0.375
    +
    +
    +
    +

    Model performance

    +

    In parsnip, the predict() function can be used to characterize performance on the validation set. Since parsnip always produces tibble outputs, these can just be column bound to the original data:

    +
    +
    val_results <- 
    +  cls_val %>%
    +  bind_cols(
    +    predict(nnet_fit, new_data = cls_val),
    +    predict(nnet_fit, new_data = cls_val, type = "prob")
    +  )
    +val_results %>% slice(1:5)
    +#>           A           B  class .pred_class .pred_Class1 .pred_Class2
    +#> 1 0.7632082 -0.04012164 Class2      Class2   0.06255509   0.93744493
    +#> 2 0.9823745 -0.16911637 Class2      Class2   0.05721300   0.94278705
    +#> 3 1.0558147  0.52817699 Class2      Class2   0.10368267   0.89631736
    +#> 4 1.2424507  1.10902951 Class2      Class2   0.34966809   0.65033191
    +#> 5 1.5889815  2.71047720 Class1      Class1   0.97951710   0.02048291
    +
    +val_results %>% roc_auc(truth = class, .pred_Class1)
    +#> # A tibble: 1 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 roc_auc binary         0.957
    +
    +val_results %>% accuracy(truth = class, .pred_class)
    +#> # A tibble: 1 × 3
    +#>   .metric  .estimator .estimate
    +#>   <chr>    <chr>          <dbl>
    +#> 1 accuracy binary          0.91
    +
    +val_results %>% conf_mat(truth = class, .pred_class)
    +#>           Truth
    +#> Prediction Class1 Class2
    +#>     Class1    175     18
    +#>     Class2     27    280
    +
    +

    Let’s also create a grid to get a visual sense of the class boundary for the test set.

    +
    +
    a_rng <- range(cls_train$A)
    +b_rng <- range(cls_train$B)
    +x_grid <-
    +  expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100),
    +              B = seq(b_rng[1], b_rng[2], length.out = 100))
    +
    +
    +# Make predictions using the transformed predictors but 
    +# attach them to the predictors in the original units: 
    +x_grid <- 
    +  x_grid %>% 
    +  bind_cols(predict(nnet_fit, x_grid, type = "prob"))
    +
    +ggplot(x_grid, aes(x = A, y = B)) + 
    +  geom_point(data = cls_test, aes(col = class), alpha = 1 / 2, cex = 3) +
    +  geom_contour(aes(z = .pred_Class1), breaks = .5, col = "black", linewidth = 1) + 
    +  coord_fixed()
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package                   * version date (UTC) lib source
    +#>  AppliedPredictiveModeling * 1.1-7   2018-05-22 [1] CRAN (R 4.3.0)
    +#>  broom                     * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  brulee                      0.2.0   2022-09-19 [1] CRAN (R 4.3.0)
    +#>  dials                     * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr                     * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2                   * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer                     * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip                   * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr                     * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes                   * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang                       1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample                   * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble                    * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels                * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune                      * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows                 * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick                 * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg b/docs/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg similarity index 100% rename from content/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg rename to docs/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg diff --git a/docs/learn/models/parsnip-ranger-glmnet/index.html b/docs/learn/models/parsnip-ranger-glmnet/index.html new file mode 100644 index 00000000..62ccec31 --- /dev/null +++ b/docs/learn/models/parsnip-ranger-glmnet/index.html @@ -0,0 +1,1056 @@ + + + + + + + + + + +tidymodels - Regression models two ways + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Regression models two ways

    +
    +
    model fitting
    +
    random forests
    +
    linear regression
    +
    +
    + +
    +
    +

    Create and train different kinds of regression models with different computational engines.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: glmnet, randomForest, ranger, and tidymodels.

    +

    We can create regression models with the tidymodels package parsnip to predict continuous or numeric quantities. Here, let’s first fit a random forest model, which does not require all numeric input (see discussion here) and discuss how to use fit() and fit_xy(), as well as data descriptors.

    +

    Second, let’s fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip.

    +
    +
    +

    The Ames housing data

    +

    We’ll use the Ames housing data set to demonstrate how to create regression models using parsnip. First, set up the data set and create a simple training/test set split:

    +
    +
    library(tidymodels)
    +
    +data(ames)
    +
    +set.seed(4595)
    +data_split <- initial_split(ames, strata = "Sale_Price", prop = 0.75)
    +
    +ames_train <- training(data_split)
    +ames_test  <- testing(data_split)
    +
    +

    The use of the test set here is only for illustration; normally in a data analysis these data would be saved to the very end after many models have been evaluated.

    +
    +
    +

    Random forest

    +

    We’ll start by fitting a random forest model to a small set of parameters. Let’s create a model with the predictors Longitude, Latitude, Lot_Area, Neighborhood, and Year_Sold. A simple random forest model can be specified via:

    +
    +
    rf_defaults <- rand_forest(mode = "regression")
    +rf_defaults
    +#> Random Forest Model Specification (regression)
    +#> 
    +#> Computational engine: ranger
    +
    +

    The model will be fit with the ranger package by default. Since we didn’t add any extra arguments to fit, many of the arguments will be set to their defaults from the function ranger::ranger(). The help pages for the model function describe the default parameters and you can also use the translate() function to check out such details.

    +

    The parsnip package provides two different interfaces to fit a model:

    +
      +
    • the formula interface (fit()), and
    • +
    • the non-formula interface (fit_xy()).
    • +
    +

    Let’s start with the non-formula interface:

    +
    +
    preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold")
    +
    +rf_xy_fit <- 
    +  rf_defaults %>%
    +  set_engine("ranger") %>%
    +  fit_xy(
    +    x = ames_train[, preds],
    +    y = log10(ames_train$Sale_Price)
    +  )
    +
    +rf_xy_fit
    +#> parsnip model object
    +#> 
    +#> Ranger result
    +#> 
    +#> Call:
    +#>  ranger::ranger(x = maybe_data_frame(x), y = y, num.threads = 1,      verbose = FALSE, seed = sample.int(10^5, 1)) 
    +#> 
    +#> Type:                             Regression 
    +#> Number of trees:                  500 
    +#> Sample size:                      2197 
    +#> Number of independent variables:  5 
    +#> Mtry:                             2 
    +#> Target node size:                 5 
    +#> Variable importance mode:         none 
    +#> Splitrule:                        variance 
    +#> OOB prediction error (MSE):       0.008500188 
    +#> R squared (OOB):                  0.7239116
    +
    +

    The non-formula interface doesn’t do anything to the predictors before passing them to the underlying model function. This particular model does not require indicator variables (sometimes called “dummy variables”) to be created prior to fitting the model. Note that the output shows “Number of independent variables: 5”.

    +

    For regression models, we can use the basic predict() method, which returns a tibble with a column named .pred:

    +
    +
    test_results <- 
    +  ames_test %>%
    +  select(Sale_Price) %>%
    +  mutate(Sale_Price = log10(Sale_Price)) %>%
    +  bind_cols(
    +    predict(rf_xy_fit, new_data = ames_test[, preds])
    +  )
    +test_results %>% slice(1:5)
    +#> # A tibble: 5 × 2
    +#>   Sale_Price .pred
    +#>        <dbl> <dbl>
    +#> 1       5.39  5.25
    +#> 2       5.28  5.29
    +#> 3       5.23  5.26
    +#> 4       5.21  5.30
    +#> 5       5.60  5.51
    +
    +# summarize performance
    +test_results %>% metrics(truth = Sale_Price, estimate = .pred) 
    +#> # A tibble: 3 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 rmse    standard      0.0945
    +#> 2 rsq     standard      0.733 
    +#> 3 mae     standard      0.0629
    +
    +

    Note that:

    +
      +
    • If the model required indicator variables, we would have to create them manually prior to using fit() (perhaps using the recipes package).
    • +
    • We had to manually log the outcome prior to modeling.
    • +
    +

    Now, for illustration, let’s use the formula method using some new parameter values:

    +
    +
    rand_forest(mode = "regression", mtry = 3, trees = 1000) %>%
    +  set_engine("ranger") %>%
    +  fit(
    +    log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,
    +    data = ames_train
    +  )
    +#> parsnip model object
    +#> 
    +#> Ranger result
    +#> 
    +#> Call:
    +#>  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~3,      x), num.trees = ~1000, num.threads = 1, verbose = FALSE,      seed = sample.int(10^5, 1)) 
    +#> 
    +#> Type:                             Regression 
    +#> Number of trees:                  1000 
    +#> Sample size:                      2197 
    +#> Number of independent variables:  5 
    +#> Mtry:                             3 
    +#> Target node size:                 5 
    +#> Variable importance mode:         none 
    +#> Splitrule:                        variance 
    +#> OOB prediction error (MSE):       0.008402569 
    +#> R squared (OOB):                  0.7270823
    +
    +

    Suppose that we would like to use the randomForest package instead of ranger. To do so, the only part of the syntax that needs to change is the set_engine() argument:

    +
    +
    rand_forest(mode = "regression", mtry = 3, trees = 1000) %>%
    +  set_engine("randomForest") %>%
    +  fit(
    +    log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,
    +    data = ames_train
    +  )
    +#> parsnip model object
    +#> 
    +#> 
    +#> Call:
    +#>  randomForest(x = maybe_data_frame(x), y = y, ntree = ~1000, mtry = min_cols(~3,      x)) 
    +#>                Type of random forest: regression
    +#>                      Number of trees: 1000
    +#> No. of variables tried at each split: 3
    +#> 
    +#>           Mean of squared residuals: 0.008472074
    +#>                     % Var explained: 72.47
    +
    +

    Look at the formula code that was printed out; one function uses the argument name ntree and the other uses num.trees. The parsnip models don’t require you to know the specific names of the main arguments.

    +

    Now suppose that we want to modify the value of mtry based on the number of predictors in the data. Usually, a good default value is floor(sqrt(num_predictors)) but a pure bagging model requires an mtry value equal to the total number of predictors. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code.

    +

    When the model is being fit by parsnip, data descriptors are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using rand_forest()), the values of the arguments that you give it are immediately evaluated unless you delay them. To delay the evaluation of any argument, you can use rlang::expr() to make an expression.

    +

    Two relevant data descriptors for our example model are:

    +
      +
    • .preds(): the number of predictor variables in the data set that are associated with the predictors prior to dummy variable creation.
    • +
    • .cols(): the number of predictor columns after dummy variables (or other encodings) are created.
    • +
    +

    Since ranger won’t create indicator values, .preds() would be appropriate for mtry for a bagging model.

    +

    For example, let’s use an expression with the .preds() descriptor to fit a bagging model:

    +
    +
    rand_forest(mode = "regression", mtry = .preds(), trees = 1000) %>%
    +  set_engine("ranger") %>%
    +  fit(
    +    log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,
    +    data = ames_train
    +  )
    +#> parsnip model object
    +#> 
    +#> Ranger result
    +#> 
    +#> Call:
    +#>  ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~.preds(),      x), num.trees = ~1000, num.threads = 1, verbose = FALSE,      seed = sample.int(10^5, 1)) 
    +#> 
    +#> Type:                             Regression 
    +#> Number of trees:                  1000 
    +#> Sample size:                      2197 
    +#> Number of independent variables:  5 
    +#> Mtry:                             5 
    +#> Target node size:                 5 
    +#> Variable importance mode:         none 
    +#> Splitrule:                        variance 
    +#> OOB prediction error (MSE):       0.00867085 
    +#> R squared (OOB):                  0.7183685
    +
    +
    +
    +

    Regularized regression

    +

    A linear model might work for this data set as well. We can use the linear_reg() parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let’s use the former here. The glmnet package only implements a non-formula method, but parsnip will allow either one to be used.

    +

    When regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won’t do that automatically so we will need to do this ourselves. We’ll use the recipes package for these steps.

    +
    +
    norm_recipe <- 
    +  recipe(
    +    Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, 
    +    data = ames_train
    +  ) %>%
    +  step_other(Neighborhood) %>% 
    +  step_dummy(all_nominal()) %>%
    +  step_center(all_predictors()) %>%
    +  step_scale(all_predictors()) %>%
    +  step_log(Sale_Price, base = 10) %>% 
    +  # estimate the means and standard deviations
    +  prep(training = ames_train, retain = TRUE)
    +
    +# Now let's fit the model using the processed version of the data
    +
    +glmn_fit <- 
    +  linear_reg(penalty = 0.001, mixture = 0.5) %>% 
    +  set_engine("glmnet") %>%
    +  fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL))
    +glmn_fit
    +#> parsnip model object
    +#> 
    +#> 
    +#> Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "gaussian",      alpha = ~0.5) 
    +#> 
    +#>    Df  %Dev   Lambda
    +#> 1   0  0.00 0.138300
    +#> 2   1  1.96 0.126000
    +#> 3   1  3.72 0.114800
    +#> 4   1  5.28 0.104600
    +#> 5   2  7.07 0.095320
    +#> 6   3  9.64 0.086850
    +#> 7   4 12.58 0.079140
    +#> 8   5 15.45 0.072110
    +#> 9   5 17.93 0.065700
    +#> 10  7 20.81 0.059860
    +#> 11  7 23.51 0.054550
    +#> 12  7 25.82 0.049700
    +#> 13  8 28.20 0.045290
    +#> 14  8 30.31 0.041260
    +#> 15  8 32.12 0.037600
    +#> 16  8 33.66 0.034260
    +#> 17  8 34.97 0.031210
    +#> 18  8 36.08 0.028440
    +#> 19  8 37.02 0.025910
    +#> 20  9 37.90 0.023610
    +#> 21  9 38.65 0.021510
    +#> 22  9 39.29 0.019600
    +#> 23  9 39.83 0.017860
    +#> 24  9 40.28 0.016270
    +#> 25 10 40.68 0.014830
    +#> 26 11 41.06 0.013510
    +#> 27 11 41.38 0.012310
    +#> 28 11 41.65 0.011220
    +#> 29 11 41.88 0.010220
    +#> 30 12 42.09 0.009313
    +#> 31 12 42.27 0.008486
    +#> 32 12 42.43 0.007732
    +#> 33 12 42.56 0.007045
    +#> 34 12 42.66 0.006419
    +#> 35 12 42.75 0.005849
    +#> 36 12 42.83 0.005329
    +#> 37 12 42.90 0.004856
    +#> 38 12 42.95 0.004424
    +#> 39 12 42.99 0.004031
    +#> 40 12 43.03 0.003673
    +#> 41 12 43.06 0.003347
    +#> 42 12 43.09 0.003050
    +#> 43 12 43.11 0.002779
    +#> 44 12 43.13 0.002532
    +#> 45 12 43.15 0.002307
    +#> 46 12 43.16 0.002102
    +#> 47 12 43.17 0.001915
    +#> 48 12 43.18 0.001745
    +#> 49 12 43.19 0.001590
    +#> 50 12 43.19 0.001449
    +#> 51 12 43.20 0.001320
    +#> 52 12 43.20 0.001203
    +#> 53 12 43.21 0.001096
    +#> 54 12 43.21 0.000999
    +#> 55 12 43.21 0.000910
    +#> 56 12 43.21 0.000829
    +#> 57 12 43.22 0.000755
    +#> 58 12 43.22 0.000688
    +#> 59 12 43.22 0.000627
    +#> 60 12 43.22 0.000571
    +#> 61 12 43.22 0.000521
    +#> 62 12 43.22 0.000474
    +#> 63 12 43.22 0.000432
    +#> 64 12 43.22 0.000394
    +#> 65 12 43.22 0.000359
    +
    +

    If penalty were not specified, all of the lambda values would be computed.

    +

    To get the predictions for this specific value of lambda (aka penalty):

    +
    +
    # First, get the processed version of the test set predictors:
    +test_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors())
    +
    +test_results <- 
    +  test_results %>%
    +  rename(`random forest` = .pred) %>%
    +  bind_cols(
    +    predict(glmn_fit, new_data = test_normalized) %>%
    +      rename(glmnet = .pred)
    +  )
    +test_results
    +#> # A tibble: 733 × 3
    +#>    Sale_Price `random forest` glmnet
    +#>         <dbl>           <dbl>  <dbl>
    +#>  1       5.39            5.25   5.16
    +#>  2       5.28            5.29   5.27
    +#>  3       5.23            5.26   5.24
    +#>  4       5.21            5.30   5.24
    +#>  5       5.60            5.51   5.24
    +#>  6       5.32            5.29   5.26
    +#>  7       5.17            5.14   5.18
    +#>  8       5.06            5.13   5.17
    +#>  9       4.98            5.01   5.18
    +#> 10       5.11            5.14   5.19
    +#> # ℹ 723 more rows
    +
    +test_results %>% metrics(truth = Sale_Price, estimate = glmnet) 
    +#> # A tibble: 3 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 rmse    standard      0.142 
    +#> 2 rsq     standard      0.391 
    +#> 3 mae     standard      0.0979
    +
    +test_results %>% 
    +  gather(model, prediction, -Sale_Price) %>% 
    +  ggplot(aes(x = prediction, y = Sale_Price)) + 
    +  geom_abline(col = "green", lty = 2) + 
    +  geom_point(alpha = .4) + 
    +  facet_wrap(~model) + 
    +  coord_fixed()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This final plot compares the performance of the random forest and regularized regression models.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package      * version date (UTC) lib source
    +#>  broom        * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials        * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr        * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2      * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  glmnet       * 4.1-7   2023-03-23 [1] CRAN (R 4.3.0)
    +#>  infer        * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip      * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr        * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  randomForest * 4.7-1.1 2022-05-23 [1] CRAN (R 4.3.0)
    +#>  ranger       * 0.15.1  2023-04-03 [1] CRAN (R 4.3.0)
    +#>  recipes      * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang          1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample      * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble       * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels   * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune         * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows    * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick    * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/models/pls/figs/plot-1.svg b/docs/learn/models/pls/figs/plot-1.svg similarity index 100% rename from content/learn/models/pls/figs/plot-1.svg rename to docs/learn/models/pls/figs/plot-1.svg diff --git a/docs/learn/models/pls/index.html b/docs/learn/models/pls/index.html new file mode 100644 index 00000000..411481c3 --- /dev/null +++ b/docs/learn/models/pls/index.html @@ -0,0 +1,850 @@ + + + + + + + + + + +tidymodels - Multivariate analysis using partial least squares + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Multivariate analysis using partial least squares

    +
    +
    pre-processing
    +
    multivariate analysis
    +
    partial least squares
    +
    +
    + +
    +
    +

    Build and fit a predictive model with more than one outcome.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: modeldata, pls, and tidymodels.

    +

    “Multivariate analysis” usually refers to multiple outcomes being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns y1 and y2 representing two outcomes to be predicted. The lm() function would look something like:

    +
    +
    lm(cbind(y1, y2) ~ ., data = dat)
    +
    +

    This cbind() call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes.

    +

    The data that we’ll use has three outcomes. From ?modeldata::meats:

    +
    +

    “These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents.

    +
    +
    +

    “For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is -log10 of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry.”

    +
    +

    The goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here.

    +

    To start, let’s take the two data matrices (called endpoints and absorp) and bind them together in a data frame:

    +
    +
    library(modeldata)
    +data(meats)
    +
    +

    The three outcomes have fairly high correlations also.

    +
    +
    +

    Preprocessing the data

    +

    If the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved latent variables that are derived in a manner similar to principal component analysis (PCA).

    +

    PLS, unlike PCA, also incorporates the outcome data when creating the PLS components. Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS chases variation of the predictors and outcomes.

    +

    Since we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables.

    +

    Many base R functions that deal with multivariate outcomes using a formula require the use of cbind() on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically “added” together on the left-hand side:

    +
    +
    norm_rec <- 
    +  recipe(water + fat + protein ~ ., data = meats) %>%
    +  step_normalize(everything()) 
    +
    +

    Before we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the predictors and each of the outcomes. This allows an informed choice to be made based on the level of evidence that the situation requires.

    +

    Since the data set isn’t large, let’s use resampling to measure these proportions. With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions.

    +

    The folds can be created using the rsample package and the recipe can be estimated for each resample using the prepper() function:

    +
    +
    set.seed(57343)
    +folds <- vfold_cv(meats, repeats = 10)
    +
    +folds <- 
    +  folds %>%
    +  mutate(recipes = map(splits, prepper, recipe = norm_rec))
    +
    +
    +
    +

    Partial least squares

    +

    The complicated parts for moving forward are:

    +
      +
    1. Formatting the predictors and outcomes into the format that the pls package requires, and
    2. +
    3. Estimating the proportions.
    4. +
    +

    For the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used retain = TRUE when prepping the recipes, we can bake() with new_data = NULL to get the processed data back out. To save the data as a matrix, the option composition = "matrix" will avoid saving the data as tibbles and use the required format.

    +

    The pls package expects a simple formula to specify the model, but each side of the formula should represent a matrix. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to “protect” the two matrices using I() when adding them to the data frame.

    +

    The calculation for the proportion of variance explained is straightforward for the predictors; the function pls::explvar() will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below).

    +

    The function get_var_explained() shown here will do all these computations and return a data frame with columns components, source (for the predictors, water, etc), and the proportion of variance that is explained by the components.

    +
    +
    library(pls)
    +
    +get_var_explained <- function(recipe, ...) {
    +  
    +  # Extract the predictors and outcomes into their own matrices
    +  y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes())
    +  x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors())
    +  
    +  # The pls package prefers the data in a data frame where the outcome
    +  # and predictors are in _matrices_. To make sure this is formatted
    +  # properly, use the `I()` function to inhibit `data.frame()` from making
    +  # all the individual columns. `pls_format` should have two columns.
    +  pls_format <- data.frame(
    +    endpoints = I(y_mat),
    +    measurements = I(x_mat)
    +  )
    +  # Fit the model
    +  mod <- plsr(endpoints ~ measurements, data = pls_format)
    +  
    +  # Get the proportion of the predictor variance that is explained
    +  # by the model for different number of components. 
    +  xve <- explvar(mod)/100 
    +
    +  # To do the same for the outcome, it is more complex. This code 
    +  # was extracted from pls:::summary.mvr. 
    +  explained <- 
    +    drop(pls::R2(mod, estimate = "train", intercept = FALSE)$val) %>% 
    +    # transpose so that components are in rows
    +    t() %>% 
    +    as_tibble() %>%
    +    # Add the predictor proportions
    +    mutate(predictors = cumsum(xve) %>% as.vector(),
    +           components = seq_along(xve)) %>%
    +    # Put into a tidy format that is tall
    +    pivot_longer(
    +      cols = c(-components),
    +      names_to = "source",
    +      values_to = "proportion"
    +    )
    +}
    +
    +

    We compute this data frame for each resample and save the results in the different columns.

    +
    +
    folds <- 
    +  folds %>%
    +  mutate(var = map(recipes, get_var_explained),
    +         var = unname(var))
    +
    +

    To extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let’s filter the data and compute the average proportion.

    +
    +
    variance_data <- 
    +  bind_rows(folds[["var"]]) %>%
    +  filter(components <= 15) %>%
    +  group_by(components, source) %>%
    +  summarize(proportion = mean(proportion))
    +#> `summarise()` has grouped output by 'components'. You can override
    +#> using the `.groups` argument.
    +
    +

    The plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data.

    +
    +
    ggplot(variance_data, aes(x = components, y = proportion, col = source)) + 
    +  geom_line(alpha = 0.5, size = 1.2) + 
    +  geom_point() 
    +#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
    +#> ℹ Please use `linewidth` instead.
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  modeldata  * 1.1.0   2023-01-25 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  pls        * 2.8-1   2022-07-16 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/models/sub-sampling/figs/merge-metrics-1.svg b/docs/learn/models/sub-sampling/figs/merge-metrics-1.svg new file mode 100644 index 00000000..97d00373 --- /dev/null +++ b/docs/learn/models/sub-sampling/figs/merge-metrics-1.svg @@ -0,0 +1,215 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +j_index + + + + + + + + + + +roc_auc + + + + +no_sampling +rose + + +no_sampling +rose +0.85 +0.90 +0.95 +1.00 + + + + +0.00 +0.25 +0.50 +0.75 + + + + +sampling +.estimate + + diff --git a/docs/learn/models/sub-sampling/index.html b/docs/learn/models/sub-sampling/index.html new file mode 100644 index 00000000..70436d7f --- /dev/null +++ b/docs/learn/models/sub-sampling/index.html @@ -0,0 +1,874 @@ + + + + + + + + + + +tidymodels - Subsampling for class imbalances + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Subsampling for class imbalances

    +
    +
    model fitting
    +
    pre-processing
    +
    class imbalances
    +
    discriminant analysis
    +
    +
    + +
    +
    +

    Improve model performance in imbalanced data sets through undersampling or oversampling.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: discrim, klaR, readr, ROSE, themis, and tidymodels.

    +

    Subsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the frequently occurring class while the minority classes have poor performance.

    +

    This article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in Kuhn and Johnson (2019) for more information on these metrics.

    +
    +
    +

    Simulated data

    +

    Consider a two-class problem where the first class has a very low rate of occurrence. The data were simulated and can be imported into R using the code below:

    +
    +
    imbal_data <- 
    +  readr::read_csv("https://bit.ly/imbal_data") %>% 
    +  mutate(Class = factor(Class))
    +dim(imbal_data)
    +#> [1] 1200   16
    +table(imbal_data$Class)
    +#> 
    +#> Class1 Class2 
    +#>     60   1140
    +
    +

    If “Class1” is the event of interest, it is very likely that a classification model would be able to achieve very good specificity since almost all of the data are of the second class. Sensitivity, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class.

    +

    One result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance.

    +
    +
    +

    Subsampling the data

    +

    One way to alleviate this issue is to subsample the data. There are a number of ways to do this but the simplest one is to sample down (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are better calibrated, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than it would otherwise.

    +

    Let’s explore subsampling using themis::step_rose() in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from Menardi, G. and Torelli, N. (2014). This is an example of an oversampling strategy, rather than undersampling.

    +

    In terms of workflow:

    +
      +
    • It is extremely important that subsampling occurs inside of resampling. Otherwise, the resampling process can produce poor estimates of model performance.
    • +
    • The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen “in the wild” and, for this reason, the skip argument to step_downsample() and other subsampling recipes steps has a default of TRUE.
    • +
    +

    Here is a simple recipe implementing oversampling:

    +
    +
    library(tidymodels)
    +library(themis)
    +imbal_rec <- 
    +  recipe(Class ~ ., data = imbal_data) %>%
    +  step_rose(Class)
    +
    +

    For a model, let’s use a quadratic discriminant analysis (QDA) model. From the discrim package, this model can be specified using:

    +
    +
    library(discrim)
    +qda_mod <- 
    +  discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% 
    +  set_engine("klaR")
    +
    +

    To keep these objects bound together, they can be combined in a workflow:

    +
    +
    qda_rose_wflw <- 
    +  workflow() %>% 
    +  add_model(qda_mod) %>% 
    +  add_recipe(imbal_rec)
    +qda_rose_wflw
    +#> ══ Workflow ══════════════════════════════════════════════════════════
    +#> Preprocessor: Recipe
    +#> Model: discrim_regularized()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> 1 Recipe Step
    +#> 
    +#> • step_rose()
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> Regularized Discriminant Model Specification (classification)
    +#> 
    +#> Main Arguments:
    +#>   frac_common_cov = 0
    +#>   frac_identity = 0
    +#> 
    +#> Computational engine: klaR
    +
    +
    +
    +

    Model performance

    +

    Stratified, repeated 10-fold cross-validation is used to resample the model:

    +
    +
    set.seed(5732)
    +cv_folds <- vfold_cv(imbal_data, strata = "Class", repeats = 5)
    +
    +

    To measure model performance, let’s use two metrics:

    +
      +
    • The area under the ROC curve is an overall assessment of performance across all cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor.
    • +
    • The J index (a.k.a. Youden’s J statistic) is sensitivity + specificity - 1. Values near one are once again best.
    • +
    +

    If a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the J index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics.

    +
    +
    cls_metrics <- metric_set(roc_auc, j_index)
    +
    +

    Now, we train the models and generate the results using tune::fit_resamples():

    +
    +
    set.seed(2180)
    +qda_rose_res <- fit_resamples(
    +  qda_rose_wflw, 
    +  resamples = cv_folds, 
    +  metrics = cls_metrics
    +)
    +
    +collect_metrics(qda_rose_res)
    +#> # A tibble: 2 × 6
    +#>   .metric .estimator  mean     n std_err .config             
    +#>   <chr>   <chr>      <dbl> <int>   <dbl> <chr>               
    +#> 1 j_index binary     0.749    50 0.0234  Preprocessor1_Model1
    +#> 2 roc_auc binary     0.949    50 0.00510 Preprocessor1_Model1
    +
    +

    What do the results look like without using ROSE? We can create another workflow and fit the QDA model along the same resamples:

    +
    +
    qda_wflw <- 
    +  workflow() %>% 
    +  add_model(qda_mod) %>% 
    +  add_formula(Class ~ .)
    +
    +set.seed(2180)
    +qda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics)
    +collect_metrics(qda_only_res)
    +#> # A tibble: 2 × 6
    +#>   .metric .estimator  mean     n std_err .config             
    +#>   <chr>   <chr>      <dbl> <int>   <dbl> <chr>               
    +#> 1 j_index binary     0.250    50 0.0288  Preprocessor1_Model1
    +#> 2 roc_auc binary     0.953    50 0.00479 Preprocessor1_Model1
    +
    +

    It looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity.

    +

    Let’s plot the metrics for each resample to see how the individual results changed.

    +
    +
    no_sampling <- 
    +  qda_only_res %>% 
    +  collect_metrics(summarize = FALSE) %>% 
    +  dplyr::select(-.estimator) %>% 
    +  mutate(sampling = "no_sampling")
    +
    +with_sampling <- 
    +  qda_rose_res %>% 
    +  collect_metrics(summarize = FALSE) %>% 
    +  dplyr::select(-.estimator) %>% 
    +  mutate(sampling = "rose")
    +
    +bind_rows(no_sampling, with_sampling) %>% 
    +  mutate(label = paste(id2, id)) %>%  
    +  ggplot(aes(x = sampling, y = .estimate, group = label)) + 
    +  geom_line(alpha = .4) + 
    +  facet_wrap(~ .metric, scales = "free_y")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  discrim    * 1.0.1   2023-03-08 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  klaR       * 1.7-2   2023-03-17 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  readr      * 2.1.4   2023-02-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  ROSE       * 0.0-4   2021-06-14 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  themis     * 1.0.1   2023-04-14 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/models/time-series/figs/plot-1.svg b/docs/learn/models/time-series/figs/plot-1.svg similarity index 100% rename from content/learn/models/time-series/figs/plot-1.svg rename to docs/learn/models/time-series/figs/plot-1.svg diff --git a/docs/learn/models/time-series/index.html b/docs/learn/models/time-series/index.html new file mode 100644 index 00000000..7fe67118 --- /dev/null +++ b/docs/learn/models/time-series/index.html @@ -0,0 +1,941 @@ + + + + + + + + + + +tidymodels - Modeling time series with tidy resampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Modeling time series with tidy resampling

    +
    +
    model fitting
    +
    time series
    +
    +
    + +
    +
    +

    Calculate performance estimates for time series forecasts using resampling.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: forecast, sweep, tidymodels, timetk, and zoo.

    +

    Demo Week: Tidy Forecasting with sweep” is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using rolling forecast origin resampling.

    +
    +
    +

    Example data

    +

    The data for this article are sales of alcoholic beverages originally from the Federal Reserve Bank of St. Louis website.

    +
    +
    library(tidymodels)
    +library(modeldata)
    +data("drinks")
    +glimpse(drinks)
    +#> Rows: 309
    +#> Columns: 2
    +#> $ date           <date> 1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-0…
    +#> $ S4248SM144NCEN <dbl> 3459, 3458, 4002, 4564, 4221, 4529, 4466, 4137, 4126, 4…
    +
    +

    Each row represents one month of sales (in millions of US dollars).

    +
    +
    +

    Time series resampling

    +

    Suppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. To set up this resampling scheme:

    +
    +
    roll_rs <- rolling_origin(
    +  drinks, 
    +  initial = 12 * 20, 
    +  assess = 12,
    +  cumulative = FALSE
    +  )
    +
    +nrow(roll_rs)
    +#> [1] 58
    +
    +roll_rs
    +#> # Rolling origin forecast resampling 
    +#> # A tibble: 58 × 2
    +#>    splits           id     
    +#>    <list>           <chr>  
    +#>  1 <split [240/12]> Slice01
    +#>  2 <split [240/12]> Slice02
    +#>  3 <split [240/12]> Slice03
    +#>  4 <split [240/12]> Slice04
    +#>  5 <split [240/12]> Slice05
    +#>  6 <split [240/12]> Slice06
    +#>  7 <split [240/12]> Slice07
    +#>  8 <split [240/12]> Slice08
    +#>  9 <split [240/12]> Slice09
    +#> 10 <split [240/12]> Slice10
    +#> # ℹ 48 more rows
    +
    +

    Each split element contains the information about that resample:

    +
    +
    roll_rs$splits[[1]]
    +#> <Analysis/Assess/Total>
    +#> <240/12/309>
    +
    +

    For plotting, let’s index each split by the first day of the assessment set:

    +
    +
    get_date <- function(x) {
    +  min(assessment(x)$date)
    +}
    +
    +start_date <- map(roll_rs$splits, get_date)
    +roll_rs$start_date <- do.call("c", start_date)
    +head(roll_rs$start_date)
    +#> [1] "2012-01-01" "2012-02-01" "2012-03-01" "2012-04-01" "2012-05-01"
    +#> [6] "2012-06-01"
    +
    +

    This resampling scheme has 58 splits of the data so that there will be 58 ARIMA models that are fit. To create the models, we use the auto.arima() function from the forecast package. The rsample functions analysis() and assessment() return a data frame, so another step converts the data to a ts object called mod_dat using a function in the timetk package.

    +
    +
    library(forecast)  # for `auto.arima`
    +library(timetk)    # for `tk_ts`
    +library(zoo)       # for `as.yearmon`
    +
    +fit_model <- function(x, ...) {
    +  # suggested by Matt Dancho:
    +  x %>%
    +    analysis() %>%
    +    # Since the first day changes over resamples, adjust it
    +    # based on the first date value in the data frame 
    +    tk_ts(start = .$date[[1]] %>% as.yearmon(), 
    +          frequency = 12, 
    +          silent = TRUE) %>%
    +    auto.arima(...)
    +}
    +
    +

    Save each model in a new column:

    +
    +
    roll_rs$arima <- map(roll_rs$splits, fit_model)
    +
    +# For example:
    +roll_rs$arima[[1]]
    +#> Series: . 
    +#> ARIMA(4,1,1)(0,1,2)[12] 
    +#> 
    +#> Coefficients:
    +#>           ar1      ar2     ar3      ar4      ma1    sma1     sma2
    +#>       -0.1852  -0.0238  0.3577  -0.1517  -0.8311  -0.193  -0.3244
    +#> s.e.   0.1466   0.1656  0.1440   0.0809   0.1377   0.067   0.0640
    +#> 
    +#> sigma^2 = 72198:  log likelihood = -1591.15
    +#> AIC=3198.3   AICc=3198.97   BIC=3225.7
    +
    +

    (There are some warnings produced by these regarding extra columns in the data that can be ignored.)

    +
    +
    +

    Model performance

    +

    Using the model fits, let’s measure performance in two ways:

    +
      +
    • Interpolation error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used.
    • +
    • Extrapolation or forecast error evaluates the performance of the model on the data from the following year (that were not used in the model fit).
    • +
    +

    In each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the Arima object. To make things easy, let’s use the sweep package’s sw_glance() function:

    +
    +
    library(sweep)
    +
    +roll_rs$interpolation <- map_dbl(
    +  roll_rs$arima,
    +  function(x) 
    +    sw_glance(x)[["MAPE"]]
    +  )
    +
    +summary(roll_rs$interpolation)
    +#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    +#>   2.841   2.921   2.950   2.947   2.969   3.135
    +
    +

    For the extrapolation error, the model and split objects are required. Using these:

    +
    +
    get_extrap <- function(split, mod) {
    +  n <- nrow(assessment(split))
    +  # Get assessment data
    +  pred_dat <- assessment(split) %>%
    +    mutate(
    +      pred = as.vector(forecast(mod, h = n)$mean),
    +      pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100
    +    )
    +  mean(abs(pred_dat$pct_error))
    +}
    +
    +roll_rs$extrapolation <- 
    +  map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap)
    +
    +summary(roll_rs$extrapolation)
    +#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    +#>   2.371   3.231   3.629   3.654   4.113   5.453
    +
    +

    What do these error estimates look like over time?

    +
    +
    roll_rs %>%
    +  select(interpolation, extrapolation, start_date) %>%
    +  pivot_longer(cols = matches("ation"), names_to = "error", values_to = "MAPE") %>%
    +  ggplot(aes(x = start_date, y = MAPE, col = error)) + 
    +  geom_point() + 
    +  geom_line()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    It is likely that the interpolation error is an underestimate to some degree, as mentioned above.

    +

    It is also worth noting that rolling_origin() can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days.

    +

    The example below demonstrates this idea by splitting drinks into a nested set of 26 years, and rolling over years rather than months. Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month.

    +
    +
    # The idea is to nest by the period to roll over,
    +# which in this case is the year.
    +roll_rs_annual <- drinks %>%
    +  mutate(year = as.POSIXlt(date)$year + 1900) %>%
    +  nest(data = c(date, S4248SM144NCEN)) %>%
    +  rolling_origin(
    +    initial = 20, 
    +    assess = 1, 
    +    cumulative = FALSE
    +  )
    +
    +analysis(roll_rs_annual$splits[[1]])
    +#> # A tibble: 20 × 2
    +#>     year data             
    +#>    <dbl> <list>           
    +#>  1  1992 <tibble [12 × 2]>
    +#>  2  1993 <tibble [12 × 2]>
    +#>  3  1994 <tibble [12 × 2]>
    +#>  4  1995 <tibble [12 × 2]>
    +#>  5  1996 <tibble [12 × 2]>
    +#>  6  1997 <tibble [12 × 2]>
    +#>  7  1998 <tibble [12 × 2]>
    +#>  8  1999 <tibble [12 × 2]>
    +#>  9  2000 <tibble [12 × 2]>
    +#> 10  2001 <tibble [12 × 2]>
    +#> 11  2002 <tibble [12 × 2]>
    +#> 12  2003 <tibble [12 × 2]>
    +#> 13  2004 <tibble [12 × 2]>
    +#> 14  2005 <tibble [12 × 2]>
    +#> 15  2006 <tibble [12 × 2]>
    +#> 16  2007 <tibble [12 × 2]>
    +#> 17  2008 <tibble [12 × 2]>
    +#> 18  2009 <tibble [12 × 2]>
    +#> 19  2010 <tibble [12 × 2]>
    +#> 20  2011 <tibble [12 × 2]>
    +
    +

    The workflow to access these calendar slices is to use bind_rows() to join each analysis set together.

    +
    +
    mutate(
    +  roll_rs_annual,
    +  extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data))
    +)
    +#> # Rolling origin forecast resampling 
    +#> # A tibble: 6 × 3
    +#>   splits         id     extracted_slice   
    +#>   <list>         <chr>  <list>            
    +#> 1 <split [20/1]> Slice1 <tibble [240 × 2]>
    +#> 2 <split [20/1]> Slice2 <tibble [240 × 2]>
    +#> 3 <split [20/1]> Slice3 <tibble [240 × 2]>
    +#> 4 <split [20/1]> Slice4 <tibble [240 × 2]>
    +#> 5 <split [20/1]> Slice5 <tibble [240 × 2]>
    +#> 6 <split [20/1]> Slice6 <tibble [240 × 2]>
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  forecast   * 8.21    2023-02-27 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  sweep      * 0.2.4   2023-05-25 [1] Github (business-science/sweep@d0327bc)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  timetk     * 2.8.3   2023-03-30 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#>  zoo        * 1.8-12  2023-04-13 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/statistics/bootstrap/figs/unnamed-chunk-8-1.svg b/docs/learn/statistics/bootstrap/figs/unnamed-chunk-11-1.svg similarity index 100% rename from content/learn/statistics/bootstrap/figs/unnamed-chunk-8-1.svg rename to docs/learn/statistics/bootstrap/figs/unnamed-chunk-11-1.svg diff --git a/content/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg b/docs/learn/statistics/bootstrap/figs/unnamed-chunk-12-1.svg similarity index 100% rename from content/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg rename to docs/learn/statistics/bootstrap/figs/unnamed-chunk-12-1.svg diff --git a/content/learn/statistics/bootstrap/figs/unnamed-chunk-1-1.svg b/docs/learn/statistics/bootstrap/figs/unnamed-chunk-3-1.svg similarity index 100% rename from content/learn/statistics/bootstrap/figs/unnamed-chunk-1-1.svg rename to docs/learn/statistics/bootstrap/figs/unnamed-chunk-3-1.svg diff --git a/content/learn/statistics/bootstrap/figs/unnamed-chunk-2-1.svg b/docs/learn/statistics/bootstrap/figs/unnamed-chunk-4-1.svg similarity index 100% rename from content/learn/statistics/bootstrap/figs/unnamed-chunk-2-1.svg rename to docs/learn/statistics/bootstrap/figs/unnamed-chunk-4-1.svg diff --git a/content/learn/statistics/bootstrap/figs/unnamed-chunk-6-1.svg b/docs/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg similarity index 100% rename from content/learn/statistics/bootstrap/figs/unnamed-chunk-6-1.svg rename to docs/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg diff --git a/docs/learn/statistics/bootstrap/index.html b/docs/learn/statistics/bootstrap/index.html new file mode 100644 index 00000000..0a3c5da7 --- /dev/null +++ b/docs/learn/statistics/bootstrap/index.html @@ -0,0 +1,923 @@ + + + + + + + + + + +tidymodels - Bootstrap resampling and tidy regression models + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Bootstrap resampling and tidy regression models

    +
    +
    statistical analysis
    +
    bootstraping
    +
    tidying results
    +
    confidence intervals
    +
    +
    + +
    +
    +

    Apply bootstrap resampling to estimate uncertainty in model parameters.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    This article only requires the tidymodels package.

    +

    Combining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. These approaches have been explored before, for instance by Andrew MacDonald here, and Hadley has explored efficient support for bootstrapping as a potential enhancement to dplyr. The tidymodels package broom fits naturally with dplyr in performing these analyses.

    +

    Bootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate.

    +

    Let’s say we want to fit a nonlinear model to the weight/mileage relationship in the mtcars data set.

    +
    +
    library(tidymodels)
    +
    +ggplot(mtcars, aes(mpg, wt)) + 
    +    geom_point()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    We might use the method of nonlinear least squares (via the nls() function) to fit a model.

    +
    +
    nlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0))
    +summary(nlsfit)
    +#> 
    +#> Formula: mpg ~ k/wt + b
    +#> 
    +#> Parameters:
    +#>   Estimate Std. Error t value Pr(>|t|)    
    +#> k   45.829      4.249  10.786 7.64e-12 ***
    +#> b    4.386      1.536   2.855  0.00774 ** 
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> Residual standard error: 2.774 on 30 degrees of freedom
    +#> 
    +#> Number of iterations to convergence: 1 
    +#> Achieved convergence tolerance: 6.813e-09
    +
    +ggplot(mtcars, aes(wt, mpg)) +
    +    geom_point() +
    +    geom_line(aes(y = predict(nlsfit)))
    +
    +
    +
    +

    +
    +
    +
    +
    +

    While this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data.

    +
    +
    +

    Bootstrapping models

    +

    We can use the bootstraps() function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an rset, which is a data frame with a column of rsplit objects.

    +

    An rsplit object has two main components: an analysis data set and an assessment data set, accessible via analysis(rsplit) and assessment(rsplit) respectively. For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples.

    +
    +
    set.seed(27)
    +boots <- bootstraps(mtcars, times = 2000, apparent = TRUE)
    +boots
    +#> # Bootstrap sampling with apparent sample 
    +#> # A tibble: 2,001 × 2
    +#>    splits          id           
    +#>    <list>          <chr>        
    +#>  1 <split [32/13]> Bootstrap0001
    +#>  2 <split [32/10]> Bootstrap0002
    +#>  3 <split [32/13]> Bootstrap0003
    +#>  4 <split [32/11]> Bootstrap0004
    +#>  5 <split [32/9]>  Bootstrap0005
    +#>  6 <split [32/10]> Bootstrap0006
    +#>  7 <split [32/11]> Bootstrap0007
    +#>  8 <split [32/13]> Bootstrap0008
    +#>  9 <split [32/11]> Bootstrap0009
    +#> 10 <split [32/11]> Bootstrap0010
    +#> # ℹ 1,991 more rows
    +
    +

    Let’s create a helper function to fit an nls() model on each bootstrap sample, and then use purrr::map() to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting.

    +
    +
    fit_nls_on_bootstrap <- function(split) {
    +    nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0))
    +}
    +
    +boot_models <-
    +  boots %>% 
    +  mutate(model = map(splits, fit_nls_on_bootstrap),
    +         coef_info = map(model, tidy))
    +
    +boot_coefs <- 
    +  boot_models %>% 
    +  unnest(coef_info)
    +
    +

    The unnested coefficient information contains a summary of each replication combined in a single data frame:

    +
    +
    boot_coefs
    +#> # A tibble: 4,002 × 8
    +#>    splits          id          model term  estimate std.error statistic  p.value
    +#>    <list>          <chr>       <lis> <chr>    <dbl>     <dbl>     <dbl>    <dbl>
    +#>  1 <split [32/13]> Bootstrap0… <nls> k        42.1       4.05     10.4  1.91e-11
    +#>  2 <split [32/13]> Bootstrap0… <nls> b         5.39      1.43      3.78 6.93e- 4
    +#>  3 <split [32/10]> Bootstrap0… <nls> k        49.9       5.66      8.82 7.82e-10
    +#>  4 <split [32/10]> Bootstrap0… <nls> b         3.73      1.92      1.94 6.13e- 2
    +#>  5 <split [32/13]> Bootstrap0… <nls> k        37.8       2.68     14.1  9.01e-15
    +#>  6 <split [32/13]> Bootstrap0… <nls> b         6.73      1.17      5.75 2.78e- 6
    +#>  7 <split [32/11]> Bootstrap0… <nls> k        45.6       4.45     10.2  2.70e-11
    +#>  8 <split [32/11]> Bootstrap0… <nls> b         4.75      1.62      2.93 6.38e- 3
    +#>  9 <split [32/9]>  Bootstrap0… <nls> k        43.6       4.63      9.41 1.85e-10
    +#> 10 <split [32/9]>  Bootstrap0… <nls> b         5.89      1.68      3.51 1.44e- 3
    +#> # ℹ 3,992 more rows
    +
    +
    +
    +

    Confidence intervals

    +

    We can then calculate confidence intervals (using what is called the percentile method):

    +
    +
    percentile_intervals <- int_pctl(boot_models, coef_info)
    +percentile_intervals
    +#> # A tibble: 2 × 6
    +#>   term   .lower .estimate .upper .alpha .method   
    +#>   <chr>   <dbl>     <dbl>  <dbl>  <dbl> <chr>     
    +#> 1 b      0.0475      4.12   7.31   0.05 percentile
    +#> 2 k     37.6        46.7   59.8    0.05 percentile
    +
    +

    Or we can use histograms to get a more detailed idea of the uncertainty in each estimate:

    +
    +
    ggplot(boot_coefs, aes(estimate)) +
    +  geom_histogram(bins = 30) +
    +  facet_wrap( ~ term, scales = "free") +
    +  geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = "blue") +
    +  geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = "blue")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The rsample package also has functions for other types of confidence intervals.

    +
    +
    +

    Possible model fits

    +

    We can use augment() to visualize the uncertainty in the fitted curve. Since there are so many bootstrap samples, we’ll only show a sample of the model fits in our visualization:

    +
    +
    boot_aug <- 
    +  boot_models %>% 
    +  sample_n(200) %>% 
    +  mutate(augmented = map(model, augment)) %>% 
    +  unnest(augmented)
    +
    +boot_aug
    +#> # A tibble: 6,400 × 8
    +#>    splits          id            model  coef_info   mpg    wt .fitted .resid
    +#>    <list>          <chr>         <list> <list>    <dbl> <dbl>   <dbl>  <dbl>
    +#>  1 <split [32/11]> Bootstrap1644 <nls>  <tibble>   16.4  4.07    15.6  0.829
    +#>  2 <split [32/11]> Bootstrap1644 <nls>  <tibble>   19.7  2.77    21.9 -2.21 
    +#>  3 <split [32/11]> Bootstrap1644 <nls>  <tibble>   19.2  3.84    16.4  2.84 
    +#>  4 <split [32/11]> Bootstrap1644 <nls>  <tibble>   21.4  2.78    21.8 -0.437
    +#>  5 <split [32/11]> Bootstrap1644 <nls>  <tibble>   26    2.14    27.8 -1.75 
    +#>  6 <split [32/11]> Bootstrap1644 <nls>  <tibble>   33.9  1.84    32.0  1.88 
    +#>  7 <split [32/11]> Bootstrap1644 <nls>  <tibble>   32.4  2.2     27.0  5.35 
    +#>  8 <split [32/11]> Bootstrap1644 <nls>  <tibble>   30.4  1.62    36.1 -5.70 
    +#>  9 <split [32/11]> Bootstrap1644 <nls>  <tibble>   21.5  2.46    24.4 -2.86 
    +#> 10 <split [32/11]> Bootstrap1644 <nls>  <tibble>   26    2.14    27.8 -1.75 
    +#> # ℹ 6,390 more rows
    +
    +
    +
    ggplot(boot_aug, aes(wt, mpg)) +
    +  geom_line(aes(y = .fitted, group = id), alpha = .2, col = "blue") +
    +  geom_point()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    With only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the tidy() and augment() functions works for many statistical outputs. As another example, we could use smooth.spline(), which fits a cubic smoothing spline to data:

    +
    +
    fit_spline_on_bootstrap <- function(split) {
    +    data <- analysis(split)
    +    smooth.spline(data$wt, data$mpg, df = 4)
    +}
    +
    +boot_splines <- 
    +  boots %>% 
    +  sample_n(200) %>% 
    +  mutate(spline = map(splits, fit_spline_on_bootstrap),
    +         aug_train = map(spline, augment))
    +
    +splines_aug <- 
    +  boot_splines %>% 
    +  unnest(aug_train)
    +
    +ggplot(splines_aug, aes(x, y)) +
    +  geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = "blue") +
    +  geom_point()
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/statistics/infer/figs/unnamed-chunk-22-1.svg b/docs/learn/statistics/infer/figs/unnamed-chunk-22-1.svg new file mode 100644 index 00000000..02ac008b --- /dev/null +++ b/docs/learn/statistics/infer/figs/unnamed-chunk-22-1.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.0 +0.2 +0.4 +0.6 + + + + + + + +0 +2 +4 +F stat +density +Theoretical F Null Distribution + + diff --git a/docs/learn/statistics/infer/figs/unnamed-chunk-23-1.svg b/docs/learn/statistics/infer/figs/unnamed-chunk-23-1.svg new file mode 100644 index 00000000..92fb2ce6 --- /dev/null +++ b/docs/learn/statistics/infer/figs/unnamed-chunk-23-1.svg @@ -0,0 +1,5090 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.0 +0.2 +0.4 +0.6 + + + + + + + + +0 +2 +4 +6 +F stat +density +Simulation-Based and Theoretical F Null Distributions + + diff --git a/docs/learn/statistics/infer/figs/visualize-1.svg b/docs/learn/statistics/infer/figs/visualize-1.svg new file mode 100644 index 00000000..7ff02340 --- /dev/null +++ b/docs/learn/statistics/infer/figs/visualize-1.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +250 +500 +750 +1000 + + + + + + + + + + + +38 +39 +40 +41 +42 +43 +stat +count +Simulation-Based Null Distribution + + diff --git a/docs/learn/statistics/infer/figs/visualize2-1.svg b/docs/learn/statistics/infer/figs/visualize2-1.svg new file mode 100644 index 00000000..89148058 --- /dev/null +++ b/docs/learn/statistics/infer/figs/visualize2-1.svg @@ -0,0 +1,5102 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +250 +500 +750 +1000 + + + + + + + + + + + +38 +39 +40 +41 +42 +43 +stat +count +Simulation-Based Null Distribution + + diff --git 
a/docs/learn/statistics/infer/index.html b/docs/learn/statistics/infer/index.html new file mode 100644 index 00000000..0300d459 --- /dev/null +++ b/docs/learn/statistics/infer/index.html @@ -0,0 +1,1148 @@ + + + + + + + + + + +tidymodels - Hypothesis testing using resampling and tidy data + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Hypothesis testing using resampling and tidy data

    +
    +
    statistical analysis
    +
    hypothesis testing
    +
    bootstrapping
    +
    +
    + +
    +
    +

    Perform common hypothesis tests for statistical inference using flexible functions.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    This article only requires the tidymodels package.

    +

    The tidymodels package infer implements an expressive grammar to perform statistical inference that coheres with the tidyverse design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs.

    +

    Regardless of which hypothesis test we’re using, we’re still asking the same kind of question:

    +
    +

    Is the effect or difference in our observed data real, or due to chance?

    +
    +

    To answer this question, we start by assuming that the observed data came from some world where “nothing is going on” (i.e. the observed effect was simply due to random chance), and call this assumption our null hypothesis. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the alternate hypothesis, which supposes that the effect present in the observed data is actually due to the fact that “something is going on.”) We then calculate a test statistic from our data that describes the observed effect. We can use this test statistic to calculate a p-value, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined significance level \(\alpha\), then we can reject our null hypothesis.

    +

    If you are new to hypothesis testing, take a look at

    + +

    The workflow of this package is designed around these ideas. Starting from some data set,

    +
      +
    • specify() allows you to specify the variable, or relationship between variables, that you’re interested in,
    • +
    • hypothesize() allows you to declare the null hypothesis,
    • +
    • generate() allows you to generate data reflecting the null hypothesis, and
    • +
    • calculate() allows you to calculate a distribution of statistics from the generated data to form the null distribution.
    • +
    +

    Throughout this vignette, we make use of gss, a data set available in infer containing a sample of 500 observations of 11 variables from the General Social Survey.

    +
    +
    library(tidymodels) # Includes the infer package
    +
    +# load in the data set
    +data(gss)
    +
    +# take a look at its structure
    +dplyr::glimpse(gss)
    +#> Rows: 500
    +#> Columns: 11
    +#> $ year    <dbl> 2014, 1994, 1998, 1996, 1994, 1996, 1990, 2016, 2000, 1998, 20…
    +#> $ age     <dbl> 36, 34, 24, 42, 31, 32, 48, 36, 30, 33, 21, 30, 38, 49, 25, 56…
    +#> $ sex     <fct> male, female, male, male, male, female, female, female, female…
    +#> $ college <fct> degree, no degree, degree, no degree, degree, no degree, no de…
    +#> $ partyid <fct> ind, rep, ind, ind, rep, rep, dem, ind, rep, dem, dem, ind, de…
    +#> $ hompop  <dbl> 3, 4, 1, 4, 2, 4, 2, 1, 5, 2, 4, 3, 4, 4, 2, 2, 3, 2, 1, 2, 5,…
    +#> $ hours   <dbl> 50, 31, 40, 40, 40, 53, 32, 20, 40, 40, 23, 52, 38, 72, 48, 40…
    +#> $ income  <ord> $25000 or more, $20000 - 24999, $25000 or more, $25000 or more…
    +#> $ class   <fct> middle class, working class, working class, working class, mid…
    +#> $ finrela <fct> below average, below average, below average, above average, ab…
    +#> $ weight  <dbl> 0.8960034, 1.0825000, 0.5501000, 1.0864000, 1.0825000, 1.08640…
    +
    +

    Each row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See ?gss for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let’s suppose that this data set is a representative sample of a population we want to learn about: American adults.

    +
    +
    +

    Specify variables

    +

    The specify() function can be used to specify which of the variables in the data set you’re interested in. If you’re only interested in, say, the age of the respondents, you might write:

    +
    +
    gss %>%
    +  specify(response = age)
    +#> Response: age (numeric)
    +#> # A tibble: 500 × 1
    +#>      age
    +#>    <dbl>
    +#>  1    36
    +#>  2    34
    +#>  3    24
    +#>  4    42
    +#>  5    31
    +#>  6    32
    +#>  7    48
    +#>  8    36
    +#>  9    30
    +#> 10    33
    +#> # ℹ 490 more rows
    +
    +

    On the front end, the output of specify() just looks like it selects off the columns in the dataframe that you’ve specified. What do we see if we check the class of this object, though?

    +
    +
    gss %>%
    +  specify(response = age) %>%
    +  class()
    +#> [1] "infer"      "tbl_df"     "tbl"        "data.frame"
    +
    +

    We can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata.

    +

    If you’re interested in two variables (age and partyid, for example) you can specify() their relationship in one of two (equivalent) ways:

    +
    +
    # as a formula
    +gss %>%
    +  specify(age ~ partyid)
    +#> Response: age (numeric)
    +#> Explanatory: partyid (factor)
    +#> # A tibble: 500 × 2
    +#>      age partyid
    +#>    <dbl> <fct>  
    +#>  1    36 ind    
    +#>  2    34 rep    
    +#>  3    24 ind    
    +#>  4    42 ind    
    +#>  5    31 rep    
    +#>  6    32 rep    
    +#>  7    48 dem    
    +#>  8    36 ind    
    +#>  9    30 rep    
    +#> 10    33 dem    
    +#> # ℹ 490 more rows
    +
    +# with the named arguments
    +gss %>%
    +  specify(response = age, explanatory = partyid)
    +#> Response: age (numeric)
    +#> Explanatory: partyid (factor)
    +#> # A tibble: 500 × 2
    +#>      age partyid
    +#>    <dbl> <fct>  
    +#>  1    36 ind    
    +#>  2    34 rep    
    +#>  3    24 ind    
    +#>  4    42 ind    
    +#>  5    31 rep    
    +#>  6    32 rep    
    +#>  7    48 dem    
    +#>  8    36 ind    
    +#>  9    30 rep    
    +#> 10    33 dem    
    +#> # ℹ 490 more rows
    +
    +

    If you’re doing inference on one proportion or a difference in proportions, you will need to use the success argument to specify which level of your response variable is a success. For instance, if you’re interested in the proportion of the population with a college degree, you might use the following code:

    +
    +
    # specifying for inference on proportions
    +gss %>%
    +  specify(response = college, success = "degree")
    +#> Response: college (factor)
    +#> # A tibble: 500 × 1
    +#>    college  
    +#>    <fct>    
    +#>  1 degree   
    +#>  2 no degree
    +#>  3 degree   
    +#>  4 no degree
    +#>  5 degree   
    +#>  6 no degree
    +#>  7 no degree
    +#>  8 degree   
    +#>  9 degree   
    +#> 10 no degree
    +#> # ℹ 490 more rows
    +
    +
    +
    +

    Declare the hypothesis

    +

    The next step in the infer pipeline is often to declare a null hypothesis using hypothesize(). The first step is to supply one of “independence” or “point” to the null argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to hypothesize():

    +
    +
    gss %>%
    +  specify(college ~ partyid, success = "degree") %>%
    +  hypothesize(null = "independence")
    +#> Response: college (factor)
    +#> Explanatory: partyid (factor)
    +#> Null Hypothesis: independence
    +#> # A tibble: 500 × 2
    +#>    college   partyid
    +#>    <fct>     <fct>  
    +#>  1 degree    ind    
    +#>  2 no degree rep    
    +#>  3 degree    ind    
    +#>  4 no degree ind    
    +#>  5 degree    rep    
    +#>  6 no degree rep    
    +#>  7 no degree dem    
    +#>  8 degree    ind    
    +#>  9 degree    rep    
    +#> 10 no degree dem    
    +#> # ℹ 490 more rows
    +
    +

    If you’re doing inference on a point estimate, you will also need to provide one of p (the true proportion of successes, between 0 and 1), mu (the true mean), med (the true median), or sigma (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write:

    +
    +
    gss %>%
    +  specify(response = hours) %>%
    +  hypothesize(null = "point", mu = 40)
    +#> Response: hours (numeric)
    +#> Null Hypothesis: point
    +#> # A tibble: 500 × 1
    +#>    hours
    +#>    <dbl>
    +#>  1    50
    +#>  2    31
    +#>  3    40
    +#>  4    40
    +#>  5    40
    +#>  6    53
    +#>  7    32
    +#>  8    20
    +#>  9    40
    +#> 10    40
    +#> # ℹ 490 more rows
    +
    +

    Again, from the front-end, the dataframe outputted from hypothesize() looks almost exactly the same as it did when it came out of specify(), but infer now “knows” your null hypothesis.

    +
    +
    +

    Generate the distribution

    +

    Once we’ve asserted our null hypothesis using hypothesize(), we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the type argument:

    +
      +
    • bootstrap: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data.
      +
    • +
    • permute: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample.
      +
    • +
    • simulate: A value will be sampled from a theoretical distribution with parameters specified in hypothesize() for each replicate. (This option is currently only applicable for testing point estimates.)
    • +
    +

    Continuing on with our example above, about the average number of hours worked a week, we might write:

    +
    +
    gss %>%
    +  specify(response = hours) %>%
    +  hypothesize(null = "point", mu = 40) %>%
    +  generate(reps = 5000, type = "bootstrap")
    +#> Response: hours (numeric)
    +#> Null Hypothesis: point
    +#> # A tibble: 2,500,000 × 2
    +#> # Groups:   replicate [5,000]
    +#>    replicate hours
    +#>        <int> <dbl>
    +#>  1         1 28.6 
    +#>  2         1  8.62
    +#>  3         1 38.6 
    +#>  4         1 18.6 
    +#>  5         1  6.62
    +#>  6         1 38.6 
    +#>  7         1 53.6 
    +#>  8         1 38.6 
    +#>  9         1 35.6 
    +#> 10         1 28.6 
    +#> # ℹ 2,499,990 more rows
    +
    +

    In the above example, we take 5000 bootstrap samples to form our null distribution.

    +

    To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age:

    +
    +
    gss %>%
    +  specify(partyid ~ age) %>%
    +  hypothesize(null = "independence") %>%
    +  generate(reps = 5000, type = "permute")
    +#> Response: partyid (factor)
    +#> Explanatory: age (numeric)
    +#> Null Hypothesis: independence
    +#> # A tibble: 2,500,000 × 3
    +#> # Groups:   replicate [5,000]
    +#>    partyid   age replicate
    +#>    <fct>   <dbl>     <int>
    +#>  1 dem        36         1
    +#>  2 ind        34         1
    +#>  3 dem        24         1
    +#>  4 dem        42         1
    +#>  5 ind        31         1
    +#>  6 ind        32         1
    +#>  7 ind        48         1
    +#>  8 rep        36         1
    +#>  9 rep        30         1
    +#> 10 dem        33         1
    +#> # ℹ 2,499,990 more rows
    +
    +
    +
    +

    Calculate statistics

    +

    Depending on whether you’re carrying out computation-based inference or theory-based inference, you will either supply calculate() with the output of generate() or hypothesize(), respectively. The function, for one, takes in a stat argument, which is currently one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "t", "z", "slope", or "correlation". For example, continuing our example above to calculate the null distribution of mean hours worked per week:

    +
    +
    gss %>%
    +  specify(response = hours) %>%
    +  hypothesize(null = "point", mu = 40) %>%
    +  generate(reps = 5000, type = "bootstrap") %>%
    +  calculate(stat = "mean")
    +#> Response: hours (numeric)
    +#> Null Hypothesis: point
    +#> # A tibble: 5,000 × 2
    +#>    replicate  stat
    +#>        <int> <dbl>
    +#>  1         1  40.9
    +#>  2         2  40.3
    +#>  3         3  39.2
    +#>  4         4  39.2
    +#>  5         5  39.3
    +#>  6         6  39.4
    +#>  7         7  40.7
    +#>  8         8  41.0
    +#>  9         9  39.2
    +#> 10        10  40.2
    +#> # ℹ 4,990 more rows
    +
    +

    The output of calculate() here shows us the sample statistic (in this case, the mean) for each of our 5,000 replicates. If you’re carrying out inference on differences in means, medians, or proportions, or \(t\) and \(z\) statistics, you will need to supply an order argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don’t, we might write:

    +
    +
    gss %>%
    +  specify(age ~ college) %>%
    +  hypothesize(null = "independence") %>%
    +  generate(reps = 5000, type = "permute") %>%
    +  calculate("diff in means", order = c("degree", "no degree"))
    +#> Response: age (numeric)
    +#> Explanatory: college (factor)
    +#> Null Hypothesis: independence
    +#> # A tibble: 5,000 × 2
    +#>    replicate   stat
    +#>        <int>  <dbl>
    +#>  1         1 -0.796
    +#>  2         2 -0.558
    +#>  3         3  0.773
    +#>  4         4 -0.390
    +#>  5         5 -2.26 
    +#>  6         6 -0.355
    +#>  7         7 -1.11 
    +#>  8         8 -0.628
    +#>  9         9  1.14 
    +#> 10        10 -0.928
    +#> # ℹ 4,990 more rows
    +
    +
    +
    +

    Other utilities

    +

    The infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with visualize()), calculate p-values (with get_p_value()), and calculate confidence intervals (with get_confidence_interval()).

    +

    To illustrate, we’ll go back to the example of determining whether the mean number of hours worked per week is 40 hours.

    +
    +
    # find the point estimate
    +point_estimate <- gss %>%
    +  specify(response = hours) %>%
    +  calculate(stat = "mean")
    +
    +# generate a null distribution
    +null_dist <- gss %>%
    +  specify(response = hours) %>%
    +  hypothesize(null = "point", mu = 40) %>%
    +  generate(reps = 5000, type = "bootstrap") %>%
    +  calculate(stat = "mean")
    +
    +

    (Notice the warning: Removed 1244 rows containing missing values. This would be worth noting if you were actually carrying out this hypothesis test.)

    +

    Our point estimate 41.382 seems pretty close to 40, but a little bit different. We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn’t 40.

    +

    We could initially just visualize the null distribution.

    +
    +
    null_dist %>%
    +  visualize()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Where does our sample’s observed statistic lie on this distribution? We can use the obs_stat argument to specify this.

    +
    +
    null_dist %>%
    +  visualize() +
    +  shade_p_value(obs_stat = point_estimate, direction = "two_sided")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Notice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the + operator to apply the shade_p_value() function. This is because visualize() outputs a plot object from ggplot2 instead of a dataframe, and the + operator is needed to add the p-value layer to the plot object.) The red bar looks like it’s slightly far out on the right tail of the null distribution, so observing a sample mean of 41.382 hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though?

    +
    +
    # get a two-tailed p-value
    +p_value <- null_dist %>%
    +  get_p_value(obs_stat = point_estimate, direction = "two_sided")
    +
    +p_value
    +#> # A tibble: 1 × 1
    +#>   p_value
    +#>     <dbl>
    +#> 1  0.0416
    +
    +

    It looks like the p-value is 0.0416, which is pretty small—if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (1.382 hours) from 40 would be 0.0416. This may or may not be statistically significantly different, depending on the significance level \(\alpha\) you decided on before you ran this analysis. If you had set \(\alpha = .05\), then this difference would be statistically significant, but if you had set \(\alpha = .01\), then it would not be.

    +

    To get a confidence interval around our estimate, we can write:

    +
    +
    # start with the null distribution
    +null_dist %>%
    +  # calculate the confidence interval around the point estimate
    +  get_confidence_interval(point_estimate = point_estimate,
    +                          # at the 95% confidence level
    +                          level = .95,
    +                          # using the standard error
    +                          type = "se")
    +#> # A tibble: 1 × 2
    +#>   lower_ci upper_ci
    +#>      <dbl>    <dbl>
    +#> 1     40.1     42.7
    +
    +

    As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the significance level \(\alpha = .05\).

    +
    +
    +

    Theoretical methods

    +

    The infer package also provides functionality to use theoretical methods for "Chisq", "F" and "t" test statistics.

    +

    Generally, to find a null distribution using theory-based methods, use the same code that you would use to find the null distribution using randomization-based methods, but skip the generate() step. For example, if we wanted to find a null distribution for the relationship between age (age) and party identification (partyid) using randomization, we could write:

    +
    +
    null_f_distn <- gss %>%
    +   specify(age ~ partyid) %>%
    +   hypothesize(null = "independence") %>%
    +   generate(reps = 5000, type = "permute") %>%
    +   calculate(stat = "F")
    +
    +

    To find the null distribution using theory-based methods, instead, skip the generate() step entirely:

    +
    +
    null_f_distn_theoretical <- gss %>%
    +   specify(age ~ partyid) %>%
    +   hypothesize(null = "independence") %>%
    +   calculate(stat = "F")
    +
    +

    We’ll calculate the observed statistic to make use of in the following visualizations; this procedure is the same, regardless of the methods used to find the null distribution.

    +
    +
    F_hat <- gss %>% 
    +  specify(age ~ partyid) %>%
    +  calculate(stat = "F")
    +
    +

    Now, instead of just piping the null distribution into visualize(), as we would do if we wanted to visualize the randomization-based null distribution, we also need to provide method = "theoretical" to visualize().

    +
    +
    visualize(null_f_distn_theoretical, method = "theoretical") +
    +  shade_p_value(obs_stat = F_hat, direction = "greater")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    To get a sense of how the theory-based and randomization-based null distributions relate, we can pipe the randomization-based null distribution into visualize() and also specify method = "both".

    +
    +
    visualize(null_f_distn, method = "both") +
    +  shade_p_value(obs_stat = F_hat, direction = "greater")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    That’s it! This vignette covers almost all of the key functionality of infer. See help(package = "infer") for a full list of functions and vignettes.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/statistics/k-means/figs/unnamed-chunk-11-1.svg b/docs/learn/statistics/k-means/figs/unnamed-chunk-11-1.svg new file mode 100644 index 00000000..1c39d9e5 --- /dev/null +++ b/docs/learn/statistics/k-means/figs/unnamed-chunk-11-1.svg @@ -0,0 +1,3108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +7 + + + + + + + + + + +8 + + + + + + + + + + +9 + + + + + + + + + + +4 + + + + + + + + + + +5 + + + + + + + + + + +6 + + + + + + + + + + +1 + + + + + + + + + + +2 + + + + + + + + + + +3 + + + + + +-5 +0 +5 + + + +-5 +0 +5 + + + +-5 +0 +5 +-2.5 +0.0 +2.5 + + + +-2.5 +0.0 +2.5 + + + +-2.5 +0.0 +2.5 + + + +x1 +x2 + +.cluster + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 +8 +9 + + diff --git a/content/learn/statistics/k-means/figs/unnamed-chunk-9-1.svg b/docs/learn/statistics/k-means/figs/unnamed-chunk-12-1.svg similarity index 100% rename from content/learn/statistics/k-means/figs/unnamed-chunk-9-1.svg rename to docs/learn/statistics/k-means/figs/unnamed-chunk-12-1.svg diff --git a/content/learn/statistics/k-means/figs/unnamed-chunk-10-1.svg b/docs/learn/statistics/k-means/figs/unnamed-chunk-13-1.svg similarity index 100% rename from content/learn/statistics/k-means/figs/unnamed-chunk-10-1.svg rename to docs/learn/statistics/k-means/figs/unnamed-chunk-13-1.svg diff --git a/content/learn/statistics/k-means/figs/unnamed-chunk-1-1.svg b/docs/learn/statistics/k-means/figs/unnamed-chunk-3-1.svg similarity index 100% rename from content/learn/statistics/k-means/figs/unnamed-chunk-1-1.svg rename to docs/learn/statistics/k-means/figs/unnamed-chunk-3-1.svg diff --git a/docs/learn/statistics/k-means/index.html b/docs/learn/statistics/k-means/index.html new file mode 100644 index 00000000..0c68041d --- /dev/null +++ b/docs/learn/statistics/k-means/index.html @@ -0,0 +1,948 @@ + + + + + + + + + + +tidymodels - K-means clustering with tidy data principles + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    K-means clustering with tidy data principles

    +
    +
    statistical analysis
    +
    clustering
    +
    tidying results
    +
    +
    + +
    +
    +

    Summarize clustering characteristics and estimate the best number of clusters for a data set.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    This article only requires the tidymodels package.

    +

    K-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions:

    +
      +
    • tidy()
    • +
    • augment()
    • +
    • glance()
    • +
    +

    Let’s start by generating some random two-dimensional data with three clusters. Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster:

    +
    +
    library(tidymodels)
    +
    +set.seed(27)
    +
    +centers <- tibble(
    +  cluster = factor(1:3), 
    +  num_points = c(100, 150, 50),  # number points in each cluster
    +  x1 = c(5, 0, -3),              # x1 coordinate of cluster center
    +  x2 = c(-1, 1, -2)              # x2 coordinate of cluster center
    +)
    +
    +labelled_points <- 
    +  centers %>%
    +  mutate(
    +    x1 = map2(num_points, x1, rnorm),
    +    x2 = map2(num_points, x2, rnorm)
    +  ) %>% 
    +  select(-num_points) %>% 
    +  unnest(cols = c(x1, x2))
    +
    +ggplot(labelled_points, aes(x1, x2, color = cluster)) +
    +  geom_point(alpha = 0.3)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This is an ideal case for k-means clustering.

    +
    +
    +

    How does K-means work?

    +

    Rather than using equations, this short animation using the artwork of Allison Horst explains the clustering process:

    +
    +
    +
    +

    +
    +
    +
    +
    +
    +

    Clustering in R

    +

    We’ll use the built-in kmeans() function, which accepts a data frame with all numeric columns as its primary argument.

    +
    +
    points <- 
    +  labelled_points %>% 
    +  select(-cluster)
    +
    +kclust <- kmeans(points, centers = 3)
    +kclust
    +#> K-means clustering with 3 clusters of sizes 148, 51, 101
    +#> 
    +#> Cluster means:
    +#>            x1        x2
    +#> 1  0.08853475  1.045461
    +#> 2 -3.14292460 -2.000043
    +#> 3  5.00401249 -1.045811
    +#> 
    +#> Clustering vector:
    +#>   [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
    +#>  [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
    +#>  [75] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1
    +#> [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    +#> [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    +#> [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
    +#> [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2
    +#> [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
    +#> [297] 2 2 2 2
    +#> 
    +#> Within cluster sum of squares by cluster:
    +#> [1] 298.9415 108.8112 243.2092
    +#>  (between_SS / total_SS =  82.5 %)
    +#> 
    +#> Available components:
    +#> 
    +#> [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
    +#> [6] "betweenss"    "size"         "iter"         "ifault"
    +summary(kclust)
    +#>              Length Class  Mode   
    +#> cluster      300    -none- numeric
    +#> centers        6    -none- numeric
    +#> totss          1    -none- numeric
    +#> withinss       3    -none- numeric
    +#> tot.withinss   1    -none- numeric
    +#> betweenss      1    -none- numeric
    +#> size           3    -none- numeric
    +#> iter           1    -none- numeric
    +#> ifault         1    -none- numeric
    +
    +

    The output is a list of vectors, where each component has a different length. There’s one of length 300, the same as our original data set. There are two elements of length 3 (withinss and size) and centers is a matrix with 3 rows. And then there are the elements of length 1: totss, tot.withinss, betweenss, and iter. (The value ifault indicates possible algorithm problems.)

    +

    These differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a different kind of information.

    +
      +
    • cluster (300 values) contains information about each point
    • +
    • centers, withinss, and size (3 values) contain information about each cluster
    • +
    • totss, tot.withinss, betweenss, and iter (1 value) contain information about the full clustering
    • +
    +

    Which of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there’s no straightforward way to combine them), they are extracted by separate functions. augment adds the point classifications to the original data set:

    +
    +
    augment(kclust, points)
    +#> # A tibble: 300 × 3
    +#>       x1     x2 .cluster
    +#>    <dbl>  <dbl> <fct>   
    +#>  1  6.91 -2.74  3       
    +#>  2  6.14 -2.45  3       
    +#>  3  4.24 -0.946 3       
    +#>  4  3.54  0.287 3       
    +#>  5  3.91  0.408 3       
    +#>  6  5.30 -1.58  3       
    +#>  7  5.01 -1.77  3       
    +#>  8  6.16 -1.68  3       
    +#>  9  7.13 -2.17  3       
    +#> 10  5.24 -2.42  3       
    +#> # ℹ 290 more rows
    +
    +

    The tidy() function summarizes on a per-cluster level:

    +
    +
    tidy(kclust)
    +#> # A tibble: 3 × 5
    +#>        x1    x2  size withinss cluster
    +#>     <dbl> <dbl> <int>    <dbl> <fct>  
    +#> 1  0.0885  1.05   148     299. 1      
    +#> 2 -3.14   -2.00    51     109. 2      
    +#> 3  5.00   -1.05   101     243. 3
    +
    +

    And as it always does, the glance() function extracts a single-row summary:

    +
    +
    glance(kclust)
    +#> # A tibble: 1 × 4
    +#>   totss tot.withinss betweenss  iter
    +#>   <dbl>        <dbl>     <dbl> <int>
    +#> 1 3724.         651.     3073.     2
    +
    +
    +
    +

    Exploratory clustering

    +

    While these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like dplyr.

    +

    Let’s say we want to explore the effect of different choices of k, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of k, then create columns containing the tidied, glanced and augmented data:

    +
    +
    kclusts <- 
    +  tibble(k = 1:9) %>%
    +  mutate(
    +    kclust = map(k, ~kmeans(points, .x)),
    +    tidied = map(kclust, tidy),
    +    glanced = map(kclust, glance),
    +    augmented = map(kclust, augment, points)
    +  )
    +
    +kclusts
    +#> # A tibble: 9 × 5
    +#>       k kclust   tidied           glanced          augmented         
    +#>   <int> <list>   <list>           <list>           <list>            
    +#> 1     1 <kmeans> <tibble [1 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 2     2 <kmeans> <tibble [2 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 3     3 <kmeans> <tibble [3 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 4     4 <kmeans> <tibble [4 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 5     5 <kmeans> <tibble [5 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 6     6 <kmeans> <tibble [6 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 7     7 <kmeans> <tibble [7 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 8     8 <kmeans> <tibble [8 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +#> 9     9 <kmeans> <tibble [9 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>
    +
    +

    We can turn these into three separate data sets each representing a different type of data: using tidy(), using augment(), and using glance(). Each of these goes into a separate data set as they represent different types of data.

    +
    +
    clusters <- 
    +  kclusts %>%
    +  unnest(cols = c(tidied))
    +
    +assignments <- 
    +  kclusts %>% 
    +  unnest(cols = c(augmented))
    +
    +clusterings <- 
    +  kclusts %>%
    +  unnest(cols = c(glanced))
    +
    +

    Now we can plot the original points using the data from augment(), with each point colored according to the predicted cluster.

    +
    +
    p1 <- 
    +  ggplot(assignments, aes(x = x1, y = x2)) +
    +  geom_point(aes(color = .cluster), alpha = 0.8) + 
    +  facet_wrap(~ k)
    +p1
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Already we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when k is too high or too low. We can then add the centers of the cluster using the data from tidy():

    +
    +
    p2 <- p1 + geom_point(data = clusters, size = 10, shape = "x")
    +p2
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The data from glance() fills a different but equally important purpose; it lets us view trends of some summary statistics across values of k. Of particular interest is the total within sum of squares, saved in the tot.withinss column.

    +
    +
    ggplot(clusterings, aes(k, tot.withinss)) +
    +  geom_line() +
    +  geom_point()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This represents the variance within the clusters. It decreases as k increases, but notice a bend (or “elbow”) around k = 3. This bend indicates that additional clusters beyond the third have little value. (See here for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/statistics/k-means/kmeans.gif b/docs/learn/statistics/k-means/kmeans.gif similarity index 100% rename from content/learn/statistics/k-means/kmeans.gif rename to docs/learn/statistics/k-means/kmeans.gif diff --git a/content/learn/statistics/tidy-analysis/figs/unnamed-chunk-2-1.svg b/docs/learn/statistics/tidy-analysis/figs/unnamed-chunk-4-1.svg similarity index 100% rename from content/learn/statistics/tidy-analysis/figs/unnamed-chunk-2-1.svg rename to docs/learn/statistics/tidy-analysis/figs/unnamed-chunk-4-1.svg diff --git a/docs/learn/statistics/tidy-analysis/index.html b/docs/learn/statistics/tidy-analysis/index.html new file mode 100644 index 00000000..6a6d01f7 --- /dev/null +++ b/docs/learn/statistics/tidy-analysis/index.html @@ -0,0 +1,1019 @@ + + + + + + + + + + +tidymodels - Correlation and regression fundamentals with tidy data principles + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Correlation and regression fundamentals with tidy data principles

    +
    +
    statistical analysis
    +
    correlation
    +
    tidying results
    +
    +
    + +
    +
    +

    Analyze the results of correlation tests and simple regression models for many data sets at once.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    This article only requires the tidymodels package.

    +

    While the tidymodels package broom is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the nest()/unnest() functions from tidyr and the map() function in purrr.

    +
    +
    +

    Correlation analysis

    +

    Let’s demonstrate this with a simple data set, the built-in Orange. We start by coercing Orange to a tibble. This gives a nicer print method that will be especially useful later on when we start working with list-columns.

    +
    +
    library(tidymodels)
    +
    +data(Orange)
    +
    +Orange <- as_tibble(Orange)
    +Orange
    +#> # A tibble: 35 × 3
    +#>    Tree    age circumference
    +#>    <ord> <dbl>         <dbl>
    +#>  1 1       118            30
    +#>  2 1       484            58
    +#>  3 1       664            87
    +#>  4 1      1004           115
    +#>  5 1      1231           120
    +#>  6 1      1372           142
    +#>  7 1      1582           145
    +#>  8 2       118            33
    +#>  9 2       484            69
    +#> 10 2       664           111
    +#> # ℹ 25 more rows
    +
    +

    This contains 35 observations of three variables: Tree, age, and circumference. Tree is a factor with five levels describing five trees. As might be expected, age and circumference are correlated:

    +
    +
    cor(Orange$age, Orange$circumference)
    +#> [1] 0.9135189
    +
    +library(ggplot2)
    +
    +ggplot(Orange, aes(age, circumference, color = Tree)) +
    +  geom_line()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Suppose you want to test for correlations individually within each tree. You can do this with dplyr’s group_by:

    +
    +
    Orange %>% 
    +  group_by(Tree) %>%
    +  summarize(correlation = cor(age, circumference))
    +#> # A tibble: 5 × 2
    +#>   Tree  correlation
    +#>   <ord>       <dbl>
    +#> 1 3           0.988
    +#> 2 1           0.985
    +#> 3 5           0.988
    +#> 4 2           0.987
    +#> 5 4           0.984
    +
    +

    (Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees).

    +

    Suppose that instead of simply estimating a correlation, we want to perform a hypothesis test with cor.test():

    +
    +
    ct <- cor.test(Orange$age, Orange$circumference)
    +ct
    +#> 
    +#>  Pearson's product-moment correlation
    +#> 
    +#> data:  Orange$age and Orange$circumference
    +#> t = 12.9, df = 33, p-value = 1.931e-14
    +#> alternative hypothesis: true correlation is not equal to 0
    +#> 95 percent confidence interval:
    +#>  0.8342364 0.9557955
    +#> sample estimates:
    +#>       cor 
    +#> 0.9135189
    +
    +

    This test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the tidy() function:

    +
    +
    tidy(ct)
    +#> # A tibble: 1 × 8
    +#>   estimate statistic  p.value parameter conf.low conf.high method    alternative
    +#>      <dbl>     <dbl>    <dbl>     <int>    <dbl>     <dbl> <chr>     <chr>      
    +#> 1    0.914      12.9 1.93e-14        33    0.834     0.956 Pearson'… two.sided
    +
    +

    Often, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a nest-map-unnest workflow. For example, suppose we want to perform correlation tests for each different tree. We start by nesting our data based on the group of interest:

    +
    +
    nested <- 
    +  Orange %>% 
    +  nest(data = c(age, circumference))
    +
    +

    Then we perform a correlation test for each nested tibble using purrr::map():

    +
    +
    nested %>% 
    +  mutate(test = map(data, ~ cor.test(.x$age, .x$circumference)))
    +#> # A tibble: 5 × 3
    +#>   Tree  data             test   
    +#>   <ord> <list>           <list> 
    +#> 1 1     <tibble [7 × 2]> <htest>
    +#> 2 2     <tibble [7 × 2]> <htest>
    +#> 3 3     <tibble [7 × 2]> <htest>
    +#> 4 4     <tibble [7 × 2]> <htest>
    +#> 5 5     <tibble [7 × 2]> <htest>
    +
    +

    This results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with map().

    +
    +
    nested %>% 
    +  mutate(
    +    test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col
    +    tidied = map(test, tidy)
    +  ) 
    +#> # A tibble: 5 × 4
    +#>   Tree  data             test    tidied          
    +#>   <ord> <list>           <list>  <list>          
    +#> 1 1     <tibble [7 × 2]> <htest> <tibble [1 × 8]>
    +#> 2 2     <tibble [7 × 2]> <htest> <tibble [1 × 8]>
    +#> 3 3     <tibble [7 × 2]> <htest> <tibble [1 × 8]>
    +#> 4 4     <tibble [7 × 2]> <htest> <tibble [1 × 8]>
    +#> 5 5     <tibble [7 × 2]> <htest> <tibble [1 × 8]>
    +
    +

    Finally, we want to unnest the tidied data frames so we can see the results in a flat tibble. All together, this looks like:

    +
    +
    Orange %>% 
    +  nest(data = c(age, circumference)) %>% 
    +  mutate(
    +    test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col
    +    tidied = map(test, tidy)
    +  ) %>% 
    +  unnest(cols = tidied) %>% 
    +  select(-data, -test)
    +#> # A tibble: 5 × 9
    +#>   Tree  estimate statistic   p.value parameter conf.low conf.high method        
    +#>   <ord>    <dbl>     <dbl>     <dbl>     <int>    <dbl>     <dbl> <chr>         
    +#> 1 1        0.985      13.0 0.0000485         5    0.901     0.998 Pearson's pro…
    +#> 2 2        0.987      13.9 0.0000343         5    0.914     0.998 Pearson's pro…
    +#> 3 3        0.988      14.4 0.0000290         5    0.919     0.998 Pearson's pro…
    +#> 4 4        0.984      12.5 0.0000573         5    0.895     0.998 Pearson's pro…
    +#> 5 5        0.988      14.1 0.0000318         5    0.916     0.998 Pearson's pro…
    +#> # ℹ 1 more variable: alternative <chr>
    +
    +
    +
    +

    Regression models

    +

    This type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like:

    +
    +
    lm_fit <- lm(age ~ circumference, data = Orange)
    +summary(lm_fit)
    +#> 
    +#> Call:
    +#> lm(formula = age ~ circumference, data = Orange)
    +#> 
    +#> Residuals:
    +#>     Min      1Q  Median      3Q     Max 
    +#> -317.88 -140.90  -17.20   96.54  471.16 
    +#> 
    +#> Coefficients:
    +#>               Estimate Std. Error t value Pr(>|t|)    
    +#> (Intercept)    16.6036    78.1406   0.212    0.833    
    +#> circumference   7.8160     0.6059  12.900 1.93e-14 ***
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> Residual standard error: 203.1 on 33 degrees of freedom
    +#> Multiple R-squared:  0.8345, Adjusted R-squared:  0.8295 
    +#> F-statistic: 166.4 on 1 and 33 DF,  p-value: 1.931e-14
    +
    +

    When we tidy these results, we get multiple rows of output for each model:

    +
    +
    tidy(lm_fit)
    +#> # A tibble: 2 × 5
    +#>   term          estimate std.error statistic  p.value
    +#>   <chr>            <dbl>     <dbl>     <dbl>    <dbl>
    +#> 1 (Intercept)      16.6     78.1       0.212 8.33e- 1
    +#> 2 circumference     7.82     0.606    12.9   1.93e-14
    +
    +

    Now we can handle multiple regressions at once using exactly the same workflow as before:

    +
    +
    Orange %>%
    +  nest(data = c(-Tree)) %>% 
    +  mutate(
    +    fit = map(data, ~ lm(age ~ circumference, data = .x)),
    +    tidied = map(fit, tidy)
    +  ) %>% 
    +  unnest(tidied) %>% 
    +  select(-data, -fit)
    +#> # A tibble: 10 × 6
    +#>    Tree  term          estimate std.error statistic   p.value
    +#>    <ord> <chr>            <dbl>     <dbl>     <dbl>     <dbl>
    +#>  1 1     (Intercept)    -265.      98.6      -2.68  0.0436   
    +#>  2 1     circumference    11.9      0.919    13.0   0.0000485
    +#>  3 2     (Intercept)    -132.      83.1      -1.59  0.172    
    +#>  4 2     circumference     7.80     0.560    13.9   0.0000343
    +#>  5 3     (Intercept)    -210.      85.3      -2.46  0.0574   
    +#>  6 3     circumference    12.0      0.835    14.4   0.0000290
    +#>  7 4     (Intercept)     -76.5     88.3      -0.867 0.426    
    +#>  8 4     circumference     7.17     0.572    12.5   0.0000573
    +#>  9 5     (Intercept)     -54.5     76.9      -0.709 0.510    
    +#> 10 5     circumference     8.79     0.621    14.1   0.0000318
    +
    +

    You can just as easily use multiple predictors in the regressions, as shown here on the mtcars dataset. We nest the data into automatic vs. manual cars (the am column), then perform the regression within each nested tibble.

    +
    +
    data(mtcars)
    +mtcars <- as_tibble(mtcars)  # to play nicely with list-cols
    +mtcars
    +#> # A tibble: 32 × 11
    +#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
    +#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
    +#>  1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
    +#>  2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
    +#>  3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
    +#>  4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
    +#>  5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
    +#>  6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
    +#>  7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
    +#>  8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2
    +#>  9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
    +#> 10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
    +#> # ℹ 22 more rows
    +
    +mtcars %>%
    +  nest(data = c(-am)) %>% 
    +  mutate(
    +    fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),  # S3 list-col
    +    tidied = map(fit, tidy)
    +  ) %>% 
    +  unnest(tidied) %>% 
    +  select(-data, -fit)
    +#> # A tibble: 8 × 6
    +#>      am term        estimate std.error statistic  p.value
    +#>   <dbl> <chr>          <dbl>     <dbl>     <dbl>    <dbl>
    +#> 1     1 (Intercept)   4.28      3.46      1.24   0.247   
    +#> 2     1 mpg          -0.101     0.0294   -3.43   0.00750 
    +#> 3     1 qsec          0.0398    0.151     0.264  0.798   
    +#> 4     1 gear         -0.0229    0.349    -0.0656 0.949   
    +#> 5     0 (Intercept)   4.92      1.40      3.52   0.00309 
    +#> 6     0 mpg          -0.192     0.0443   -4.33   0.000591
    +#> 7     0 qsec          0.0919    0.0983    0.935  0.365   
    +#> 8     0 gear          0.147     0.368     0.398  0.696
    +
    +

    What if you want not just the tidy() output, but the augment() and glance() outputs as well, while still performing each regression only once? Since we’re using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs.

    +
    +
    regressions <- 
    +  mtcars %>%
    +  nest(data = c(-am)) %>% 
    +  mutate(
    +    fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),
    +    tidied = map(fit, tidy),
    +    glanced = map(fit, glance),
    +    augmented = map(fit, augment)
    +  )
    +
    +regressions %>% 
    +  select(tidied) %>% 
    +  unnest(tidied)
    +#> # A tibble: 8 × 5
    +#>   term        estimate std.error statistic  p.value
    +#>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
    +#> 1 (Intercept)   4.28      3.46      1.24   0.247   
    +#> 2 mpg          -0.101     0.0294   -3.43   0.00750 
    +#> 3 qsec          0.0398    0.151     0.264  0.798   
    +#> 4 gear         -0.0229    0.349    -0.0656 0.949   
    +#> 5 (Intercept)   4.92      1.40      3.52   0.00309 
    +#> 6 mpg          -0.192     0.0443   -4.33   0.000591
    +#> 7 qsec          0.0919    0.0983    0.935  0.365   
    +#> 8 gear          0.147     0.368     0.398  0.696
    +
    +regressions %>% 
    +  select(glanced) %>% 
    +  unnest(glanced)
    +#> # A tibble: 2 × 12
    +#>   r.squared adj.r.squared sigma statistic  p.value    df    logLik   AIC   BIC
    +#>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>
    +#> 1     0.833         0.778 0.291     15.0  0.000759     3  -0.00580  10.0  12.8
    +#> 2     0.625         0.550 0.522      8.32 0.00170      3 -12.4      34.7  39.4
    +#> # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
    +
    +regressions %>% 
    +  select(augmented) %>% 
    +  unnest(augmented)
    +#> # A tibble: 32 × 10
    +#>       wt   mpg  qsec  gear .fitted  .resid  .hat .sigma  .cooksd .std.resid
    +#>    <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl>  <dbl>    <dbl>      <dbl>
    +#>  1  2.62  21    16.5     4    2.73 -0.107  0.517  0.304 0.0744      -0.527 
    +#>  2  2.88  21    17.0     4    2.75  0.126  0.273  0.304 0.0243       0.509 
    +#>  3  2.32  22.8  18.6     4    2.63 -0.310  0.312  0.279 0.188       -1.29  
    +#>  4  2.2   32.4  19.5     4    1.70  0.505  0.223  0.233 0.278        1.97  
    +#>  5  1.62  30.4  18.5     4    1.86 -0.244  0.269  0.292 0.0889      -0.982 
    +#>  6  1.84  33.9  19.9     4    1.56  0.274  0.286  0.286 0.125        1.12  
    +#>  7  1.94  27.3  18.9     4    2.19 -0.253  0.151  0.293 0.0394      -0.942 
    +#>  8  2.14  26    16.7     5    2.21 -0.0683 0.277  0.307 0.00732     -0.276 
    +#>  9  1.51  30.4  16.9     5    1.77 -0.259  0.430  0.284 0.263       -1.18  
    +#> 10  3.17  15.8  14.5     5    3.15  0.0193 0.292  0.308 0.000644     0.0789
    +#> # ℹ 22 more rows
    +
    +

    By combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. This includes:

    +
    • sorting by p-value or estimate to find the most significant terms across all tests,
    • p-value histograms, and
    • volcano plots comparing p-values to effect size estimates.
    +

    In each of these cases, we can easily filter, facet, or distinguish based on the term column. In short, this makes the tools of tidy data analysis available for the results of data analysis and models, not just the inputs.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources

    Find
    Explore searchable tables of all tidymodels packages and functions.

    Books
    Study up on statistics and modeling with our comprehensive books.

    News
    Hear the latest about tidymodels packages at the tidyverse blog.
    + + + + \ No newline at end of file diff --git a/content/learn/statistics/xtabs/figs/plot-indep-1.svg b/docs/learn/statistics/xtabs/figs/plot-indep-1.svg similarity index 100% rename from content/learn/statistics/xtabs/figs/plot-indep-1.svg rename to docs/learn/statistics/xtabs/figs/plot-indep-1.svg diff --git a/docs/learn/statistics/xtabs/figs/visualize-indep-1.svg b/docs/learn/statistics/xtabs/figs/visualize-indep-1.svg new file mode 100644 index 00000000..5dcc7b01 --- /dev/null +++ b/docs/learn/statistics/xtabs/figs/visualize-indep-1.svg @@ -0,0 +1,5092 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +500 +1000 + + + + + + + + + +0 +5 +10 +15 +20 +25 +stat +count +Simulation-Based Null Distribution + + diff --git a/docs/learn/statistics/xtabs/figs/visualize-indep-both-1.svg b/docs/learn/statistics/xtabs/figs/visualize-indep-both-1.svg new file mode 100644 index 00000000..61beecec --- /dev/null +++ b/docs/learn/statistics/xtabs/figs/visualize-indep-both-1.svg @@ -0,0 +1,5096 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.05 +0.10 +0.15 + + + + + + + + + + +0 +5 +10 +15 +20 +25 +Chi-Square stat +density +Simulation-Based and Theoretical Chi-Square Null Distributions + + diff --git a/docs/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg b/docs/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg new file mode 100644 index 00000000..7d7d7482 --- /dev/null +++ b/docs/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg @@ -0,0 +1,5081 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +500 +1000 + + + + + + +0 +10 +20 +stat +count +Simulation-Based Null Distribution + + diff --git a/content/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg b/docs/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg similarity index 100% rename 
from content/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg rename to docs/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg diff --git a/docs/learn/statistics/xtabs/index.html b/docs/learn/statistics/xtabs/index.html new file mode 100644 index 00000000..d69ea5df --- /dev/null +++ b/docs/learn/statistics/xtabs/index.html @@ -0,0 +1,933 @@ + + + + + + + + + + +tidymodels - Statistical analysis of contingency tables + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Statistical analysis of contingency tables

    +
    +
    statistical analysis
    +
    analysis of tables
    +
    hypothesis testing
    +
    +
    + +
    +
    +

    Use tests of independence and goodness of fit to analyze tables of counts.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    This article only requires that you have the tidymodels package installed.

    +

    In this vignette, we’ll walk through conducting a \(\chi^2\) (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We’ll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we’ll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.

    +

    Throughout this vignette, we’ll make use of the ad_data data set (available in the modeldata package, which is part of tidymodels). This data set is related to cognitive impairment in 333 patients from Craig-Schapiro et al (2011). See ?ad_data for more information on the variables included and their source. One of the main research questions in these data was how a person’s genetics related to the Apolipoprotein E gene affect their cognitive skills. The data shows:

    +
    +
    library(tidymodels) # Includes the infer package
    +
    +data(ad_data, package = "modeldata")
    +ad_data %>%
    +  select(Genotype, Class)
    +#> # A tibble: 333 × 2
    +#>    Genotype Class   
    +#>    <fct>    <fct>   
    +#>  1 E3E3     Control 
    +#>  2 E3E4     Control 
    +#>  3 E3E4     Control 
    +#>  4 E3E4     Control 
    +#>  5 E3E3     Control 
    +#>  6 E4E4     Impaired
    +#>  7 E2E3     Control 
    +#>  8 E2E3     Control 
    +#>  9 E3E3     Control 
    +#> 10 E2E3     Impaired
    +#> # ℹ 323 more rows
    +
    +

    The three main genetic variants are called E2, E3, and E4. The values in Genotype represent the genetic makeup of patients based on what they inherited from their parents (i.e., a value of “E2E4” means E2 from one parent and E4 from the other).

    +
    +
    +

    Test of independence

    +

    To carry out a chi-squared test of independence, we’ll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data:

    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    If there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise?

    +

    First, to calculate the observed statistic, we can use specify() and calculate().

    +
    +
    # calculate the observed statistic
    +observed_indep_statistic <- ad_data %>%
    +  specify(Genotype ~ Class) %>%
    +  calculate(stat = "Chisq")
    +
    +

    The observed \(\chi^2\) statistic is 21.5774809. Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics.

    +

    We can generate() the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person’s genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two.

    +
    +
    # generate the null distribution using randomization
    +null_distribution_simulated <- ad_data %>%
    +  specify(Genotype ~ Class) %>%
    +  hypothesize(null = "independence") %>%
    +  generate(reps = 5000, type = "permute") %>%
    +  calculate(stat = "Chisq")
    +
    +

    Note that, in the line specify(Genotype ~ Class) above, we could use the equivalent syntax specify(response = Genotype, explanatory = Class). The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization.

    +
    +
    # generate the null distribution by theoretical approximation
    +null_distribution_theoretical <- ad_data %>%
    +  specify(Genotype ~ Class) %>%
    +  hypothesize(null = "independence") %>%
    +  # note that we skip the generation step here!
    +  calculate(stat = "Chisq")
    +
    +

    To get a sense for what these distributions look like, and where our observed statistic falls, we can use visualize():

    +
    +
    # visualize the null distribution and test statistic!
    +null_distribution_simulated %>%
    +  visualize() + 
    +  shade_p_value(observed_indep_statistic,
    +                direction = "greater")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    We could also visualize the observed statistic against the theoretical null distribution. Note that we skip the generate() and calculate() steps when using the theoretical approach, and that we now need to provide method = "theoretical" to visualize().

    +
    +
    # visualize the theoretical null distribution and test statistic!
    +ad_data %>%
    +  specify(Genotype ~ Class) %>%
    +  hypothesize(null = "independence") %>%
    +  visualize(method = "theoretical") + 
    +  shade_p_value(observed_indep_statistic,
    +                direction = "greater")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into visualize(), and further provide method = "both".

    +
    +
    # visualize both null distributions and the test statistic!
    +null_distribution_simulated %>%
    +  visualize(method = "both") + 
    +  shade_p_value(observed_indep_statistic,
    +                direction = "greater")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Either way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value:

    +
    +
    # calculate the p value from the observed statistic and null distribution
    +p_value_independence <- null_distribution_simulated %>%
    +  get_p_value(obs_stat = observed_indep_statistic,
    +              direction = "greater")
    +
    +p_value_independence
    +#> # A tibble: 1 × 1
    +#>   p_value
    +#>     <dbl>
    +#> 1  0.0008
    +
    +

    Thus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than 21.5774809 is approximately 8 × 10^{-4}.

    +

    Note that, equivalently to the steps shown above, the package supplies a wrapper function, chisq_test, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this:

    +
    +
    chisq_test(ad_data, Genotype ~ Class)
    +#> # A tibble: 1 × 3
    +#>   statistic chisq_df  p_value
    +#>       <dbl>    <int>    <dbl>
    +#> 1      21.6        5 0.000630
    +
    +
    +
    +

    Goodness of fit

    +

    Now, moving on to a chi-squared goodness of fit test, we’ll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. For example, Song et al (2004) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known).

    +

    The rates of the meta-analysis and our observed data are:

    +
    +
    # Song, Y., Stampfer, M. J., & Liu, S. (2004). Meta-Analysis: Apolipoprotein E 
    +# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, 
    +# 141(2), 137.
    +meta_rates <- c("E2E2" = 0.71, "E2E3" = 11.4, "E2E4" = 2.32,
    +                "E3E3" = 61.0, "E3E4" = 22.6, "E4E4" = 2.22)
    +meta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100%
    +
    +obs_rates <- table(ad_data$Genotype)/nrow(ad_data)
    +round(cbind(obs_rates, meta_rates) * 100, 2)
    +#>      obs_rates meta_rates
    +#> E2E2      0.60       0.71
    +#> E2E3     11.11      11.37
    +#> E2E4      2.40       2.31
    +#> E3E3     50.15      60.85
    +#> E3E4     31.83      22.54
    +#> E4E4      3.90       2.21
    +
    +

    Suppose our null hypothesis is that Genotype follows the same frequency distribution as the meta-analysis. Let’s now test whether this difference in distributions is statistically significant.

    +

    First, to carry out this hypothesis test, we would calculate our observed statistic.

    +
    +
    # calculating the null distribution
    +observed_gof_statistic <- ad_data %>%
    +  specify(response = Genotype) %>%
    +  hypothesize(null = "point", p = meta_rates) %>%
    +  calculate(stat = "Chisq")
    +
    +

    The observed statistic is 23.3838483. Now, generating a null distribution, by just dropping in a call to generate():

    +
    +
    # generating a null distribution
    +null_distribution_gof <- ad_data %>%
    +  specify(response = Genotype) %>%
    +  hypothesize(null = "point", p = meta_rates) %>%
    +  generate(reps = 5000, type = "simulate") %>%
    +  calculate(stat = "Chisq")
    +
    +

    Again, to get a sense for what these distributions look like, and where our observed statistic falls, we can use visualize():

    +
    +
    # visualize the null distribution and test statistic!
    +null_distribution_gof %>%
    +  visualize() + 
    +  shade_p_value(observed_gof_statistic,
    +                direction = "greater")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? Calculating the p-value:

    +
    +
    # calculate the p-value
    +p_value_gof <- null_distribution_gof %>%
    +  get_p_value(observed_gof_statistic,
    +              direction = "greater")
    +
    +p_value_gof
    +#> # A tibble: 1 × 1
    +#>   p_value
    +#>     <dbl>
    +#> 1  0.0008
    +
    +

    Thus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately 8 × 10^{-4}.

    +

    Again, equivalently to the steps shown above, the package supplies a wrapper function, chisq_test, to carry out chi-squared goodness of fit tests on tidy data. The syntax goes like this:

    +
    +
    chisq_test(ad_data, response = Genotype, p = meta_rates)
    +#> # A tibble: 1 × 3
    +#>   statistic chisq_df  p_value
    +#>       <dbl>    <dbl>    <dbl>
    +#> 1      23.4        5 0.000285
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/work/bayes-opt/figs/bo-param-plot-1.svg b/docs/learn/work/bayes-opt/figs/bo-param-plot-1.svg new file mode 100644 index 00000000..18abd7ea --- /dev/null +++ b/docs/learn/work/bayes-opt/figs/bo-param-plot-1.svg @@ -0,0 +1,359 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Components + + + + + + + + + + +Cost (log-2) + + + + + + + + + + +Radial Basis Function sigma (log-10) + + + + + + + +0 +10 +20 +30 +40 + + + + + +0 +10 +20 +30 +40 + + + + + +0 +10 +20 +30 +40 +-7.5 +-5.0 +-2.5 +0.0 + + + + +-10 +-5 +0 +5 + + + + +0 +5 +10 +15 +20 + + + + + +Iterations + + diff --git a/docs/learn/work/bayes-opt/figs/bo-plot-1.svg b/docs/learn/work/bayes-opt/figs/bo-plot-1.svg new file mode 100644 index 00000000..127cb3e3 --- /dev/null +++ b/docs/learn/work/bayes-opt/figs/bo-plot-1.svg @@ -0,0 +1,265 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.25 +0.50 +0.75 + + + + + + + + +0 +10 +20 +30 +40 +Iteration +roc_auc + + diff --git a/docs/learn/work/bayes-opt/index.html b/docs/learn/work/bayes-opt/index.html new file mode 
100644 index 00000000..e5863725 --- /dev/null +++ b/docs/learn/work/bayes-opt/index.html @@ -0,0 +1,1472 @@ + + + + + + + + + + +tidymodels - Iterative Bayesian optimization of a classification model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Iterative Bayesian optimization of a classification model

    +
    +
    model tuning
    +
    Bayesian optimization
    +
    SVMs
    +
    +
    + +
    +
    +

    Identify the best hyperparameters for a model using Bayesian optimization of iterative search.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: kernlab, modeldata, themis, and tidymodels.

    +

    Many of the examples for model tuning focus on grid search. For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, iterative search can be used to analyze the existing tuning parameter results and then predict which tuning parameters to try next.

    +

    There are a variety of methods for iterative search and the focus in this article is on Bayesian optimization. For more information on this method, these resources might be helpful:

    + +
    +
    +

    Cell segmenting revisited

    +

    To demonstrate this approach to tuning models, let’s return to the cell segmentation data from the Getting Started article on resampling:

    +
    +
    library(tidymodels)
    +library(modeldata)
    +
    +# Load data
    +data(cells)
    +
    +set.seed(2369)
    +tr_te_split <- initial_split(cells %>% select(-case), prop = 3/4)
    +cell_train <- training(tr_te_split)
    +cell_test  <- testing(tr_te_split)
    +
    +set.seed(1697)
    +folds <- vfold_cv(cell_train, v = 10)
    +
    +
    +
    +

    The tuning scheme

    +

    Since the predictors are highly correlated, we can use a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about 64% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be tuned so that we have enough (but not too many) representations of the data.

    +
    +
    library(themis)
    +
    +cell_pre_proc <-
    +  recipe(class ~ ., data = cell_train) %>%
    +  step_YeoJohnson(all_predictors()) %>%
    +  step_normalize(all_predictors()) %>%
    +  step_pca(all_predictors(), num_comp = tune()) %>%
    +  step_downsample(class)
    +
    +

    In this analysis, we will use a support vector machine to model the data. Let’s use a radial basis function (RBF) kernel and tune its main parameter (\(\sigma\)). Additionally, the main SVM parameter, the cost value, also needs optimization.

    +
    +
    svm_mod <-
    +  svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>%
    +  set_engine("kernlab")
    +
    +

    These two objects (the recipe and model) will be combined into a single object via the workflow() function from the workflows package; this object will be used in the optimization process.

    +
    +
    svm_wflow <-
    +  workflow() %>%
    +  add_model(svm_mod) %>%
    +  add_recipe(cell_pre_proc)
    +
    +

    From this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by:

    +
    +
    svm_set <- extract_parameter_set_dials(svm_wflow)
    +svm_set
    +#> Collection of 3 parameters for tuning
    +#> 
    +#>  identifier      type    object
    +#>        cost      cost nparam[+]
    +#>   rbf_sigma rbf_sigma nparam[+]
    +#>    num_comp  num_comp nparam[+]
    +
    +

    The default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the update() function. Let’s constrain the search to one to twenty components by updating the num_comp parameter. Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all):

    +
    +
    svm_set <- 
    +  svm_set %>% 
    +  update(num_comp = num_comp(c(0L, 20L)))
    +
    +
    +
    +

    Sequential tuning

    +

    Bayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter values, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an acquisition function.

    +

    For example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as \(\kappa\)) is a value that can be used to make trade-offs between exploration and exploitation.

    +
      +
    • Exploration means that the search will consider candidates in untested space.

    • +
    • Exploitation focuses on areas where the previous best results occurred.

    • +
    +

    The variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity.

    +

    We’ll use another acquisition function, expected improvement, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the package vignette for acquisition functions.

    +
    +
    set.seed(12)
    +search_res <-
    +  svm_wflow %>% 
    +  tune_bayes(
    +    resamples = folds,
    +    # To use non-default parameter ranges
    +    param_info = svm_set,
    +    # Generate five at semi-random to start
    +    initial = 5,
    +    iter = 50,
    +    # How to measure performance?
    +    metrics = metric_set(roc_auc),
    +    control = control_bayes(no_improve = 30, verbose = TRUE)
    +  )
    +#> 
    +#> ❯  Generating a set of 5 initial parameter results
    +#> ✓ Initialization complete
    +#> 
    +#> 
    +#> ── Iteration 1 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8794 (@iter 0)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=29.2, rbf_sigma=0.707, num_comp=17
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.7883 (+/-0.0111)
    +#> 
    +#> ── Iteration 2 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8794 (@iter 0)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=30.4, rbf_sigma=0.0087, num_comp=13
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ♥ Newest results:    roc_auc=0.8954 (+/-0.0101)
    +#> 
    +#> ── Iteration 3 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8954 (@iter 2)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.0374, rbf_sigma=0.00425, num_comp=11
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8749 (+/-0.0123)
    +#> 
    +#> ── Iteration 4 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8954 (@iter 2)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=28.8, rbf_sigma=0.00386, num_comp=4
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8738 (+/-0.012)
    +#> 
    +#> ── Iteration 5 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8954 (@iter 2)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=21.5, rbf_sigma=0.0738, num_comp=11
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8523 (+/-0.0115)
    +#> 
    +#> ── Iteration 6 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8954 (@iter 2)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=24.1, rbf_sigma=0.0111, num_comp=18
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8829 (+/-0.0101)
    +#> 
    +#> ── Iteration 7 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8954 (@iter 2)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=1.48, rbf_sigma=0.00629, num_comp=13
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8801 (+/-0.0118)
    +#> 
    +#> ── Iteration 8 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8954 (@iter 2)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=25.3, rbf_sigma=0.011, num_comp=11
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ♥ Newest results:    roc_auc=0.8985 (+/-0.0102)
    +#> 
    +#> ── Iteration 9 ───────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8985 (@iter 8)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=14.8, rbf_sigma=0.628, num_comp=0
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.7852 (+/-0.0173)
    +#> 
    +#> ── Iteration 10 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8985 (@iter 8)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=30.1, rbf_sigma=0.0102, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ♥ Newest results:    roc_auc=0.899 (+/-0.00955)
    +#> 
    +#> ── Iteration 11 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.899 (@iter 10)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=25.1, rbf_sigma=0.0111, num_comp=9
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ♥ Newest results:    roc_auc=0.8994 (+/-0.00996)
    +#> 
    +#> ── Iteration 12 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8994 (@iter 11)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=28.3, rbf_sigma=0.0118, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8989 (+/-0.00928)
    +#> 
    +#> ── Iteration 13 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8994 (@iter 11)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=28.5, rbf_sigma=0.0026, num_comp=19
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8879 (+/-0.00951)
    +#> 
    +#> ── Iteration 14 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8994 (@iter 11)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=30.4, rbf_sigma=0.00245, num_comp=9
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.886 (+/-0.0111)
    +#> 
    +#> ── Iteration 15 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8994 (@iter 11)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=31.5, rbf_sigma=0.0179, num_comp=9
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8964 (+/-0.00967)
    +#> 
    +#> ── Iteration 16 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8994 (@iter 11)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=31.1, rbf_sigma=0.00933, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8994 (+/-0.00968)
    +#> 
    +#> ── Iteration 17 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8994 (@iter 11)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=27.3, rbf_sigma=0.00829, num_comp=9
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ♥ Newest results:    roc_auc=0.8995 (+/-0.00996)
    +#> 
    +#> ── Iteration 18 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=31.7, rbf_sigma=0.00363, num_comp=12
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8909 (+/-0.00973)
    +#> 
    +#> ── Iteration 19 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=29.6, rbf_sigma=0.0119, num_comp=8
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8964 (+/-0.0104)
    +#> 
    +#> ── Iteration 20 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=23.6, rbf_sigma=0.0121, num_comp=0
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8636 (+/-0.0122)
    +#> 
    +#> ── Iteration 21 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=27.6, rbf_sigma=0.00824, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8993 (+/-0.00961)
    +#> 
    +#> ── Iteration 22 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=27.6, rbf_sigma=0.00901, num_comp=9
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8993 (+/-0.01)
    +#> 
    +#> ── Iteration 23 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=24.1, rbf_sigma=0.0133, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8988 (+/-0.00935)
    +#> 
    +#> ── Iteration 24 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=18.8, rbf_sigma=0.00058, num_comp=20
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8784 (+/-0.0112)
    +#> 
    +#> ── Iteration 25 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=29.3, rbf_sigma=0.00958, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8994 (+/-0.00959)
    +#> 
    +#> ── Iteration 26 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=30.6, rbf_sigma=0.00841, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8993 (+/-0.00949)
    +#> 
    +#> ── Iteration 27 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00169, rbf_sigma=0.0201, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8794 (+/-0.0119)
    +#> 
    +#> ── Iteration 28 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.0012, rbf_sigma=0.000867, num_comp=20
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.3452 (+/-0.116)
    +#> 
    +#> ── Iteration 29 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00128, rbf_sigma=0.0138, num_comp=4
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8671 (+/-0.0132)
    +#> 
    +#> ── Iteration 30 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.0319, rbf_sigma=0.0279, num_comp=9
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8805 (+/-0.0121)
    +#> 
    +#> ── Iteration 31 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.0011, rbf_sigma=0.00787, num_comp=8
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8762 (+/-0.0121)
    +#> 
    +#> ── Iteration 32 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=7.06, rbf_sigma=0.00645, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.89 (+/-0.0102)
    +#> 
    +#> ── Iteration 33 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.000998, rbf_sigma=0.305, num_comp=7
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8757 (+/-0.0126)
    +#> 
    +#> ── Iteration 34 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00615, rbf_sigma=0.0134, num_comp=8
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.877 (+/-0.0122)
    +#> 
    +#> ── Iteration 35 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.208, rbf_sigma=0.00946, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8793 (+/-0.0122)
    +#> 
    +#> ── Iteration 36 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=31.6, rbf_sigma=0.00481, num_comp=15
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8961 (+/-0.00885)
    +#> 
    +#> ── Iteration 37 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00108, rbf_sigma=0.653, num_comp=11
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.3649 (+/-0.106)
    +#> 
    +#> ── Iteration 38 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00156, rbf_sigma=0.13, num_comp=5
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8816 (+/-0.0121)
    +#> 
    +#> ── Iteration 39 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=7.03, rbf_sigma=0.235, num_comp=16
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8572 (+/-0.0117)
    +#> 
    +#> ── Iteration 40 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00466, rbf_sigma=0.211, num_comp=1
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.7714 (+/-0.0105)
    +#> 
    +#> ── Iteration 41 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.0357, rbf_sigma=0.00126, num_comp=1
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.7725 (+/-0.0106)
    +#> 
    +#> ── Iteration 42 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=23.1, rbf_sigma=0.0332, num_comp=16
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8612 (+/-0.0137)
    +#> 
    +#> ── Iteration 43 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=3.56, rbf_sigma=0.0294, num_comp=3
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8724 (+/-0.0126)
    +#> 
    +#> ── Iteration 44 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00168, rbf_sigma=0.0337, num_comp=7
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8791 (+/-0.0123)
    +#> 
    +#> ── Iteration 45 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00128, rbf_sigma=0.00258, num_comp=10
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.3444 (+/-0.114)
    +#> 
    +#> ── Iteration 46 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.506, rbf_sigma=0.000548, num_comp=4
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.8653 (+/-0.0131)
    +#> 
    +#> ── Iteration 47 ──────────────────────────────────────────────────────
    +#> 
    +#> i Current best:      roc_auc=0.8995 (@iter 17)
    +#> i Gaussian process model
    +#> ✓ Gaussian process model
    +#> i Generating 5000 candidates
    +#> i Predicted candidates
    +#> i cost=0.00142, rbf_sigma=0.204, num_comp=18
    +#> i Estimating performance
    +#> ✓ Estimating performance
    +#> ⓧ Newest results:    roc_auc=0.3586 (+/-0.113)
    +#> ! No improvement for 30 iterations; returning current results.
    +
    +

    The resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number:

    +
    +
    search_res
    +#> # Tuning results
    +#> # 10-fold cross-validation 
    +#> # A tibble: 480 × 5
    +#>    splits             id     .metrics         .notes           .iter
    +#>    <list>             <chr>  <list>           <list>           <int>
    +#>  1 <split [1362/152]> Fold01 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  2 <split [1362/152]> Fold02 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  3 <split [1362/152]> Fold03 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  4 <split [1362/152]> Fold04 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  5 <split [1363/151]> Fold05 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  6 <split [1363/151]> Fold06 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  7 <split [1363/151]> Fold07 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  8 <split [1363/151]> Fold08 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#>  9 <split [1363/151]> Fold09 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#> 10 <split [1363/151]> Fold10 <tibble [5 × 7]> <tibble [0 × 3]>     0
    +#> # ℹ 470 more rows
    +
    +

    As with grid search, we can summarize the results over resamples:

    +
    +
    estimates <- 
    +  collect_metrics(search_res) %>% 
    +  arrange(.iter)
    +
    +estimates
    +#> # A tibble: 52 × 10
    +#>        cost    rbf_sigma num_comp .metric .estimator  mean     n std_err .config
    +#>       <dbl>        <dbl>    <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>  
    +#>  1  0.00383      2.72e-6       17 roc_auc binary     0.348    10  0.114  Prepro…
    +#>  2  0.250        1.55e-2        7 roc_auc binary     0.879    10  0.0122 Prepro…
    +#>  3  0.0372       1.02e-9        3 roc_auc binary     0.242    10  0.0574 Prepro…
    +#>  4  1.28         8.13e-8        8 roc_auc binary     0.344    10  0.114  Prepro…
    +#>  5 10.3          1.37e-3       14 roc_auc binary     0.877    10  0.0117 Prepro…
    +#>  6 29.2          7.07e-1       17 roc_auc binary     0.788    10  0.0111 Iter1  
    +#>  7 30.4          8.70e-3       13 roc_auc binary     0.895    10  0.0101 Iter2  
    +#>  8  0.0374       4.25e-3       11 roc_auc binary     0.875    10  0.0123 Iter3  
    +#>  9 28.8          3.86e-3        4 roc_auc binary     0.874    10  0.0120 Iter4  
    +#> 10 21.5          7.38e-2       11 roc_auc binary     0.852    10  0.0115 Iter5  
    +#> # ℹ 42 more rows
    +#> # ℹ 1 more variable: .iter <int>
    +
    +

    The best performance of the initial set of candidate values was AUC = 0.8793995. The best results were achieved at iteration 17 with a corresponding AUC value of 0.8995344. The five best results are:

    +
    +
    show_best(search_res, metric = "roc_auc")
    +#> # A tibble: 5 × 10
    +#>    cost rbf_sigma num_comp .metric .estimator  mean     n std_err .config .iter
    +#>   <dbl>     <dbl>    <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>   <int>
    +#> 1  27.3   0.00829        9 roc_auc binary     0.900    10 0.00996 Iter17     17
    +#> 2  29.3   0.00958       10 roc_auc binary     0.899    10 0.00959 Iter25     25
    +#> 3  25.1   0.0111         9 roc_auc binary     0.899    10 0.00996 Iter11     11
    +#> 4  31.1   0.00933       10 roc_auc binary     0.899    10 0.00968 Iter16     16
    +#> 5  27.6   0.00901        9 roc_auc binary     0.899    10 0.0100  Iter22     22
    +
    +

    A plot of the search iterations can be created via:

    +
    +
    autoplot(search_res, type = "performance")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    There are many parameter combinations that have roughly equivalent results.

    +

    How did the parameters change over iterations?

    +
    +
    autoplot(search_res, type = "parameters") + 
    +  labs(x = "Iterations", y = NULL)
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  kernlab    * 0.9-32  2023-01-31 [1] CRAN (R 4.3.0)
    +#>  modeldata  * 1.1.0   2023-01-25 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang      * 1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  themis     * 1.0.1   2023-04-14 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/work/case-weights/figs/unnamed-chunk-3-1.svg b/docs/learn/work/case-weights/figs/unnamed-chunk-5-1.svg similarity index 100% rename from content/learn/work/case-weights/figs/unnamed-chunk-3-1.svg rename to docs/learn/work/case-weights/figs/unnamed-chunk-5-1.svg diff --git a/content/learn/work/case-weights/figs/unnamed-chunk-4-1.svg b/docs/learn/work/case-weights/figs/unnamed-chunk-6-1.svg similarity index 100% rename from content/learn/work/case-weights/figs/unnamed-chunk-4-1.svg rename to docs/learn/work/case-weights/figs/unnamed-chunk-6-1.svg diff --git a/docs/learn/work/case-weights/index.html b/docs/learn/work/case-weights/index.html new file mode 100644 index 00000000..d6415992 --- /dev/null +++ b/docs/learn/work/case-weights/index.html @@ -0,0 +1,899 @@ + + + + + + + + + + +tidymodels - Creating case weights based on time + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Creating case weights based on time

    +
    +
    model fitting
    +
    case weights
    +
    time series
    +
    +
    + +
    +
    +

    Create models that use coefficients, extract them from fitted models, and visualize them.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: tidymodels.

    +

    This article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others.

    +
    +
    +

    Example Data

    +

    To demonstrate we will use the Chicago data from the modeldata package.

    +
    +
    library(tidymodels)
    +data(Chicago)
    +
    +Chicago <- Chicago %>%
    +  select(ridership, date, one_of(stations))
    +
    +

    From ?Chicago

    +
    +

    These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station.

    +
    +
    +

    The date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules.

    +
    +

    For simplicity, we have limited our view to the date and station variables.

    +
    +
    +

    Creating weights

    +

    This data set contains daily information from 2001-01-22 to 2016-08-28. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations occurred a long time ago and may not be as representative of the future as more recent observations.

    +

    We could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past.

    +

    We need to decide on a way to calculate the case weights. The main constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when x = 0 and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function

    +

    \[ weight = base ^ x \]

    +

    where base is some constant and x is the number of days. To make sure that we select a reasonable base, we need to do some manual testing, starting with looking at how old the oldest observation is.

    +
    +
    difftime("2016-01-01", min(Chicago$date))
    +#> Time difference of 5457.333 days
    +
    +

    Using this information we can visualize the weight curve, to see if we like the value of base.

    +
    +
    tibble_days <- tibble(days = 0:5457)
    +
    +tibble_days %>%
    +  ggplot(aes(days)) +
    +  geom_function(fun = ~ 0.99 ^ .x)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Setting base to 0.99 appears to down-weight the observations too much. Any observation more than a year old would have essentially no influence.

    +

    Let us try a few more values of base to find one that gives a better balance.

    +
    +
    map_dfr(
    +  c(0.99, 0.999, 0.9999),
    +  ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days)
    +) %>%
    +  ggplot(aes(days, value, group = base, color = base)) +
    +  geom_line()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    From this, we could pick something around 0.999 since it gives a better balance. Let’s create a small function to help us encode this weight.

    +
    +
    weights_from_dates <- function(x, ref) {
    +  if_else(
    +    condition = x >= ref,
    +    true = 1,     # <- Notice that I'm setting any future weight to 1.
    +    false = 0.999 ^ as.numeric(difftime(ref, x, units = "days"))
    +  )
    +}
    +
    +

    We then modify Chicago to add a weight column, explicitly making it an importance weight with importance_weights().

    +
    +
    Chicago <- Chicago %>%
    +  mutate(weight = weights_from_dates(date, "2016-01-01"),
    +         weight = importance_weights(weight))
    +
    +

    This approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don’t need to use days as the unit. Seconds, weeks, or years could be used as well.

    +
    +
    +

    Modeling

    +

    We start by splitting up our data into a training and testing set based on the day "2016-01-01". We added weights to the data set before splitting it so each set has weights.

    +
    +
    Chicago_train <- Chicago %>% filter(date < "2016-01-01")
    +Chicago_test <- Chicago %>% filter(date >= "2016-01-01")
    +
    +

    Next, we are going to create a recipe. The weights won’t have any influence on the preprocessing since none of these operations are supervised and we are using importance weights.

    +
    +
    base_recipe <-
    +  recipe(ridership ~ ., data = Chicago_train) %>%
    +  # Create date features
    +  step_date(date) %>%
    +  step_holiday(date, keep_original_cols = FALSE) %>%
    +  # Remove any columns with a single unique value
    +  step_zv(all_predictors()) %>%
    +  # Normalize all the numerical features
    +  step_normalize(all_numeric_predictors()) %>%
    +  # Perform PCA to reduce the correlation between the stations
    +  step_pca(all_numeric_predictors(), threshold = 0.95)
    +
    +

    Next we need to build the rest of the workflow. We use a linear regression specification

    +
    +
    lm_spec <-
    +  linear_reg() %>%
    +  set_engine("lm")
    +
    +

    and we add these together in the workflow. To activate the case weights, we use the add_case_weights() function to specify the name of the case weights being used.

    +
    +
    lm_wflow <-
    +  workflow() %>% 
    +  add_case_weights(weight) %>%
    +  add_recipe(base_recipe) %>%
    +  add_model(lm_spec)
    +
    +lm_wflow
    +#> ══ Workflow ══════════════════════════════════════════════════════════
    +#> Preprocessor: Recipe
    +#> Model: linear_reg()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> 5 Recipe Steps
    +#> 
    +#> • step_date()
    +#> • step_holiday()
    +#> • step_zv()
    +#> • step_normalize()
    +#> • step_pca()
    +#> 
    +#> ── Case Weights ──────────────────────────────────────────────────────
    +#> weight
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> Linear Regression Model Specification (regression)
    +#> 
    +#> Computational engine: lm
    +
    +

    With all that done we can fit the workflow with the usual syntax:

    +
    +
    lm_fit <- fit(lm_wflow, data = Chicago_train)
    +lm_fit
    +#> ══ Workflow [trained] ════════════════════════════════════════════════
    +#> Preprocessor: Recipe
    +#> Model: linear_reg()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> 5 Recipe Steps
    +#> 
    +#> • step_date()
    +#> • step_holiday()
    +#> • step_zv()
    +#> • step_normalize()
    +#> • step_pca()
    +#> 
    +#> ── Case Weights ──────────────────────────────────────────────────────
    +#> weight
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> 
    +#> Call:
    +#> stats::lm(formula = ..y ~ ., data = data, weights = weights)
    +#> 
    +#> Coefficients:
    +#>   (Intercept)    date_dowMon    date_dowTue    date_dowWed    date_dowThu  
    +#>      1.762599      13.307654      14.689027      14.620178      14.382313  
    +#>   date_dowFri    date_dowSat  date_monthFeb  date_monthMar  date_monthApr  
    +#>     13.695433       1.228233       0.364342       1.348229       1.409897  
    +#> date_monthMay  date_monthJun  date_monthJul  date_monthAug  date_monthSep  
    +#>      1.188189       2.598296       2.219721       2.406998       1.932061  
    +#> date_monthOct  date_monthNov  date_monthDec            PC1            PC2  
    +#>      2.655552       0.909007      -0.004751       0.073014      -1.591021  
    +#>           PC3            PC4            PC5  
    +#>     -0.608386      -0.205305       0.696010
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/learn/work/nested-resampling/figs/choose-1.svg b/docs/learn/work/nested-resampling/figs/choose-1.svg new file mode 100644 index 00000000..8ad0cee3 --- /dev/null +++ b/docs/learn/work/nested-resampling/figs/choose-1.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +10 +20 +30 + + + + + + + + + + + + + + + +0.25 +0.5 +1 +2 +4 +8 +16 +32 +64 +128 +256 +SVM Cost +count + + diff --git a/docs/learn/work/nested-resampling/figs/not-nested-1.svg b/docs/learn/work/nested-resampling/figs/not-nested-1.svg new file mode 100644 index 00000000..bafe2ea4 --- /dev/null +++ b/docs/learn/work/nested-resampling/figs/not-nested-1.svg @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +2.6 +2.8 +3.0 +3.2 +3.4 + + + + + + + + +1 +8 +64 +SVM Cost +RMSE + + diff --git a/docs/learn/work/nested-resampling/figs/rmse-plot-1.svg b/docs/learn/work/nested-resampling/figs/rmse-plot-1.svg new file mode 100644 index 00000000..e7c81dc5 --- /dev/null +++ b/docs/learn/work/nested-resampling/figs/rmse-plot-1.svg @@ -0,0 +1,162 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +3.0 +3.5 + + + + + +1 +8 +64 +SVM Cost +Inner RMSE + + diff --git a/content/learn/work/nested-resampling/figs/resampling.svg b/docs/learn/work/nested-resampling/img/resampling.svg similarity index 100% rename from content/learn/work/nested-resampling/figs/resampling.svg rename to docs/learn/work/nested-resampling/img/resampling.svg diff --git a/docs/learn/work/nested-resampling/index.html b/docs/learn/work/nested-resampling/index.html new file mode 100644 index 00000000..2616589f --- /dev/null +++ 
b/docs/learn/work/nested-resampling/index.html @@ -0,0 +1,987 @@ + + + + + + + + + + +tidymodels - Nested resampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Nested resampling

    +
    +
    nested resampling
    +
    SVMs
    +
    +
    + +
    +
    +

    Estimate the best hyperparameters for a model using nested resampling.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: furrr, kernlab, mlbench, scales, and tidymodels.

    +

    In this article, we discuss an alternative method for evaluating and tuning models, called nested resampling. While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance.

    +
    +
    +

    Resampling models

    +

    A typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. In rsample, we use the term analysis set for the data that are used to fit the model and the term assessment set for the set used to compute performance:

    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    A common method for tuning models is grid search where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter.

    +

    The potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for optimization bias since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance.

    +

    Nested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An outer resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times.

    +

    Once the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method’s assessment sets is an unbiased estimate of the model.

    +

    We will simulate some regression data to illustrate the methods. The mlbench package has a function mlbench::mlbench.friedman1() that can simulate a complex regression data structure from the original MARS publication. A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed.

    +
    +
    library(mlbench)
    +sim_data <- function(n) {
    +  tmp <- mlbench.friedman1(n, sd = 1)
    +  tmp <- cbind(tmp$x, tmp$y)
    +  tmp <- as.data.frame(tmp)
    +  names(tmp)[ncol(tmp)] <- "y"
    +  tmp
    +}
    +
    +set.seed(9815)
    +train_dat <- sim_data(100)
    +large_dat <- sim_data(10^5)
    +
    +
    +
    +

    Nested resampling

    +

    To get started, the types of resampling methods need to be specified. This isn’t a large data set, so 5 repeats of 10-fold cross validation will be used as the outer resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let’s use 25 iterations of the bootstrap. This means that there will eventually be 5 * 10 * 25 = 1250 models that are fit to the data per tuning parameter. These models will be discarded once the performance of the model has been quantified.

    +

    To create the tibble with the resampling specifications:

    +
    +
    library(tidymodels)
    +results <- nested_cv(train_dat, 
    +                     outside = vfold_cv(repeats = 5), 
    +                     inside = bootstraps(times = 25))
    +results
    +#> # Nested resampling:
    +#> #  outer: 10-fold cross-validation repeated 5 times
    +#> #  inner: Bootstrap sampling
    +#> # A tibble: 50 × 4
    +#>    splits          id      id2    inner_resamples
    +#>    <list>          <chr>   <chr>  <list>         
    +#>  1 <split [90/10]> Repeat1 Fold01 <boot [25 × 2]>
    +#>  2 <split [90/10]> Repeat1 Fold02 <boot [25 × 2]>
    +#>  3 <split [90/10]> Repeat1 Fold03 <boot [25 × 2]>
    +#>  4 <split [90/10]> Repeat1 Fold04 <boot [25 × 2]>
    +#>  5 <split [90/10]> Repeat1 Fold05 <boot [25 × 2]>
    +#>  6 <split [90/10]> Repeat1 Fold06 <boot [25 × 2]>
    +#>  7 <split [90/10]> Repeat1 Fold07 <boot [25 × 2]>
    +#>  8 <split [90/10]> Repeat1 Fold08 <boot [25 × 2]>
    +#>  9 <split [90/10]> Repeat1 Fold09 <boot [25 × 2]>
    +#> 10 <split [90/10]> Repeat1 Fold10 <boot [25 × 2]>
    +#> # ℹ 40 more rows
    +
    +

    The splitting information for each resample is contained in the split objects. Focusing on the second fold of the first repeat:

    +
    +
    results$splits[[2]]
    +#> <Analysis/Assess/Total>
    +#> <90/10/100>
    +
    +

    <90/10/100> indicates the number of observations in the analysis set, assessment set, and the original data.

    +

    Each element of inner_resamples has its own tibble with the bootstrapping splits.

    +
    +
    results$inner_resamples[[5]]
    +#> # Bootstrap sampling 
    +#> # A tibble: 25 × 2
    +#>    splits          id         
    +#>    <list>          <chr>      
    +#>  1 <split [90/31]> Bootstrap01
    +#>  2 <split [90/33]> Bootstrap02
    +#>  3 <split [90/37]> Bootstrap03
    +#>  4 <split [90/31]> Bootstrap04
    +#>  5 <split [90/32]> Bootstrap05
    +#>  6 <split [90/32]> Bootstrap06
    +#>  7 <split [90/36]> Bootstrap07
    +#>  8 <split [90/34]> Bootstrap08
    +#>  9 <split [90/29]> Bootstrap09
    +#> 10 <split [90/31]> Bootstrap10
    +#> # ℹ 15 more rows
    +
    +

    These are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data:

    +
    +
    results$inner_resamples[[5]]$splits[[1]]
    +#> <Analysis/Assess/Total>
    +#> <90/31/90>
    +
    +

    To start, we need to define how the model will be created and measured. Let’s use a radial basis support vector machine model via the function kernlab::ksvm. This model is generally considered to have two tuning parameters: the SVM cost value and the kernel parameter sigma. For illustration purposes here, only the cost value will be tuned and the function kernlab::sigest will be used to estimate sigma during each model fit. This is automatically done by ksvm.

    +

    After the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. One important note: for this model, it is critical to center and scale the predictors before computing dot products. We don’t do this operation here because mlbench.friedman1 simulates all of the predictors to be standardized uniform random variables.

    +

    Our function to fit the model and compute the RMSE is:

    +
    +
    library(kernlab)
    +
    +# `object` will be an `rsplit` object from our `results` tibble
    +# `cost` is the tuning parameter
    +svm_rmse <- function(object, cost = 1) {
    +  y_col <- ncol(object$data)
    +  mod <- 
    +    svm_rbf(mode = "regression", cost = cost) %>% 
    +    set_engine("kernlab") %>% 
    +    fit(y ~ ., data = analysis(object))
    +  
    +  holdout_pred <- 
    +    predict(mod, assessment(object) %>% dplyr::select(-y)) %>% 
    +    bind_cols(assessment(object) %>% dplyr::select(y))
    +  rmse(holdout_pred, truth = y, estimate = .pred)$.estimate
    +}
    +
    +# In some cases, we want to parameterize the function over the tuning parameter:
    +rmse_wrapper <- function(cost, object) svm_rmse(object, cost)
    +
    +

    For the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. To do this, create a wrapper:

    +
    +
    # `object` will be an `rsplit` object for the bootstrap samples
    +tune_over_cost <- function(object) {
    +  tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% 
    +    mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object))
    +}
    +
    +

    Since this will be called across the set of outer cross-validation splits, another wrapper is required:

    +
    +
    # `object` is an `rsplit` object in `results$inner_resamples` 
    +summarize_tune_results <- function(object) {
    +  # Return row-bound tibble that has the 25 bootstrap results
    +  map_df(object$splits, tune_over_cost) %>%
    +    # For each value of the tuning parameter, compute the 
    +    # average RMSE which is the inner bootstrap estimate. 
    +    group_by(cost) %>%
    +    summarize(mean_RMSE = mean(RMSE, na.rm = TRUE),
    +              n = length(RMSE),
    +              .groups = "drop")
    +}
    +
    +

    Now that those functions are defined, we can execute all the inner resampling loops:

    +
    +
    tuning_results <- map(results$inner_resamples, summarize_tune_results) 
    +
    +

    Alternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using map(), the function future_map() parallelizes the iterations using the future package. The multisession plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations.

    +
    +
    library(furrr)
    +plan(multisession)
    +
    +tuning_results <- future_map(results$inner_resamples, summarize_tune_results) 
    +
    +

    The object tuning_results is a list of data frames for each of the 50 outer resamples.

    +

    Let’s make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations:

    +
    +
    library(scales)
    +
    +pooled_inner <- tuning_results %>% bind_rows
    +
    +best_cost <- function(dat) dat[which.min(dat$mean_RMSE),]
    +
    +p <- 
    +  ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + 
    +  scale_x_continuous(trans = 'log2') +
    +  xlab("SVM Cost") + ylab("Inner RMSE")
    +
    +for (i in 1:length(tuning_results))
    +  p <- p  +
    +  geom_line(data = tuning_results[[i]], alpha = .2) +
    +  geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4)
    +
    +p <- p + geom_smooth(data = pooled_inner, se = FALSE)
    +p
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Each gray line is a separate bootstrap resampling curve created from a different 90% of the data. The blue line is a LOESS smooth of all the results pooled together.

    +

    To determine the best parameter estimate for each of the outer resampling iterations:

    +
    +
    cost_vals <- 
    +  tuning_results %>% 
    +  map_df(best_cost) %>% 
    +  select(cost)
    +
    +results <- 
    +  bind_cols(results, cost_vals) %>% 
    +  mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1))))
    +
    +ggplot(results, aes(x = cost)) + 
    +  geom_bar() + 
    +  xlab("SVM Cost") + 
    +  scale_x_discrete(drop = FALSE)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Most of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger.

    +

    Now that we have these estimates, we can compute the outer resampling results for each of the 50 splits using the corresponding tuning parameter value:

    +
    +
    results <- 
    +  results %>% 
    +  mutate(RMSE = map2_dbl(splits, cost, svm_rmse))
    +
    +summary(results$RMSE)
    +#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    +#>   1.672   2.095   2.685   2.690   3.252   4.254
    +
    +

    The estimated RMSE for the model tuning process is 2.69.

    +

    What is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, 50 SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. The associated RMSE is the biased estimate.

    +
    +
    not_nested <- 
    +  map(results$splits, tune_over_cost) %>%
    +  bind_rows
    +
    +outer_summary <- not_nested %>% 
    +  group_by(cost) %>% 
    +  summarize(outer_RMSE = mean(RMSE), n = length(RMSE))
    +
    +outer_summary
    +#> # A tibble: 11 × 3
    +#>      cost outer_RMSE     n
    +#>     <dbl>      <dbl> <int>
    +#>  1   0.25       3.54    50
    +#>  2   0.5        3.11    50
    +#>  3   1          2.77    50
    +#>  4   2          2.62    50
    +#>  5   4          2.65    50
    +#>  6   8          2.75    50
    +#>  7  16          2.82    50
    +#>  8  32          2.82    50
    +#>  9  64          2.83    50
    +#> 10 128          2.83    50
    +#> 11 256          2.82    50
    +
    +ggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + 
    +  geom_point() + 
    +  geom_line() + 
    +  scale_x_continuous(trans = 'log2') +
    +  xlab("SVM Cost") + ylab("RMSE")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The non-nested procedure estimates the RMSE to be 2.62. Both estimates are fairly close.

    +

    The approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning.

    +
    +
    finalModel <- ksvm(y ~ ., data = train_dat, C = 2)
    +large_pred <- predict(finalModel, large_dat[, -ncol(large_dat)])
    +sqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE))
    +#> [1] 2.712059
    +
    +

    The nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  furrr      * 0.3.1   2022-08-15 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  kernlab    * 0.9-32  2023-01-31 [1] CRAN (R 4.3.0)
    +#>  mlbench    * 2.1-3.1 2023-05-05 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  scales     * 1.2.1   2022-08-20 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/work/tune-svm/figs/augment-preds-1.svg b/docs/learn/work/tune-svm/figs/augment-preds-1.svg similarity index 100% rename from content/learn/work/tune-svm/figs/augment-preds-1.svg rename to docs/learn/work/tune-svm/figs/augment-preds-1.svg diff --git a/docs/learn/work/tune-svm/index.html b/docs/learn/work/tune-svm/index.html new file mode 100644 index 00000000..36ee5bb6 --- /dev/null +++ b/docs/learn/work/tune-svm/index.html @@ -0,0 +1,970 @@ + + + + + + + + + + +tidymodels - Model tuning via grid search + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Model tuning via grid search

    +
    +
    model tuning
    +
    SVMs
    +
    +
    + +
    +
    +

    Choose hyperparameters for a model by training on a grid of many possible parameter values.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: kernlab, mlbench, and tidymodels.

    +

    This article demonstrates how to tune a model using grid search. Many models have hyperparameters that can’t be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best.

    +
    +
    +

    Example data

    +

    To demonstrate model tuning, we’ll use the Ionosphere data in the mlbench package:

    +
    +
    library(tidymodels)
    +library(mlbench)
    +data(Ionosphere)
    +
    +

    From ?Ionosphere:

    +
    +

    This radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. “good” radar returns are those showing evidence of some type of structure in the ionosphere. “bad” returns are those that do not; their signals pass through the ionosphere.

    +
    +
    +

    Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this database are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details.

    +
    +

    There are 43 predictors and a factor outcome. Two of the predictors are factors (V1 and V2) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions:

    +
    +
    table(Ionosphere$V1)
    +#> 
    +#>   0   1 
    +#>  38 313
    +table(Ionosphere$V2)
    +#> 
    +#>   0 
    +#> 351
    +
    +

    There’s no point in putting V2 into any model since it is a zero-variance predictor. V1 is not, but it could be if the resampling process ends up sampling all of the same value. Is this an issue? It might be, since the standard R formula infrastructure fails when there is only a single observed value:

    +
    +
    glm(Class ~ ., data = Ionosphere, family = binomial)
    +
    +# Surprisingly, this doesn't help: 
    +
    +glm(Class ~ . - V2, data = Ionosphere, family = binomial)
    +
    +

    Let’s remove these two problematic variables:

    +
    +
    Ionosphere <- Ionosphere %>% select(-V1, -V2)
    +
    +
    + +
    +

    Optional inputs

    +

    An optional step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. To do this, a yardstick package function can be used to create a metric set:

    +
    +
    roc_vals <- metric_set(roc_auc)
    +
    +

    If no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used.

    +

    Also, a control object can be passed that specifies different aspects of the search. Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on.

    +
    +
    ctrl <- control_grid(verbose = FALSE, save_pred = TRUE)
    +
    +
    +
    +

    Executing with a formula

    +

    First, we can use the formula interface:

    +
    +
    set.seed(35)
    +formula_res <-
    +  svm_mod %>% 
    +  tune_grid(
    +    Class ~ .,
    +    resamples = iono_rs,
    +    metrics = roc_vals,
    +    control = ctrl
    +  )
    +formula_res
    +#> # Tuning results
    +#> # Bootstrap sampling 
    +#> # A tibble: 30 × 5
    +#>    splits            id          .metrics          .notes           .predictions
    +#>    <list>            <chr>       <list>            <list>           <list>      
    +#>  1 <split [351/120]> Bootstrap01 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  2 <split [351/130]> Bootstrap02 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  3 <split [351/137]> Bootstrap03 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  4 <split [351/141]> Bootstrap04 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  5 <split [351/131]> Bootstrap05 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  6 <split [351/131]> Bootstrap06 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  7 <split [351/127]> Bootstrap07 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  8 <split [351/123]> Bootstrap08 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  9 <split [351/131]> Bootstrap09 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#> 10 <split [351/117]> Bootstrap10 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#> # ℹ 20 more rows
    +
    +

    The .metrics column contains tibbles of the performance metrics for each tuning parameter combination:

    +
    +
    formula_res %>% 
    +  select(.metrics) %>% 
    +  slice(1) %>% 
    +  pull(1)
    +#> [[1]]
    +#> # A tibble: 10 × 6
    +#>        cost rbf_sigma .metric .estimator .estimate .config              
    +#>       <dbl>     <dbl> <chr>   <chr>          <dbl> <chr>                
    +#>  1  0.00849  1.11e-10 roc_auc binary         0.815 Preprocessor1_Model01
    +#>  2  0.176    7.28e- 8 roc_auc binary         0.839 Preprocessor1_Model02
    +#>  3 14.9      3.93e- 4 roc_auc binary         0.870 Preprocessor1_Model03
    +#>  4  5.51     2.10e- 3 roc_auc binary         0.919 Preprocessor1_Model04
    +#>  5  1.87     3.53e- 7 roc_auc binary         0.838 Preprocessor1_Model05
    +#>  6  0.00719  1.45e- 5 roc_auc binary         0.832 Preprocessor1_Model06
    +#>  7  0.00114  8.41e- 2 roc_auc binary         0.969 Preprocessor1_Model07
    +#>  8  0.950    1.74e- 1 roc_auc binary         0.984 Preprocessor1_Model08
    +#>  9  0.189    3.13e- 6 roc_auc binary         0.832 Preprocessor1_Model09
    +#> 10  0.0364   4.96e- 9 roc_auc binary         0.839 Preprocessor1_Model10
    +
    +

    To get the final resampling estimates, the collect_metrics() function can be used on the grid object:

    +
    +
    estimates <- collect_metrics(formula_res)
    +estimates
    +#> # A tibble: 10 × 8
    +#>        cost rbf_sigma .metric .estimator  mean     n std_err .config            
    +#>       <dbl>     <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>              
    +#>  1  0.00849  1.11e-10 roc_auc binary     0.822    30 0.00718 Preprocessor1_Mode…
    +#>  2  0.176    7.28e- 8 roc_auc binary     0.871    30 0.00525 Preprocessor1_Mode…
    +#>  3 14.9      3.93e- 4 roc_auc binary     0.916    30 0.00497 Preprocessor1_Mode…
    +#>  4  5.51     2.10e- 3 roc_auc binary     0.960    30 0.00378 Preprocessor1_Mode…
    +#>  5  1.87     3.53e- 7 roc_auc binary     0.871    30 0.00524 Preprocessor1_Mode…
    +#>  6  0.00719  1.45e- 5 roc_auc binary     0.871    30 0.00534 Preprocessor1_Mode…
    +#>  7  0.00114  8.41e- 2 roc_auc binary     0.966    30 0.00301 Preprocessor1_Mode…
    +#>  8  0.950    1.74e- 1 roc_auc binary     0.979    30 0.00204 Preprocessor1_Mode…
    +#>  9  0.189    3.13e- 6 roc_auc binary     0.871    30 0.00536 Preprocessor1_Mode…
    +#> 10  0.0364   4.96e- 9 roc_auc binary     0.871    30 0.00537 Preprocessor1_Mode…
    +
    +

    The top combinations are:

    +
    +
    show_best(formula_res, metric = "roc_auc")
    +#> # A tibble: 5 × 8
    +#>       cost rbf_sigma .metric .estimator  mean     n std_err .config             
    +#>      <dbl>     <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>               
    +#> 1  0.950   0.174     roc_auc binary     0.979    30 0.00204 Preprocessor1_Model…
    +#> 2  0.00114 0.0841    roc_auc binary     0.966    30 0.00301 Preprocessor1_Model…
    +#> 3  5.51    0.00210   roc_auc binary     0.960    30 0.00378 Preprocessor1_Model…
    +#> 4 14.9     0.000393  roc_auc binary     0.916    30 0.00497 Preprocessor1_Model…
    +#> 5  0.00719 0.0000145 roc_auc binary     0.871    30 0.00534 Preprocessor1_Model…
    +
    +
    +
    +

    Executing with a recipe

    +

    Next, we can use the same syntax but pass a recipe in as the pre-processor argument:

    +
    +
    set.seed(325)
    +recipe_res <-
    +  svm_mod %>% 
    +  tune_grid(
    +    iono_rec,
    +    resamples = iono_rs,
    +    metrics = roc_vals,
    +    control = ctrl
    +  )
    +recipe_res
    +#> # Tuning results
    +#> # Bootstrap sampling 
    +#> # A tibble: 30 × 5
    +#>    splits            id          .metrics          .notes           .predictions
    +#>    <list>            <chr>       <list>            <list>           <list>      
    +#>  1 <split [351/120]> Bootstrap01 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  2 <split [351/130]> Bootstrap02 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  3 <split [351/137]> Bootstrap03 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  4 <split [351/141]> Bootstrap04 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  5 <split [351/131]> Bootstrap05 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  6 <split [351/131]> Bootstrap06 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  7 <split [351/127]> Bootstrap07 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  8 <split [351/123]> Bootstrap08 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#>  9 <split [351/131]> Bootstrap09 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#> 10 <split [351/117]> Bootstrap10 <tibble [10 × 6]> <tibble [0 × 3]> <tibble>    
    +#> # ℹ 20 more rows
    +
    +

    The best setting here is:

    +
    +
    show_best(recipe_res, metric = "roc_auc")
    +#> # A tibble: 5 × 8
    +#>      cost rbf_sigma .metric .estimator  mean     n std_err .config              
    +#>     <dbl>     <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
    +#> 1 15.6    0.182     roc_auc binary     0.981    30 0.00213 Preprocessor1_Model04
    +#> 2  0.385  0.0276    roc_auc binary     0.978    30 0.00222 Preprocessor1_Model03
    +#> 3  0.143  0.00243   roc_auc binary     0.930    30 0.00443 Preprocessor1_Model06
    +#> 4  0.841  0.000691  roc_auc binary     0.892    30 0.00504 Preprocessor1_Model07
    +#> 5  0.0499 0.0000335 roc_auc binary     0.872    30 0.00521 Preprocessor1_Model08
    +
    +
    +
    +

    Out-of-sample predictions

    +

    If we used save_pred = TRUE to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using collect_predictions():

    +
    +
    collect_predictions(recipe_res)
    +#> # A tibble: 38,740 × 8
    +#>    id          .pred_bad .pred_good  .row    cost  rbf_sigma Class .config      
    +#>    <chr>           <dbl>      <dbl> <int>   <dbl>      <dbl> <fct> <chr>        
    +#>  1 Bootstrap01     0.333      0.667     1 0.00296 0.00000383 good  Preprocessor…
    +#>  2 Bootstrap01     0.333      0.667     9 0.00296 0.00000383 good  Preprocessor…
    +#>  3 Bootstrap01     0.333      0.667    10 0.00296 0.00000383 bad   Preprocessor…
    +#>  4 Bootstrap01     0.333      0.667    12 0.00296 0.00000383 bad   Preprocessor…
    +#>  5 Bootstrap01     0.333      0.667    14 0.00296 0.00000383 bad   Preprocessor…
    +#>  6 Bootstrap01     0.333      0.667    15 0.00296 0.00000383 good  Preprocessor…
    +#>  7 Bootstrap01     0.333      0.667    16 0.00296 0.00000383 bad   Preprocessor…
    +#>  8 Bootstrap01     0.334      0.666    22 0.00296 0.00000383 bad   Preprocessor…
    +#>  9 Bootstrap01     0.333      0.667    23 0.00296 0.00000383 good  Preprocessor…
    +#> 10 Bootstrap01     0.334      0.666    24 0.00296 0.00000383 bad   Preprocessor…
    +#> # ℹ 38,730 more rows
    +
    +

    We can obtain the hold-out sets for all the resamples augmented with the predictions using augment(), which provides opportunities for flexible visualization of model results:

    +
    +
    augment(recipe_res) %>%
    +  ggplot(aes(V3, .pred_good, color = Class)) +
    +  geom_point(show.legend = FALSE) +
    +  facet_wrap(~Class)
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  kernlab    * 0.9-32  2023-01-31 [1] CRAN (R 4.3.0)
    +#>  mlbench    * 2.1-3.1 2023-05-05 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/learn/work/tune-text/figs/grid-plot-1.svg b/docs/learn/work/tune-text/figs/grid-plot-1.svg similarity index 100% rename from content/learn/work/tune-text/figs/grid-plot-1.svg rename to docs/learn/work/tune-text/figs/grid-plot-1.svg diff --git a/content/learn/work/tune-text/figs/iter-plot-1.svg b/docs/learn/work/tune-text/figs/iter-plot-1.svg similarity index 100% rename from content/learn/work/tune-text/figs/iter-plot-1.svg rename to docs/learn/work/tune-text/figs/iter-plot-1.svg diff --git a/content/learn/work/tune-text/figs/var-plot-1.svg b/docs/learn/work/tune-text/figs/var-plot-1.svg similarity index 100% rename from content/learn/work/tune-text/figs/var-plot-1.svg rename to docs/learn/work/tune-text/figs/var-plot-1.svg diff --git a/docs/learn/work/tune-text/index.html b/docs/learn/work/tune-text/index.html new file mode 100644 index 00000000..42a12276 --- /dev/null +++ b/docs/learn/work/tune-text/index.html @@ -0,0 +1,1221 @@ + + + + + + + + + + +tidymodels - Tuning text models + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Tuning text models

    +
    +
    model tuning
    +
    text analysis
    +
    logistic regression
    +
    Bayesian optimization
    +
    extracting results
    +
    +
    + +
    +
    +

    Prepare text data for predictive modeling and tune with both grid and iterative search.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    To use code in this article, you will need to install the following packages: stopwords, textfeatures, textrecipes, and tidymodels.

    +

    This article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on.

    +
    +
    +

    Text as data

    +

    The text data we’ll use in this article are from Amazon:

    +
    +

    This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review.

    +
    +

    This article uses a small subset of the total reviews available at the original source. We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set.

    +

    There is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not.

    +
    +
    library(tidymodels)
    +
    +data("small_fine_foods")
    +training_data
    +#> # A tibble: 4,000 × 3
    +#>    product    review                                                       score
    +#>    <chr>      <chr>                                                        <fct>
    +#>  1 B000J0LSBG "this stuff is  not stuffing  its  not good at all  save yo… other
    +#>  2 B000EYLDYE "I absolutely LOVE this dried fruit.  LOVE IT.  Whenever I … great
    +#>  3 B0026LIO9A "GREAT DEAL, CONVENIENT TOO.  Much cheaper than WalMart and… great
    +#>  4 B00473P8SK "Great flavor, we go through a ton of this sauce! I discove… great
    +#>  5 B001SAWTNM "This is excellent salsa/hot sauce, but you can get it for … great
    +#>  6 B000FAG90U "Again, this is the best dogfood out there.  One suggestion… great
    +#>  7 B006BXTCEK "The box I received was filled with teas, hot chocolates, a… other
    +#>  8 B002GWH5OY "This is delicious coffee which compares favorably with muc… great
    +#>  9 B003R0MFYY "Don't let these little tiny cans fool you.  They pack a lo… great
    +#> 10 B001EO5ZXI "One of the nicest, smoothest cup of chai I've made. Nice m… great
    +#> # ℹ 3,990 more rows
    +
    +

    Our modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not.

    +
    + +
    +

    Resampling

    +

    There are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models.

    +
    +
    set.seed(8935)
    +folds <- vfold_cv(training_data)
    +folds
    +#> #  10-fold cross-validation 
    +#> # A tibble: 10 × 2
    +#>    splits             id    
    +#>    <list>             <chr> 
    +#>  1 <split [3600/400]> Fold01
    +#>  2 <split [3600/400]> Fold02
    +#>  3 <split [3600/400]> Fold03
    +#>  4 <split [3600/400]> Fold04
    +#>  5 <split [3600/400]> Fold05
    +#>  6 <split [3600/400]> Fold06
    +#>  7 <split [3600/400]> Fold07
    +#>  8 <split [3600/400]> Fold08
    +#>  9 <split [3600/400]> Fold09
    +#> 10 <split [3600/400]> Fold10
    +
    +
    + + +
    +

    Extracted results

    +

    Let’s return to the grid search results and examine the results of our extract function. For each fitted model, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let’s look at these results for the best model:

    +
    +
    params <- select_best(five_star_glmnet, metric = "roc_auc")
    +params
    +#> # A tibble: 1 × 4
    +#>   penalty mixture num_terms .config               
    +#>     <dbl>   <dbl>     <dbl> <chr>                 
    +#> 1   0.695    0.01      4096 Preprocessor3_Model019
    +
    +

    Recall that we saved the glmnet results in a tibble. The column five_star_glmnet$.extracts is a list of tibbles. As an example, the first element of the list is:

    +
    +
    five_star_glmnet$.extracts[[1]]
    +#> # A tibble: 300 × 5
    +#>    num_terms penalty mixture .extracts          .config               
    +#>        <dbl>   <dbl>   <dbl> <list>             <chr>                 
    +#>  1       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model001
    +#>  2       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model002
    +#>  3       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model003
    +#>  4       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model004
    +#>  5       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model005
    +#>  6       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model006
    +#>  7       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model007
    +#>  8       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model008
    +#>  9       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model009
    +#> 10       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model010
    +#> # ℹ 290 more rows
    +
    +

    More nested tibbles! Let’s unnest() the five_star_glmnet$.extracts column:

    +
    +
    library(tidyr)
    +extracted <- 
    +  five_star_glmnet %>% 
    +  dplyr::select(id, .extracts) %>% 
    +  unnest(cols = .extracts)
    +extracted
    +#> # A tibble: 3,000 × 6
    +#>    id     num_terms penalty mixture .extracts          .config               
    +#>    <chr>      <dbl>   <dbl>   <dbl> <list>             <chr>                 
    +#>  1 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model001
    +#>  2 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model002
    +#>  3 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model003
    +#>  4 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model004
    +#>  5 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model005
    +#>  6 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model006
    +#>  7 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model007
    +#>  8 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model008
    +#>  9 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model009
    +#> 10 Fold01       256       1    0.01 <tibble [100 × 2]> Preprocessor1_Model010
    +#> # ℹ 2,990 more rows
    +
    +

    One thing to realize here is that tune_grid() may not fit all of the models that are evaluated. In this case, for each value of mixture and num_terms, the model is fit over all penalty values (this is a feature of this particular model and is not generally true for other engines). To select the best parameter set, we can exclude the penalty column in extracted:

    +
    +
    extracted <- 
    +  extracted %>% 
    +  dplyr::select(-penalty) %>% 
    +  inner_join(params, by = c("num_terms", "mixture")) %>% 
    +  # Now remove it from the final results
    +  dplyr::select(-penalty)
    +extracted
    +#> # A tibble: 200 × 6
    +#>    id     num_terms mixture .extracts          .config.x              .config.y 
    +#>    <chr>      <dbl>   <dbl> <list>             <chr>                  <chr>     
    +#>  1 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model001 Preproces…
    +#>  2 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model002 Preproces…
    +#>  3 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model003 Preproces…
    +#>  4 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model004 Preproces…
    +#>  5 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model005 Preproces…
    +#>  6 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model006 Preproces…
    +#>  7 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model007 Preproces…
    +#>  8 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model008 Preproces…
    +#>  9 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model009 Preproces…
    +#> 10 Fold01      4096    0.01 <tibble [100 × 2]> Preprocessor3_Model010 Preproces…
    +#> # ℹ 190 more rows
    +
    +

    Now we can get at the results that we want using another unnest():

    +
    +
    extracted <- 
    +  extracted %>% 
    +  unnest(col = .extracts) # <- these contain a `penalty` column
    +extracted
    +#> # A tibble: 20,000 × 7
    +#>    id     num_terms mixture penalty num_vars .config.x              .config.y   
    +#>    <chr>      <dbl>   <dbl>   <dbl>    <int> <chr>                  <chr>       
    +#>  1 Fold01      4096    0.01    8.60        0 Preprocessor3_Model001 Preprocesso…
    +#>  2 Fold01      4096    0.01    8.21        2 Preprocessor3_Model001 Preprocesso…
    +#>  3 Fold01      4096    0.01    7.84        2 Preprocessor3_Model001 Preprocesso…
    +#>  4 Fold01      4096    0.01    7.48        3 Preprocessor3_Model001 Preprocesso…
    +#>  5 Fold01      4096    0.01    7.14        3 Preprocessor3_Model001 Preprocesso…
    +#>  6 Fold01      4096    0.01    6.82        3 Preprocessor3_Model001 Preprocesso…
    +#>  7 Fold01      4096    0.01    6.51        4 Preprocessor3_Model001 Preprocesso…
    +#>  8 Fold01      4096    0.01    6.21        6 Preprocessor3_Model001 Preprocesso…
    +#>  9 Fold01      4096    0.01    5.93        7 Preprocessor3_Model001 Preprocesso…
    +#> 10 Fold01      4096    0.01    5.66        7 Preprocessor3_Model001 Preprocesso…
    +#> # ℹ 19,990 more rows
    +
    +

    Let’s look at a plot of these results (per resample):

    +
    +
    ggplot(extracted, aes(x = penalty, y = num_vars)) + 
    +  geom_line(aes(group = id, col = id), alpha = .5) + 
    +  ylab("Number of retained predictors") + 
    +  scale_x_log10()  + 
    +  ggtitle(paste("mixture = ", params$mixture, "and", params$num_terms, "features")) + 
    +  theme(legend.position = "none")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    These results might help guide the choice of the penalty range if more optimization was conducted.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package      * version date (UTC) lib source
    +#>  broom        * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials        * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr        * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2      * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer        * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip      * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr        * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes      * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang          1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample      * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  stopwords    * 2.3     2021-10-28 [1] CRAN (R 4.3.0)
    +#>  textfeatures * 0.3.3   2019-09-03 [1] CRAN (R 4.3.0)
    +#>  textrecipes  * 1.0.3   2023-04-14 [1] CRAN (R 4.3.0)
    +#>  tibble       * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels   * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune         * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows    * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick    * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/listings.json b/docs/listings.json new file mode 100644 index 00000000..b8a2d97f --- /dev/null +++ b/docs/listings.json @@ -0,0 +1,43 @@ +[ + { + "listing": "/books/index.html", + "items": [ + "/books/fes/index.html", + "/books/moderndive/index.html", + "/books/smltar/index.html", + "/books/tidytext/index.html", + "/books/tmwr/index.html" + ] + }, + { + "listing": "/learn/index.html", + "items": [ + "/start/case-study/index.html", + "/learn/statistics/bootstrap/index.html", + "/start/models/index.html", + "/learn/models/parsnip-nnet/index.html", + "/learn/statistics/tidy-analysis/index.html", + "/learn/develop/broom/index.html", + "/learn/develop/recipes/index.html", + "/learn/work/case-weights/index.html", + "/learn/develop/metrics/index.html", + "/start/resampling/index.html", + "/learn/develop/models/index.html", + "/learn/develop/parameters/index.html", + "/learn/statistics/infer/index.html", + "/learn/work/bayes-opt/index.html", + "/learn/statistics/k-means/index.html", + "/learn/work/tune-svm/index.html", + "/learn/models/time-series/index.html", + "/learn/models/pls/index.html", + "/learn/work/nested-resampling/index.html", + "/start/recipes/index.html", + "/learn/models/parsnip-ranger-glmnet/index.html", + "/learn/statistics/xtabs/index.html", + "/learn/models/sub-sampling/index.html", + "/start/tuning/index.html", + "/learn/work/tune-text/index.html", + "/learn/models/coefficients/index.html" + ] + } +] \ No newline at end of file diff --git a/docs/packages/index.html b/docs/packages/index.html new file mode 100644 index 00000000..892e325e --- /dev/null +++ b/docs/packages/index.html @@ -0,0 +1,585 @@ + + + + + + + + + +tidymodels - Tidymodels packages + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    Tidymodels packages

    +
    + + + +
    + + + + +
    + + +
    + +
    +

    Installation and use

    +
      +
    • Install many of the packages in the tidymodels ecosystem by running install.packages("tidymodels").

    • +
    • Run library(tidymodels) to load the core packages and make them available in your current R session.

    • +
    +
    +
    + +
    +
    + +
    +

    + tidymodels +

    +

    +tidymodels is a meta-package that installs and loads the core packages listed below that you need for modeling and machine learning. +

    +
    +
    +
    + +
    +

    +rsample +

    +

    +rsample provides infrastructure for efficient data splitting and resampling. +

    +
    +
    +
    + +
    +

    + parsnip +

    +

    +parsnip is a tidy, unified interface to models that can be used to try a range of models without getting bogged down in the syntactical minutiae of the underlying packages. +

    +
    +
    +
    + +
    +

    + recipes +

    +

    +recipes is a tidy interface to data pre-processing tools for feature engineering. +

    +
    +
    +
    + +
    +

    + workflows +

    +

    +workflows bundle your pre-processing, modeling, and post-processing together. +

    +
    +
    +
    + +
    +

    + tune +

    +

    +tune helps you optimize the hyperparameters of your model and pre-processing steps. +

    +
    +
    +
    + +
    +

    + yardstick +

    +

    +yardstick measures the effectiveness of models using performance metrics. +

    +
    +
    +
    + +
    +

    + broom +

    +

    +broom converts the information in common statistical R objects into user-friendly, predictable formats. +

    +
    +
    +
    + +
    +

    + dials +

    +

    +dials creates and manages tuning parameters and parameter grids. +

    +
    +
    +
    +
    +

    Learn more about the tidymodels metapackage itself at https://tidymodels.tidymodels.org/.

    +
    +

    Specialized packages

    +

    The tidymodels framework also includes many other packages designed for specialized data analysis and modeling tasks. They are not loaded automatically with library(tidymodels), so you’ll need to load each one with its own call to library(). These packages include:

    +
    +

    Perform statistical analysis

    +
      +
    • infer is a high-level API for tidyverse-friendly statistical inference.

    • +
    • The corrr package has tidy interfaces for working with correlation matrices.

    • +
    +
    +
    +

    Create robust models

    +
      +
    • The spatialsample package provides resampling functions and classes like rsample, but specialized for spatial data.

    • +
    • parsnip also has additional packages that contain more model definitions. discrim contains definitions for discriminant analysis models, poissonreg provides definitions for Poisson regression models, plsmod enables linear projection models, and rules does the same for rule-based classification and regression models. baguette creates ensemble models via bagging, and multilevelmod provides support for multilevel models (otherwise known as mixed models or hierarchical models).

    • +
    • There are several add-on packages for creating recipes. embed contains steps to create embeddings or projections of predictors. textrecipes has extra steps for text processing, and themis can help alleviate class imbalance using sampling methods.

    • +
    • tidypredict and modeldb can convert prediction equations to different languages (e.g. SQL) and fit some models in-database.

    • +
    +
    +
    +

    Tune, compare, and work with your models

    +
      +
    • To try out multiple different workflows (i.e. bundles of pre-processor and model) at once, workflowsets lets you create sets of workflow objects for tuning and resampling.

    • +
    • To integrate predictions from many models, the stacks package provides tools for stacked ensemble modeling.

    • +
    • The finetune package extends the tune package with more approaches such as racing and simulated annealing.

    • +
    • The usemodels package creates templates and automatically generates code to fit and tune models.

    • +
    • probably has tools for post-processing class probability estimates.

    • +
    • The tidyposterior package enables users to make formal statistical comparisons between models using resampling and Bayesian methods.

    • +
    • Some R objects become inconveniently large when saved to disk. The butcher package can reduce the size of those objects by removing the sub-components.

    • +
    • To know whether the data that you are predicting are extrapolations from the training set, applicable can produce metrics that measure extrapolation.

    • +
    • shinymodels lets you explore tuning or resampling results via a Shiny app.

    • +
    +
    +
    +

    Develop custom modeling tools

    +
      +
    • hardhat is a developer-focused package that helps beginners create high-quality R packages for modeling.
    • +
    + + +
    +
    + +
    + +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + + +
    + + + + + \ No newline at end of file diff --git a/docs/search.json b/docs/search.json new file mode 100644 index 00000000..0933fbf5 --- /dev/null +++ b/docs/search.json @@ -0,0 +1,1479 @@ +[ + { + "objectID": "about/davis/index.html", + "href": "about/davis/index.html", + "title": "tidymodels", + "section": "", + "text": "Davis Vaughan is a Software Engineer at RStudio. He is the author or maintainer of several R packages for finance and data analytics, including tidyquant, timetk, tibbletime, sweep, rray and hardhat. He is well-known for this work around creating modeling packages in R." + }, + { + "objectID": "about/max/index.html", + "href": "about/max/index.html", + "title": "tidymodels", + "section": "", + "text": "Dr. Max Kuhn is a Software Engineer at RStudio. He is the author or maintainer of several R packages for predictive modeling including caret, AppliedPredictiveModeling, Cubist, C50 and SparseLDA. He routinely teaches classes in predictive modeling at Predictive Analytics World and UseR! and his publications include work on neuroscience biomarkers, drug discovery, molecular diagnostics and response surface methodology." + }, + { + "objectID": "books/fes/index.html", + "href": "books/fes/index.html", + "title": "Feature Engineering & Selection", + "section": "", + "text": "A primary goal of predictive modeling is to find a reliable and effective predictive relationship between an available set of features and an outcome. This book provides an extensive set of techniques for uncovering effective representations of the features for modeling the outcome and for finding an optimal subset of features to improve a model’s predictive performance.\nAn HTML version of this text can be found at https://bookdown.org/max/FES.\nThe data sets and R code are available in the GitHub repository https://github.com/topepo/FES.\nThe physical copies are sold by Amazon and Taylor & Francis." 
+ }, + { + "objectID": "books/index.html", + "href": "books/index.html", + "title": "Books", + "section": "", + "text": "Feature Engineering & Selection\n\n\n\n\n\nA Practical Approach for Predictive Models\n\n\n\n\n \n\n\n\n\n \n\n\n\n\nStatistical Inference via Data Science\n\n\n\n\n\nA ModernDive into R and the Tidyverse\n\n\n\n\n \n\n\n\n\n \n\n\n\n\nSupervised Machine Learning for Text Analysis in R\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n \n\n\n\n\nText Mining with R\n\n\n\n\n\nA Tidy Approach\n\n\n\n\n \n\n\n\n\n \n\n\n\n\nTidy Modeling with R\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\nNo matching items" + }, + { + "objectID": "books/moderndive/index.html", + "href": "books/moderndive/index.html", + "title": "Statistical Inference via Data Science", + "section": "", + "text": "This book is intended to be a gentle introduction to the practice of analyzing data and answering questions using data the way data scientists, statisticians, data journalists, and other researchers would. Over the course of this book, you will develop your “data science toolbox,” equipping yourself with tools such as data visualization, data formatting, data wrangling, data modeling using regression, and statistical inference via hypothesis testing and confidence intervals.\nAn HTML version of this text can be found at https://moderndive.com/.\nThe data sets and R code are available in the GitHub repository https://github.com/moderndive/ModernDive_book and also the accompanying moderndive R package on CRAN.\nThe physical copies are sold by Amazon and CRC Press." + }, + { + "objectID": "books/smltar/index.html", + "href": "books/smltar/index.html", + "title": "Supervised Machine Learning for Text Analysis in R", + "section": "", + "text": "This book explains how to preprocess text data for modeling, train models, and evaluate model performance using tools from the tidyverse and tidymodels ecosystem. 
Models like these can be used to make predictions for new observations, to understand what natural language features or characteristics contribute to differences in the output, and more. If you are already familiar with the basics of predictive modeling, use the comprehensive, detailed examples in this book to extend your skills to the domain of natural language processing.\nThis book provides practical guidance and directly applicable knowledge for data scientists and analysts who want to integrate unstructured text data into their modeling pipelines. Learn how to use text data for both regression and classification tasks, and how to apply more straightforward algorithms like regularized regression or support vector machines as well as deep learning approaches. Natural language must be dramatically transformed to be ready for computation, so we explore typical text preprocessing and feature engineering steps like tokenization and word embeddings from the ground up. These steps influence model results in ways we can measure, both in terms of model metrics and other tangible consequences such as how fair or appropriate model results are.\nAn HTML version of this text can be found at https://smltar.com/.\nThe data sets and R code are available in the GitHub repository https://github.com/EmilHvitfeldt/smltar.\nThe physical copies are sold by Amazon and CRC Press." + }, + { + "objectID": "books/tidytext/index.html", + "href": "books/tidytext/index.html", + "title": "Text Mining with R", + "section": "", + "text": "This practical book provides an introduction to text mining using tidy data principles in R, focusing on exploratory data analysis for text. 
Using tidy data principles can make text mining task easier and more effective; in this book, learn how to manipulate, summarize, and visualize characteristics of text using these methods and R packages from the tidy tool ecosystem.\nAn HTML version of this text can be found at https://www.tidytextmining.com/.\nThe data sets and R code are available in the GitHub repository https://github.com/dgrtwo/tidy-text-mining.\nThe physical copies are sold by Amazon and O’Reilly." + }, + { + "objectID": "books/tmwr/index.html", + "href": "books/tmwr/index.html", + "title": "Tidy Modeling with R", + "section": "", + "text": "Modeling of data is integral to science, business, politics, and many other aspects of our lives. The goals of this book are to:\n\nintroduce and demonstrate how to use the tidymodels packages, and\noutline good practices for the phases of the modeling process.\n\nAn HTML version of this text can be found at https://tmwr.org.\nThe sources to create the book are available in the GitHub repository https://github.com/tidymodels/TMwR." + }, + { + "objectID": "contribute/index.html", + "href": "contribute/index.html", + "title": "How to contribute to tidymodels", + "section": "", + "text": "The ecosystem of tidymodels packages would not be possible without the contributions of the R community. No matter your current skills, it’s possible to contribute back to tidymodels. Contributions are guided by our design goals." 
+ }, + { + "objectID": "contribute/index.html#design-goals", + "href": "contribute/index.html#design-goals", + "title": "How to contribute to tidymodels", + "section": "Design goals", + "text": "Design goals\nThe goals of tidymodels packages are to:\n\nEncourage empirical validation and good statistical practice.\nSmooth out heterogeneous interfaces.\nEstablish highly reusable infrastructure.\nEnable a wider variety of methodologies.\nHelp package developers quickly build high quality model packages of their own.\n\nThese goals are guided by our principles for creating modeling packages.\nWhat are different ways you can contribute?" + }, + { + "objectID": "contribute/index.html#answer-questions", + "href": "contribute/index.html#answer-questions", + "title": "How to contribute to tidymodels", + "section": "Answer questions", + "text": "Answer questions\nYou can help others use and learn tidymodels by answering questions on the RStudio community site, Stack Overflow, and Twitter. Many people asking for help with tidymodels don’t know what a reprex is or how to craft one. Acknowledging an individual’s problem, showing them how to build a reprex, and pointing them to helpful resources are all enormously beneficial, even if you don’t immediately solve their problem.\nRemember that while you might have seen a problem a hundred times before, it’s new to the person asking it. Be patient, polite, and empathic." + }, + { + "objectID": "contribute/index.html#file-issues", + "href": "contribute/index.html#file-issues", + "title": "How to contribute to tidymodels", + "section": "File issues", + "text": "File issues\nIf you’ve found a bug, first create a minimal reprex. Spend some time working to make it as minimal as possible; the more time you spend doing this, the easier it is to fix the bug. 
When your reprex is ready, file it on the GitHub repo of the appropriate package.\nThe tidymodels team often focuses on one package at a time to reduce context switching and be more efficient. We may not address each issue right away, but we will use the reprex you create to understand your problem when it is time to focus on that package." + }, + { + "objectID": "contribute/index.html#contribute-documentation", + "href": "contribute/index.html#contribute-documentation", + "title": "How to contribute to tidymodels", + "section": "Contribute documentation", + "text": "Contribute documentation\nDocumentation is a high priority for tidymodels, and pull requests to correct or improve documentation are welcome. The most important thing to know is that tidymodels packages use roxygen2; this means that documentation is found in the R code close to the source of each function. There are some special tags, but most tidymodels packages now use markdown in the documentation. This makes it particularly easy to get started!" + }, + { + "objectID": "contribute/index.html#contribute-code", + "href": "contribute/index.html#contribute-code", + "title": "How to contribute to tidymodels", + "section": "Contribute code", + "text": "Contribute code\nIf you are a more experienced R programmer, you may have the inclination, interest, and ability to contribute directly to package development. Before you submit a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal.\nIn tidymodels packages, we use the tidyverse style guide which will make sure that your new code and documentation matches the existing style. This makes the review process much smoother.\nThe tidymodels packages are explicitly built to support the creation of other modeling packages, and we would love to hear about what you build yourself! Check out our learning resources for developing custom modeling tools." 
+ }, + { + "objectID": "find/all/index.html", + "href": "find/all/index.html", + "title": "Search all of tidymodels", + "section": "", + "text": "Here are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation." + }, + { + "objectID": "find/broom/index.html", + "href": "find/broom/index.html", + "title": "Search broom methods", + "section": "", + "text": "Here are all the broom functions available across CRAN packages. Click on the link in the topic column to find more information." + }, + { + "objectID": "find/index.html", + "href": "find/index.html", + "title": "Explore tidymodels", + "section": "", + "text": "Below you’ll find searchable tables to help you explore the tidymodels packages and functions. The tables also include links to the relevant reference page to help you navigate the package documentation. Use the following categories to guide you:\n\nSearch all of tidymodels\nSearch parsnip models\nSearch recipe steps\nSearch broom methods\n\n\nBooks\nIf you want to read more long form there are a number of books written about and using tidymodels\n\nList of books" + }, + { + "objectID": "find/parsnip/index.html", + "href": "find/parsnip/index.html", + "title": "Search parsnip models", + "section": "", + "text": "To learn about the parsnip package, see Get Started: Build a Model. Use the tables below to find model types and engines." + }, + { + "objectID": "find/recipes/index.html", + "href": "find/recipes/index.html", + "title": "Search recipe steps", + "section": "", + "text": "To learn about the recipes package, see Get Started: Preprocess your data with recipes. The table below allows you to search for recipe steps across tidymodels packages." 
+ }, + { + "objectID": "help/index.html", + "href": "help/index.html", + "title": "Get Help", + "section": "", + "text": "If you’re asking for R help, reporting a bug, or requesting a new feature, you’re more likely to succeed if you include a good reproducible example, which is precisely what the reprex package is built for. You can learn more about reprex, along with other tips on how to help others help you in the tidyverse.org help section." + }, + { + "objectID": "help/index.html#asking-for-help", + "href": "help/index.html#asking-for-help", + "title": "Get Help", + "section": "", + "text": "If you’re asking for R help, reporting a bug, or requesting a new feature, you’re more likely to succeed if you include a good reproducible example, which is precisely what the reprex package is built for. You can learn more about reprex, along with other tips on how to help others help you in the tidyverse.org help section." + }, + { + "objectID": "help/index.html#where-to-ask", + "href": "help/index.html#where-to-ask", + "title": "Get Help", + "section": "Where to ask", + "text": "Where to ask\n\nNow that you’ve made a reprex, you need to share it in an appropriate forum. Here are some options:\n\ncommunity.rstudio.com: This is a warm and welcoming place to ask any questions you might have about tidymodels or more generally about modeling, machine learning, and deep learning. (You can also ask questions about the tidyverse and RStudio there, too!)\nStack Overflow. You’re probably already familiar with Stack Overflow from googling; it’s a frequent source of answers to coding related questions. Asking a question on Stack Overflow can be intimidating, but if you’ve taken the time to create a reprex, you’re much more likely to get a useful answer. Make sure to tag your question with r and tidymodels so that the right people are more likely to see it.\nTwitter and Mastodon. These sites are great places to share a link to your reprex that’s hosted elsewhere! 
The #rstats twitter and #rstats fosstodon communities are extremely friendly and active, and have great crowds to be a part of. Make sure you tag your tweet with #rstats and #tidymodels.\nIf you think you’ve found a bug, please follow the instructions for filing an issue on contributing to tidymodels." + }, + { + "objectID": "help/index.html#resources", + "href": "help/index.html#resources", + "title": "Get Help", + "section": "Resources", + "text": "Resources\n\nSee what you need to know to get started with tidymodels, and learn more about using tidymodels for specific tasks.\nEach tidymodels package has its own documentation site, full of helpful information. Find links to all package documentation sites and explore them!\nSearch all tidymodels functions, and check out our books on these topics.\nStay up to date with the latest news about tidymodels through our posts on the tidyverse blog." + }, + { + "objectID": "learn/develop/broom/index.html", + "href": "learn/develop/broom/index.html", + "title": "Create your own broom tidier methods", + "section": "", + "text": "To use code in this article, you will need to install the following packages: generics, tidymodels, tidyverse, and usethis.\nThe broom package provides tools to summarize key information about models in tidy tibble()s. The package provides three verbs, or “tidiers,” to help make model objects easier to work with:\n\ntidy() summarizes information about model components\nglance() reports information about the entire model\naugment() adds information about observations to a dataset\n\nEach of the three verbs above are generic, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant method implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. 
However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. the package that supplies the model object) rather than to broom. New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package.\nWe’d like to make implementing external tidier methods as painless as possible. The general process for doing so is:\n\nre-export the tidier generics\nimplement tidying methods\ndocument the new methods\n\nIn this article, we’ll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible." + }, + { + "objectID": "learn/develop/broom/index.html#introduction", + "href": "learn/develop/broom/index.html#introduction", + "title": "Create your own broom tidier methods", + "section": "", + "text": "To use code in this article, you will need to install the following packages: generics, tidymodels, tidyverse, and usethis.\nThe broom package provides tools to summarize key information about models in tidy tibble()s. The package provides three verbs, or “tidiers,” to help make model objects easier to work with:\n\ntidy() summarizes information about model components\nglance() reports information about the entire model\naugment() adds information about observations to a dataset\n\nEach of the three verbs above are generic, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant method implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. 
the package that supplies the model object) rather than to broom. New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package.\nWe’d like to make implementing external tidier methods as painless as possible. The general process for doing so is:\n\nre-export the tidier generics\nimplement tidying methods\ndocument the new methods\n\nIn this article, we’ll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible." + }, + { + "objectID": "learn/develop/broom/index.html#re-export-the-tidier-generics", + "href": "learn/develop/broom/index.html#re-export-the-tidier-generics", + "title": "Create your own broom tidier methods", + "section": "Re-export the tidier generics", + "text": "Re-export the tidier generics\nThe first step is to re-export the generic functions for tidy(), glance(), and/or augment(). You could do so from broom itself, but we’ve provided an alternative, much lighter dependency called generics.\nFirst you’ll need to add the generics package to Imports. We recommend using the usethis package for this:\n\nusethis::use_package(\"generics\", \"Imports\")\n\nNext, you’ll need to re-export the appropriate tidying methods. If you plan to implement a glance() method, for example, you can re-export the glance() generic by adding the following somewhere inside the /R folder of your package:\n\n#' @importFrom generics glance\n#' @export\ngenerics::glance\n\nOftentimes it doesn’t make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense.\n\n\n\n\n\n\nWarning\n\n\n\nPlease do not define tidy(), glance(), or augment() generics in your package. This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods." 
+ }, + { + "objectID": "learn/develop/broom/index.html#implement-tidying-methods", + "href": "learn/develop/broom/index.html#implement-tidying-methods", + "title": "Create your own broom tidier methods", + "section": "Implement tidying methods", + "text": "Implement tidying methods\nYou’ll now need to implement specific tidying methods for each of the generics you’ve re-exported in the above step. For each of tidy(), glance(), and augment(), we’ll walk through the big picture, an example, and helpful resources.\nIn this article, we’ll use the base R dataset trees, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R lm() function.\n\n# load in the trees dataset\ndata(trees)\n\n# take a look!\nstr(trees)\n#> 'data.frame': 31 obs. of 3 variables:\n#> $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...\n#> $ Height: num 70 65 63 72 81 83 66 75 80 75 ...\n#> $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...\n\n# fit the timber volume as a function of girth and height\ntrees_model <- lm(Volume ~ Girth + Height, data = trees)\n\nLet’s take a look at the summary() of our trees_model fit.\n\nsummary(trees_model)\n#> \n#> Call:\n#> lm(formula = Volume ~ Girth + Height, data = trees)\n#> \n#> Residuals:\n#> Min 1Q Median 3Q Max \n#> -6.4065 -2.6493 -0.2876 2.2003 8.4847 \n#> \n#> Coefficients:\n#> Estimate Std. Error t value Pr(>|t|) \n#> (Intercept) -57.9877 8.6382 -6.713 2.75e-07 ***\n#> Girth 4.7082 0.2643 17.816 < 2e-16 ***\n#> Height 0.3393 0.1302 2.607 0.0145 * \n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1\n#> \n#> Residual standard error: 3.882 on 28 degrees of freedom\n#> Multiple R-squared: 0.948, Adjusted R-squared: 0.9442 \n#> F-statistic: 255 on 2 and 28 DF, p-value: < 2.2e-16\n\nThis output gives some summary statistics on the residuals (which would be described more fully in an augment() output), model coefficients (which, in this case, make up the tidy() output), and some model-level summarizations such as RSE, \\(R^2\\), etc. (which make up the glance() output.)\n\nImplementing the tidy() method\nThe tidy(x, ...) method will return a tibble where each row contains information about a component of the model. The x input is a model object, and the dots (...) are an optional argument to supply additional information to any calls inside your method. New tidy() methods can take additional arguments, but must include the x and ... arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see the end of this article.) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These tidy() methods are useful for inspecting model details and creating custom model visualizations.\nReturning to the example of our linear model on timber volume, we’d like to extract information on the model components. In this example, the components are the regression coefficients. After taking a look at the model object and its summary(), you might notice that you can extract the regression coefficients as follows:\n\nsummary(trees_model)$coefficients\n#> Estimate Std. Error t value Pr(>|t|)\n#> (Intercept) -57.9876589 8.6382259 -6.712913 2.749507e-07\n#> Girth 4.7081605 0.2642646 17.816084 8.223304e-17\n#> Height 0.3392512 0.1301512 2.606594 1.449097e-02\n\nThis object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. 
Converting to a tibble where the row names are contained in a column, you might write:\n\ntrees_model_tidy <- summary(trees_model)$coefficients %>% \n as_tibble(rownames = \"term\")\n\ntrees_model_tidy\n#> # A tibble: 3 × 5\n#> term Estimate `Std. Error` `t value` `Pr(>|t|)`\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) -58.0 8.64 -6.71 2.75e- 7\n#> 2 Girth 4.71 0.264 17.8 8.22e-17\n#> 3 Height 0.339 0.130 2.61 1.45e- 2\n\nThe broom package standardizes common column names used to describe coefficients. In this case, the column names are:\n\ncolnames(trees_model_tidy) <- c(\"term\", \"estimate\", \"std.error\", \"statistic\", \"p.value\")\n\nA glossary giving the currently acceptable column names outputted by tidy() methods can be found at the end of this article. As a rule of thumb, column names resulting from tidy() methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions).\nFinally, it is common for tidy() methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. In this example, the confint() function can be used to calculate confidence intervals from a model object resulting from lm():\n\nconfint(trees_model)\n#> 2.5 % 97.5 %\n#> (Intercept) -75.68226247 -40.2930554\n#> Girth 4.16683899 5.2494820\n#> Height 0.07264863 0.6058538\n\nWith these considerations in mind, a reasonable tidy() method for lm() might look something like:\n\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n \n result <- summary(x)$coefficients %>%\n tibble::as_tibble(rownames = \"term\") %>%\n dplyr::rename(estimate = Estimate,\n std.error = `Std. Error`,\n statistic = `t value`,\n p.value = `Pr(>|t|)`)\n \n if (conf.int) {\n ci <- confint(x, level = conf.level)\n result <- dplyr::left_join(result, ci, by = \"term\")\n }\n \n result\n}\n\n\n\n\n\n\n\nNote\n\n\n\nIf you’re interested, the actual tidy.lm() source can be found here! 
It’s not too different from the version above except for some argument checking and additional columns.\n\n\nWith this method exported, then, if a user calls tidy(fit), where fit is an output from lm(), the tidy() generic would “redirect” the call to the tidy.lm() function above.\nSome things to keep in mind while writing your tidy() method:\n\nSometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn’t have the same interpretation, it doesn’t make sense to summarize the fixed and random effects in the same table. In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of:\n\n\nmodel <- mixed_model(...)\ntidy(model, effects = \"fixed\")\ntidy(model, effects = \"random\")\n\n\nHow are missing values encoded in the model object and its summary()? Ensure that rows are included even when the associated model component is missing or rank deficient.\nAre there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to tidy() methods include:\n\nconf.int: A logical indicating whether or not to calculate confidence/credible intervals. This should default to FALSE.\nconf.level: The confidence level to use for the interval when conf.int = TRUE. Typically defaults to .95.\nexponentiate: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression).\n\n\n\n\nImplementing the glance() method\nglance() returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the x input is a model object, and the ... 
is an optional argument to supply additional information to any calls inside your method. New glance() methods can also take additional arguments and must include the x and ... arguments. (For a glossary of currently acceptable additional arguments, see the end of this article.)\nReturning to the trees_model example, we could pull out the \\(R^2\\) value with the following code:\n\nsummary(trees_model)$r.squared\n#> [1] 0.94795\n\nSimilarly, for the adjusted \\(R^2\\):\n\nsummary(trees_model)$adj.r.squared\n#> [1] 0.9442322\n\nUnfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a tibble() element-by-element by subsetting the summary() object repeatedly. The with() function, however, can help make this process a bit less tedious by evaluating expressions inside of the summary(trees_model) environment. To grab those those same two model elements from above using with():\n\nwith(summary(trees_model),\n tibble::tibble(r.squared = r.squared,\n adj.r.squared = adj.r.squared))\n#> # A tibble: 1 × 2\n#> r.squared adj.r.squared\n#> <dbl> <dbl>\n#> 1 0.948 0.944\n\nA reasonable glance() method for lm(), then, might look something like:\n\nglance.lm <- function(x, ...) 
{\n with(\n summary(x),\n tibble::tibble(\n r.squared = r.squared,\n adj.r.squared = adj.r.squared,\n sigma = sigma,\n statistic = fstatistic[\"value\"],\n p.value = pf(\n fstatistic[\"value\"],\n fstatistic[\"numdf\"],\n fstatistic[\"dendf\"],\n lower.tail = FALSE\n ),\n df = fstatistic[\"numdf\"],\n logLik = as.numeric(stats::logLik(x)),\n AIC = stats::AIC(x),\n BIC = stats::BIC(x),\n deviance = stats::deviance(x),\n df.residual = df.residual(x),\n nobs = stats::nobs(x)\n )\n )\n}\n\n\n\n\n\n\n\nNote\n\n\n\nThis is the actual definition of glance.lm() provided by broom!\n\n\nSome things to keep in mind while writing glance() methods: * Output should not include the name of the modeling function or any arguments given to the modeling function. * In some cases, you may wish to provide model-level diagnostics not returned by the original object. For example, the above glance.lm() calculates AIC and BIC from the model fit. If these are easy to compute, feel free to add them. However, tidier methods are generally not an appropriate place to implement complex or time consuming calculations. * The glance method should always return the same columns in the same order when given an object of a given model class. If a summary metric (such as AIC) is not defined in certain circumstances, use NA.\n\n\nImplementing the augment() method\naugment() methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a . prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in the glossary.) The x and ... arguments share their meaning with the two functions described above. augment methods also optionally accept a data argument that is a data.frame (or tibble) to add observation-level information to, returning a tibble object with the same number of rows as data. 
Many augment() methods also accept a newdata argument, following the same conventions as the data argument, except with the underlying assumption that the model has not “seen” the data yet. As a result, newdata arguments need not contain the response columns in data. Only one of data or newdata should be supplied. A full glossary of acceptable arguments to augment() methods can be found at the end of this article.\nIf a data argument is not specified, augment() should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model.\nWith this is mind, we can look back to our trees_model example. For one, the model element inside of the trees_model object will allow us to recover the original data:\n\ntrees_model$model\n#> Volume Girth Height\n#> 1 10.3 8.3 70\n#> 2 10.3 8.6 65\n#> 3 10.2 8.8 63\n#> 4 16.4 10.5 72\n#> 5 18.8 10.7 81\n#> 6 19.7 10.8 83\n#> 7 15.6 11.0 66\n#> 8 18.2 11.0 75\n#> 9 22.6 11.1 80\n#> 10 19.9 11.2 75\n#> 11 24.2 11.3 79\n#> 12 21.0 11.4 76\n#> 13 21.4 11.4 76\n#> 14 21.3 11.7 69\n#> 15 19.1 12.0 75\n#> 16 22.2 12.9 74\n#> 17 33.8 12.9 85\n#> 18 27.4 13.3 86\n#> 19 25.7 13.7 71\n#> 20 24.9 13.8 64\n#> 21 34.5 14.0 78\n#> 22 31.7 14.2 80\n#> 23 36.3 14.5 74\n#> 24 38.3 16.0 72\n#> 25 42.6 16.3 77\n#> 26 55.4 17.3 81\n#> 27 55.7 17.5 82\n#> 28 58.3 17.9 80\n#> 29 51.5 18.0 80\n#> 30 51.0 18.0 80\n#> 31 77.0 20.6 87\n\nSimilarly, the fitted values and residuals can be accessed with the following code:\n\nhead(trees_model$fitted.values)\n#> 1 2 3 4 5 6 \n#> 4.837660 4.553852 4.816981 15.874115 19.869008 21.018327\nhead(trees_model$residuals)\n#> 1 2 3 4 5 6 \n#> 5.4623403 5.7461484 5.3830187 0.5258848 -1.0690084 -1.3183270\n\nAs with glance() methods, it’s fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. 
A common metric associated with linear models, for example, is the standard error of fitted values:\n\nse.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>%\n unname()\n\nhead(se.fit)\n#> [1] 1.3211285 1.4893775 1.6325024 0.9444212 1.3484251 1.5319772\n\nThus, a reasonable augment() method for lm might look something like this:\n\naugment.lm <- function(x, data = x$model, newdata = NULL, ...) {\n if (is.null(newdata)) {\n dplyr::bind_cols(tibble::as_tibble(data),\n tibble::tibble(.fitted = x$fitted.values,\n .se.fit = predict(x, \n newdata = data, \n se.fit = TRUE)$se.fit,\n .resid = x$residuals))\n } else {\n predictions <- predict(x, newdata = newdata, se.fit = TRUE)\n dplyr::bind_cols(tibble::as_tibble(newdata),\n tibble::tibble(.fitted = predictions$fit,\n .se.fit = predictions$se.fit))\n }\n}\n\nSome other things to keep in mind while writing augment() methods: * The newdata argument should default to NULL. Users should only ever specify one of data or newdata. Providing both data and newdata should result in an error. The newdata argument should accept both data.frames and tibbles. * Data given to the data argument must have both the original predictors and the original response. Data given to the newdata argument only needs to have the original predictors. This is important because there may be important information associated with training data that is not associated with test data. This means that the original_data object in augment(model, data = original_data) should provide .fitted and .resid columns (in most cases), whereas test_data in augment(model, data = test_data) only needs a .fitted column, even if the response is present in test_data. * If the data or newdata is specified as a data.frame with rownames, augment should return them in a column called .rownames. * For observations where no fitted values or summaries are available (where there’s missing data, for example), return NA. 
* The augment() method should always return as many rows as were in data or newdata, depending on which is supplied\n\n\n\n\n\n\nNote\n\n\n\nThe recommended interface and functionality for augment() methods may change soon." + }, + { + "objectID": "learn/develop/broom/index.html#document-the-new-methods", + "href": "learn/develop/broom/index.html#document-the-new-methods", + "title": "Create your own broom tidier methods", + "section": "Document the new methods", + "text": "Document the new methods\nThe only remaining step is to integrate the new methods into the parent package! To do so, just drop the methods into a .R file inside of the /R folder and document them using roxygen2. If you’re unfamiliar with the process of documenting objects, you can read more about it here. Here’s an example of how our tidy.lm() method might be documented:\n\n#' Tidy a(n) lm object\n#'\n#' @param x A `lm` object.\n#' @param conf.int Logical indicating whether or not to include \n#' a confidence interval in the tidied output. Defaults to FALSE.\n#' @param conf.level The confidence level to use for the confidence \n#' interval if conf.int = TRUE. Must be strictly greater than 0 \n#' and less than 1. Defaults to 0.95, which corresponds to a \n#' 95 percent confidence interval.\n#' @param ... Unused, included for generic consistency only.\n#' @return A tidy [tibble::tibble()] summarizing component-level\n#' information about the model\n#'\n#' @examples\n#' # load the trees dataset\n#' data(trees)\n#' \n#' # fit a linear model on timber volume\n#' trees_model <- lm(Volume ~ Girth + Height, data = trees)\n#'\n#' # summarize model coefficients in a tidy tibble!\n#' tidy(trees_model)\n#'\n#' @export\ntidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) {\n\n # ... the rest of the function definition goes here!\n\nOnce you’ve documented each of your new methods and executed devtools::document(), you’re done! 
Congrats on implementing your own broom tidier methods for a new model object!" + }, + { + "objectID": "learn/develop/broom/index.html#glossaries", + "href": "learn/develop/broom/index.html#glossaries", + "title": "Create your own broom tidier methods", + "section": "Glossaries", + "text": "Glossaries\n\nArguments\nTidier methods have a standardized set of acceptable argument and output column names. The currently acceptable argument names by tidier method are:\n\n\n\n\n\n\n\n\n\nColumn Names\nThe currently acceptable column names by tidier method are:\n\n\n\n\n\n\n\nThe alexpghayes/modeltests package provides unit testing infrastructure to check your new tidier methods. Please file an issue there to request new arguments/columns to be added to the glossaries!" + }, + { + "objectID": "learn/develop/broom/index.html#session-info", + "href": "learn/develop/broom/index.html#session-info", + "title": "Create your own broom tidier methods", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> generics * 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> 
rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/develop/metrics/index.html", + "href": "learn/develop/metrics/index.html", + "title": "Custom performance metrics", + "section": "", + "text": "To use code in this article, you will need to install the following packages: rlang and tidymodels.\nThe yardstick package already includes a large number of metrics, but there’s obviously a chance that you might have a custom metric that hasn’t been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics.\nWhy create custom metrics? With the infrastructure yardstick provides, you get:\n\nStandardization between your metric and other preexisting metrics\nAutomatic error handling for types and lengths\nAutomatic selection of binary / multiclass metric implementations\nAutomatic NA handling\nSupport for grouped data frames\nSupport for use alongside other metrics in metric_set()\n\nThe implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of roc_auc() after reading this vignette if you want to work on a class probability metric." 
+ }, + { + "objectID": "learn/develop/metrics/index.html#introduction", + "href": "learn/develop/metrics/index.html#introduction", + "title": "Custom performance metrics", + "section": "", + "text": "To use code in this article, you will need to install the following packages: rlang and tidymodels.\nThe yardstick package already includes a large number of metrics, but there’s obviously a chance that you might have a custom metric that hasn’t been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics.\nWhy create custom metrics? With the infrastructure yardstick provides, you get:\n\nStandardization between your metric and other preexisting metrics\nAutomatic error handling for types and lengths\nAutomatic selection of binary / multiclass metric implementations\nAutomatic NA handling\nSupport for grouped data frames\nSupport for use alongside other metrics in metric_set()\n\nThe implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of roc_auc() after reading this vignette if you want to work on a class probability metric." + }, + { + "objectID": "learn/develop/metrics/index.html#numeric-example-mse", + "href": "learn/develop/metrics/index.html#numeric-example-mse", + "title": "Custom performance metrics", + "section": "Numeric example: MSE", + "text": "Numeric example: MSE\nMean squared error (sometimes MSE or from here on, mse()) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for mse() is:\n\\[ MSE = \\frac{1}{N} \\sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) \\]\nAll metrics should have a data frame version, and a vector version. 
The data frame version here will be named mse(), and the vector version will be mse_vec().\n\nVector implementation\nTo start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as beta in f_meas()). To create the vector function, you need to do two things:\n\nCreate an internal implementation function, mse_impl().\nPass on that implementation function to metric_vec_template().\n\nBelow, mse_impl() contains the actual implementation of the metric, and takes truth and estimate as arguments along with any metric specific arguments.\nThe yardstick function metric_vec_template() accepts the implementation function along with the other arguments to mse_vec() and actually executes mse_impl(). Additionally, it has a cls argument to specify the allowed class type of truth and estimate. If the classes are the same, a single character class can be passed, and if they are different a character vector of length 2 can be supplied.\nThe metric_vec_template() helper handles the removal of NA values in your metric, so your implementation function does not have to worry about them. It performs type checking using cls and also checks that the estimator is valid, the second of which is covered in the classification example. This way, all you have to worry about is the core implementation.\n\nlibrary(tidymodels)\n\nmse_vec <- function(truth, estimate, na_rm = TRUE, ...) 
{\n \n mse_impl <- function(truth, estimate) {\n mean((truth - estimate) ^ 2)\n }\n \n metric_vec_template(\n metric_impl = mse_impl,\n truth = truth, \n estimate = estimate,\n na_rm = na_rm,\n cls = \"numeric\",\n ...\n )\n \n}\n\nAt this point, you’ve created the vector version of the mean squared error metric.\n\ndata(\"solubility_test\")\n\nmse_vec(\n truth = solubility_test$solubility, \n estimate = solubility_test$prediction\n)\n#> Warning: `metric_vec_template()` was deprecated in yardstick 1.2.0.\n#> ℹ Please use `check_numeric_metric()`, `check_class_metric()`,\n#> `check_class_metric()`, `yardstick_remove_missing()`, and\n#> `yardstick_any_missing()` instead.\n#> [1] 0.5214438\n\nIntelligent error handling is immediately available.\n\nmse_vec(truth = \"apple\", estimate = 1)\n#> Error in `validate_class()`:\n#> ! `truth` should be a numeric but a character was supplied.\n\nmse_vec(truth = 1, estimate = factor(\"xyz\"))\n#> Error in `validate_class()`:\n#> ! `estimate` should be a numeric but a factor was supplied.\n\nNA values are removed if na_rm = TRUE (the default). If na_rm = FALSE and any NA values are detected, then the metric automatically returns NA.\n\n# NA values removed\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5))\n#> [1] 0.01\n\n# NA returned\nmse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE)\n#> [1] NA\n\n\n\nData frame implementation\nThe data frame version of the metric should be fairly simple. It is a generic function with a data.frame method that calls the yardstick helper, metric_summarizer(), and passes along the mse_vec() function to it along with versions of truth and estimate that have been wrapped in rlang::enquo() and then unquoted with !! so that non-standard evaluation can be supported.\n\nlibrary(rlang)\n\nmse <- function(data, ...) {\n UseMethod(\"mse\")\n}\n\nmse <- new_numeric_metric(mse, direction = \"minimize\")\n\nmse.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) 
{\n \n metric_summarizer(\n metric_nm = \"mse\",\n metric_fn = mse_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! enquo(estimate), \n na_rm = na_rm,\n ...\n )\n \n}\n\nAnd that’s it. The yardstick package handles the rest with an internal call to summarise().\n\nmse(solubility_test, truth = solubility, estimate = prediction)\n\n# Error handling\nmse(solubility_test, truth = solubility, estimate = factor(\"xyz\"))\n\nLet’s test it out on a grouped data frame.\n\nlibrary(dplyr)\n\nset.seed(1234)\nsize <- 100\ntimes <- 10\n\n# create 10 resamples\nsolubility_resampled <- bind_rows(\n replicate(\n n = times,\n expr = sample_n(solubility_test, size, replace = TRUE),\n simplify = FALSE\n ),\n .id = \"resample\"\n)\n\nsolubility_resampled %>%\n group_by(resample) %>%\n mse(solubility, prediction)\n#> Warning: `metric_summarizer()` was deprecated in yardstick 1.2.0.\n#> ℹ Please use `numeric_metric_summarizer()`,\n#> `class_metric_summarizer()`, `prob_metric_summarizer()`, or\n#> `curve_metric_summarizer()` instead.\n#> # A tibble: 10 × 4\n#> resample .metric .estimator .estimate\n#> <chr> <chr> <chr> <dbl>\n#> 1 1 mse standard 0.512\n#> 2 10 mse standard 0.454\n#> 3 2 mse standard 0.513\n#> 4 3 mse standard 0.414\n#> 5 4 mse standard 0.543\n#> 6 5 mse standard 0.456\n#> 7 6 mse standard 0.652\n#> 8 7 mse standard 0.642\n#> 9 8 mse standard 0.404\n#> 10 9 mse standard 0.479" + }, + { + "objectID": "learn/develop/metrics/index.html#class-example-miss-rate", + "href": "learn/develop/metrics/index.html#class-example-miss-rate", + "title": "Custom performance metrics", + "section": "Class example: miss rate", + "text": "Class example: miss rate\nMiss rate is another name for the false negative rate, and is a classification metric in the same family as sens() and spec(). It follows the formula:\n\\[ miss\\_rate = \\frac{FN}{FN + TP} \\]\nThis metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. 
As you will see in the example, you can achieve this with a call to base::table(estimate, truth) which correctly puts the “correct” result in the columns of the confusion matrix.\nClassification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let’s start with the binary case.\n\nVector implementation\nThe vector implementation for classification metrics initially has the same setup as numeric metrics, but has an additional argument, estimator that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to NULL. Additionally, pass it along to metric_vec_template() so that it can check the provided estimator against the classes of truth and estimate to see if they are allowed.\n\n# Logic for `event_level`\nevent_col <- function(xtab, event_level) {\n if (identical(event_level, \"first\")) {\n colnames(xtab)[[1]]\n } else {\n colnames(xtab)[[2]]\n }\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n estimator <- finalize_estimator(truth, estimator)\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\nAnother change from the numeric metric is that a call to finalize_estimator() is made. 
This is the infrastructure that auto-selects the type of estimator to use.\n\ndata(\"two_class_example\")\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n#> [1] 0.120155\n\nWhat happens if you try and pass in a multiclass result?\n\ndata(\"hpc_cv\")\nfold1 <- filter(hpc_cv, Resample == \"Fold01\")\nmiss_rate_vec(fold1$obs, fold1$pred)\n#> F M L \n#> 0.06214689 0.00000000 0.00000000\n\nThis isn’t great, as currently multiclass miss_rate() isn’t supported and it would have been better to throw an error if the estimator was not \"binary\". Currently, finalize_estimator() uses its default implementation which selected \"macro\" as the estimator since truth was a factor with more than 2 classes. When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this.\nTo fix this, a generic counterpart to finalize_estimator(), called finalize_estimator_internal(), exists that helps you restrict the input types. If you provide a method to finalize_estimator_internal() where the method name is the same as your metric name, and then set the metric_class argument in finalize_estimator() to be the same thing, you can control how the auto-selection of the estimator is handled.\nDon’t worry about the metric_dispatcher argument. This is handled for you and just exists as a dummy argument to dispatch off of.\nIt is also good practice to call validate_estimator() which handles the case where a user passed in the estimator themselves. 
This validates that the supplied estimator is one of the allowed types and error otherwise.\n\nfinalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator) {\n \n validate_estimator(estimator, estimator_override = \"binary\")\n if (!is.null(estimator)) {\n return(estimator)\n }\n \n lvls <- levels(x)\n if (length(lvls) > 2) {\n stop(\"A multiclass `truth` input was provided, but only `binary` is supported.\")\n } \n \"binary\"\n}\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n # Create \n xtab <- table(estimate, truth)\n col <- event_col(xtab, event_level)\n col2 <- setdiff(colnames(xtab), col)\n \n tp <- xtab[col, col]\n fn <- xtab[col2, col]\n \n fn / (fn + tp)\n \n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n# Error thrown by our custom handler\n# miss_rate_vec(fold1$obs, fold1$pred)\n\n# Error thrown by validate_estimator()\n# miss_rate_vec(fold1$obs, fold1$pred, estimator = \"macro\")\n\n\n\nSupporting multiclass miss rate\nLike many other classification metrics such as precision() or recall(), miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read vignette(\"multiclass\", \"yardstick\") for more information about how these methods work.\nGenerally, they require more effort to get right than the binary case, especially if you want to have a performant version. 
Luckily, a somewhat standard template is used in yardstick and can be used here as well.\nLet’s first remove the “binary” restriction we created earlier.\n\nrm(finalize_estimator_internal.miss_rate)\n\nThe main changes below are:\n\nThe binary implementation is moved to miss_rate_binary().\nmiss_rate_estimator_impl() is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from miss_rate_impl() and also accepts the estimator argument using R’s function scoping rules.\nmiss_rate_multiclass() provides the implementation for the multiclass case. It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in miss_rate_estimator_impl() depending on whether or not it was macro or weighted macro.\n\n\nmiss_rate_vec <- function(truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) 
{\n # calls finalize_estimator_internal() internally\n estimator <- finalize_estimator(truth, estimator, metric_class = \"miss_rate\")\n \n miss_rate_impl <- function(truth, estimate) {\n xtab <- table(estimate, truth)\n # Rather than implement the actual method here, we rely on\n # an *_estimator_impl() function that can handle binary\n # and multiclass cases\n miss_rate_estimator_impl(xtab, estimator, event_level)\n }\n \n metric_vec_template(\n metric_impl = miss_rate_impl,\n truth = truth,\n estimate = estimate,\n na_rm = na_rm,\n cls = \"factor\",\n estimator = estimator,\n ...\n )\n}\n\n\n# This function switches between binary and multiclass implementations\nmiss_rate_estimator_impl <- function(data, estimator, event_level) {\n if(estimator == \"binary\") {\n miss_rate_binary(data, event_level)\n } else {\n # Encapsulates the macro, macro weighted, and micro cases\n wt <- get_weights(data, estimator)\n res <- miss_rate_multiclass(data, estimator)\n weighted.mean(res, wt)\n }\n}\n\n\nmiss_rate_binary <- function(data, event_level) {\n col <- event_col(data, event_level)\n col2 <- setdiff(colnames(data), col)\n \n tp <- data[col, col]\n fn <- data[col2, col]\n \n fn / (fn + tp)\n}\n\nmiss_rate_multiclass <- function(data, estimator) {\n \n # We need tp and fn for all classes individually\n # we can get this by taking advantage of the fact\n # that tp + fn = colSums(data)\n tp <- diag(data)\n tpfn <- colSums(data)\n fn <- tpfn - tp\n \n # If using a micro estimator, we sum the individual\n # pieces before performing the miss rate calculation\n if (estimator == \"micro\") {\n tp <- sum(tp)\n fn <- sum(fn)\n }\n \n # return the vector \n tp / (tp + fn)\n}\n\nFor the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. 
For example, f_meas() is a combination of recall() and precision(). When calculating a macro averaged f_meas(), the weighting must be applied 1 time, at the very end of the calculation. recall_multiclass() and precision_multiclass() are defined similarly to how miss_rate_multiclass() is defined and returns the unweighted vector of calculations. This means we can directly use this in f_meas(), and then weight everything once at the end of that calculation.\nLet’s try it out now:\n\n# two class\nmiss_rate_vec(two_class_example$truth, two_class_example$predicted)\n#> [1] 0.120155\n\n# multiclass\nmiss_rate_vec(fold1$obs, fold1$pred)\n#> [1] 0.5483506\n\n\nData frame implementation\nLuckily, the data frame implementation is as simple as the numeric case, we just need to add an extra estimator argument and pass that through.\n\nmiss_rate <- function(data, ...) {\n UseMethod(\"miss_rate\")\n}\n\nmiss_rate <- new_class_metric(miss_rate, direction = \"minimize\")\n\nmiss_rate.data.frame <- function(data, \n truth, \n estimate, \n estimator = NULL, \n na_rm = TRUE, \n event_level = \"first\",\n ...) {\n metric_summarizer(\n metric_nm = \"miss_rate\",\n metric_fn = miss_rate_vec,\n data = data,\n truth = !! enquo(truth),\n estimate = !! 
enquo(estimate), \n estimator = estimator,\n na_rm = na_rm,\n event_level = event_level,\n ...\n )\n}\n\n\n# Macro weighted automatically selected\nfold1 %>%\n miss_rate(obs, pred)\n\n# Switch to micro\nfold1 %>%\n miss_rate(obs, pred, estimator = \"micro\")\n\n# Macro weighted by resample\nhpc_cv %>%\n group_by(Resample) %>%\n miss_rate(obs, pred, estimator = \"macro_weighted\")\n\n# Error handling\nmiss_rate(hpc_cv, obs, VF)" + }, + { + "objectID": "learn/develop/metrics/index.html#using-custom-metrics", + "href": "learn/develop/metrics/index.html#using-custom-metrics", + "title": "Custom performance metrics", + "section": "Using custom metrics", + "text": "Using custom metrics\nThe metric_set() function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, metric_set() fails. By using new_numeric_metric() and new_class_metric() in the above custom metrics, they work out of the box without any additional adjustments.\n\nnumeric_mets <- metric_set(mse, rmse)\n\nnumeric_mets(solubility_test, solubility, prediction)\n#> # A tibble: 2 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 mse standard 0.521\n#> 2 rmse standard 0.722" + }, + { + "objectID": "learn/develop/metrics/index.html#session-info", + "href": "learn/develop/metrics/index.html#session-info", + "title": "Custom performance metrics", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * 
version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang * 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/develop/models/index.html", + "href": "learn/develop/models/index.html", + "title": "How to build a parsnip model", + "section": "", + "text": "To use code in this article, you will need to install the following packages: mda, modeldata, and tidymodels.\nThe parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this:\n\nIt eliminates a lot of duplicate code.\nSince the expressions are not evaluated until fitting, it eliminates many package dependencies.\n\nA parsnip model function is itself very general. For example, the logistic_reg() function itself doesn’t have any model code within it. Instead, each model function is associated with one or more computational engines. These might be different R packages or some function in another language (that can be evaluated by R).\nThis article describes the process of creating a new model function. 
Before proceeding, take a minute and read our guidelines on creating modeling packages to understand the general themes and conventions that we use." + }, + { + "objectID": "learn/develop/models/index.html#introduction", + "href": "learn/develop/models/index.html#introduction", + "title": "How to build a parsnip model", + "section": "", + "text": "To use code in this article, you will need to install the following packages: mda, modeldata, and tidymodels.\nThe parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this:\n\nIt eliminates a lot of duplicate code.\nSince the expressions are not evaluated until fitting, it eliminates many package dependencies.\n\nA parsnip model function is itself very general. For example, the logistic_reg() function itself doesn’t have any model code within it. Instead, each model function is associated with one or more computational engines. These might be different R packages or some function in another language (that can be evaluated by R).\nThis article describes the process of creating a new model function. Before proceeding, take a minute and read our guidelines on creating modeling packages to understand the general themes and conventions that we use." + }, + { + "objectID": "learn/develop/models/index.html#an-example-model", + "href": "learn/develop/models/index.html#an-example-model", + "title": "How to build a parsnip model", + "section": "An example model", + "text": "An example model\nAs an example, we’ll create a function for mixture discriminant analysis. 
There are a few packages that implement this but we’ll focus on mda::mda:\n\nstr(mda::mda)\n#> function (formula = formula(data), data = sys.frame(sys.parent()), subclasses = 3, \n#> sub.df = NULL, tot.df = NULL, dimension = sum(subclasses) - 1, eps = 100 * \n#> .Machine$double.eps, iter = 5, weights = mda.start(x, g, subclasses, \n#> trace, ...), method = polyreg, keep.fitted = (n * dimension < 5000), \n#> trace = FALSE, ...)\n\nThe main hyperparameter is the number of subclasses. We’ll name our function discrim_mixture." + }, + { + "objectID": "learn/develop/models/index.html#aspects-of-models", + "href": "learn/develop/models/index.html#aspects-of-models", + "title": "How to build a parsnip model", + "section": "Aspects of models", + "text": "Aspects of models\nBefore proceeding, it helps to to review how parsnip categorizes models:\n\nThe model type is related to the structural aspect of the model. For example, the model type linear_reg represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are nearest_neighbor, decision_tree, and so on.\nWithin a model type is the mode, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both models (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression).\nThe computation engine is a combination of the estimation method and the implementation. For example, for linear regression, one engine is \"lm\" which uses ordinary least squares analysis via the lm() function. Another engine is \"stan\" which uses the Stan infrastructure to estimate parameters using Bayes rule.\n\nWhen adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model." 
+ }, + { + "objectID": "learn/develop/models/index.html#the-general-process", + "href": "learn/develop/models/index.html#the-general-process", + "title": "How to build a parsnip model", + "section": "The general process", + "text": "The general process\nThe parsnip package stores information about the models in an internal environment object. The environment can be accessed via the function get_model_env(). The package includes a variety of functions that can get or set the different aspects of the models.\nIf you are adding a new model from your own package, you can use these functions to add new entries into the model environment.\n\nStep 1. Register the model, modes, and arguments\nWe will add the MDA model using the model type discrim_mixture. Since this is a classification method, we only have to register a single mode:\n\nlibrary(tidymodels)\nset_new_model(\"discrim_mixture\")\nset_model_mode(model = \"discrim_mixture\", mode = \"classification\")\nset_model_engine(\n \"discrim_mixture\", \n mode = \"classification\", \n eng = \"mda\"\n)\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mda\")\n\nThese functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip:\n\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mdaNA\n#> \n#> ¹The model can use case weights.\n#> \n#> no registered arguments.\n#> \n#> no registered fit modules.\n#> \n#> no registered prediction modules.\n\nThe next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in:\n\nThe name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. “penalty” instead of “lambda” for regularized regression). 
We recommend consulting the model argument table available here to see if an existing argument name can be used before creating a new one.\nThe argument name that is used by the underlying modeling function.\nA function reference for a constructor that will be used to generate tuning parameter values. This should be a character vector with a named element called fun that is the constructor function. There is an optional element pkg that can be used to call the function using its namespace. If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for trans and range while qualitative parameters can pass values via this list.\nA logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. In general this is not the case so one would use has_submodels = FALSE.\n\nFor mda::mda(), the main tuning parameter is subclasses which we will rewrite as sub_classes.\n\nset_model_arg(\n model = \"discrim_mixture\",\n eng = \"mda\",\n parsnip = \"sub_classes\",\n original = \"subclasses\",\n func = list(pkg = \"foo\", fun = \"bar\"),\n has_submodel = FALSE\n)\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mdaNA\n#> \n#> ¹The model can use case weights.\n#> \n#> arguments: \n#> mda: \n#> sub_classes --> subclasses\n#> \n#> no registered fit modules.\n#> \n#> no registered prediction modules.\n\n\n\nStep 2. Create the model function\nThis is a fairly simple function that can follow a basic template. The main arguments to our function will be:\n\nThe mode. If the model can do more than one mode, you might default this to “unknown”. 
In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won’t have to specify it.\nThe argument names (sub_classes here). These should be defaulted to NULL.\n\nA basic version of the function is:\n\ndiscrim_mixture <-\n function(mode = \"classification\", sub_classes = NULL) {\n # Check for correct mode\n if (mode != \"classification\") {\n rlang::abort(\"`mode` should be 'classification'\")\n }\n \n # Capture the arguments in quosures\n args <- list(sub_classes = rlang::enquo(sub_classes))\n \n # Save some empty slots for future parts of the specification\n new_model_spec(\n \"discrim_mixture\",\n args = args,\n eng_args = NULL,\n mode = mode,\n method = NULL,\n engine = NULL\n )\n }\n\nThis is pretty simple since the data are not exposed to this function.\n\n\n\n\n\n\nWarning\n\n\n\nWe strongly suggest favoring rlang::abort() and rlang::warn() over stop() and warning(). The former return better traceback results and have safer defaults for handling call objects.\n\n\n\n\nStep 3. Add a fit module\nNow that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are:\n\ninterface is a single character value that could be “formula”, “data.frame”, or “matrix”. This defines the type of interface used by the underlying fit function (mda::mda, in this case). This helps the translation of the data to be in an appropriate format for the that function.\nprotect is an optional list of function arguments that should not be changeable by the user. In this case, we probably don’t want users to pass data values to these arguments (until the fit() function is called).\nfunc is the package and name of the function that will be called. 
If you are using a locally defined function, only fun is required.\ndefaults is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. This isn’t needed in this case, but is described later in this document.\n\nFor the first engine:\n\nset_fit(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\"),\n func = c(pkg = \"mda\", fun = \"mda\"),\n defaults = list()\n )\n)\n\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mda\n#> \n#> ¹The model can use case weights.\n#> \n#> arguments: \n#> mda: \n#> sub_classes --> subclasses\n#> \n#> fit modules:\n#> engine mode\n#> mda classification\n#> \n#> no registered prediction modules.\n\nWe also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly.\n\npredictor_indicators describes whether and how to create indicator/dummy variables from factor predictors. There are three options: \"none\" (do not expand factor predictors), \"traditional\" (apply the standard model.matrix() encodings), and \"one_hot\" (create the complete set including the baseline level for all factors).\ncompute_intercept controls whether model.matrix() should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, model.matrix() computes dummy variables for all but one factor level. Without an intercept, model.matrix() computes a full set of indicators for the first factor variable, but an incomplete set for the remainder.\nremove_intercept removes the intercept column after model.matrix() is finished. This can be useful if the model function (e.g. 
lm()) automatically generates an intercept.\nallow_sparse_x specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning.\n\n\nset_encoding(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n\n\n\nStep 4. Add modules for prediction\nSimilar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the class object contains the details. The elements of the list are:\n\npre and post are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won’t be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a pre function with the args below.\nfunc is the prediction function (in the same format as above). In many cases, packages have a predict method for their model’s class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to predict() with no associated package.\nargs is a list of arguments to pass to the prediction function. These will most likely be wrapped in rlang::expr() so that they are not evaluated when defining the method. For mda, the code would be predict(object, newdata, type = \"class\"). What is actually given to the function is the parsnip model fit object, which includes a sub-object called fit() that houses the mda model object. If the data need to be a matrix or data frame, you could also use newdata = quote(as.data.frame(newdata)) or similar.\n\nThe parsnip prediction code will expect the result to be an unnamed character string or factor. 
This will be coerced to a factor with the same levels as the original data.\nTo add this method to the model environment, a similar set() function is used:\n\nclass_info <- \n list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n # These lists should be of the form:\n # {predict.mda argument name} = {values provided from parsnip objects}\n list(\n # We don't want the first two arguments evaluated right now\n # since they don't exist yet. `type` is a simple object that\n # doesn't need to have its evaluation deferred. \n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"class\"\n )\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"class\",\n value = class_info\n)\n\nA similar call can be used to define the class probability module (if they can be computed). The format is identical to the class module but the output is expected to be a tibble with columns for each factor level.\nAs an example of the post function, the data frame created by mda:::predict.mda() will be converted to a tibble. The arguments are x (the raw results coming from the predict method) and object (the parsnip model fit object). The latter has a sub-object called lvl which is a character string of the outcome’s factor levels (if any).\nWe register the probability module. 
There is a template function that makes this slightly easier to format the objects:\n\nprob_info <-\n pred_value_template(\n post = function(x, object) {\n tibble::as_tibble(x)\n },\n func = c(fun = \"predict\"),\n # Now everything else is put into the `args` slot\n object = quote(object$fit),\n newdata = quote(new_data),\n type = \"posterior\"\n )\n\nset_pred(\n model = \"discrim_mixture\",\n eng = \"mda\",\n mode = \"classification\",\n type = \"prob\",\n value = prob_info\n)\n\nshow_model_info(\"discrim_mixture\")\n#> Information for `discrim_mixture`\n#> modes: unknown, classification \n#> \n#> engines: \n#> classification: mda\n#> \n#> ¹The model can use case weights.\n#> \n#> arguments: \n#> mda: \n#> sub_classes --> subclasses\n#> \n#> fit modules:\n#> engine mode\n#> mda classification\n#> \n#> prediction modules:\n#> mode engine methods\n#> classification mda class, prob\n\nIf this model could be used for regression situations, we could also add a “numeric” module. For pred, the model requires an unnamed numeric vector output (usually).\nExamples are here and here.\n\n\nDoes it work?\nAs a developer, one thing that may come in handy is the translate() function. 
This will tell you what the model’s eventual syntax will be.\nFor example:\n\ndiscrim_mixture(sub_classes = 2) %>%\n translate(engine = \"mda\")\n#> discrim mixture Model Specification (classification)\n#> \n#> Main Arguments:\n#> sub_classes = 2\n#> \n#> Computational engine: mda \n#> \n#> Model fit template:\n#> mda::mda(formula = missing_arg(), data = missing_arg(), subclasses = 2)\n\nLet’s try it on a data set from the modeldata package:\n\ndata(\"two_class_dat\", package = \"modeldata\")\nset.seed(4622)\nexample_split <- initial_split(two_class_dat, prop = 0.99)\nexample_train <- training(example_split)\nexample_test <- testing(example_split)\n\nmda_spec <- discrim_mixture(sub_classes = 2) %>% \n set_engine(\"mda\")\n\nmda_fit <- mda_spec %>%\n fit(Class ~ ., data = example_train, engine = \"mda\")\nmda_fit\n#> parsnip model object\n#> \n#> Call:\n#> mda::mda(formula = Class ~ ., data = data, subclasses = ~2)\n#> \n#> Dimension: 2 \n#> \n#> Percent Between-Group Variance Explained:\n#> v1 v2 \n#> 82.63 100.00 \n#> \n#> Degrees of Freedom (per dimension): 3 \n#> \n#> Training Misclassification Error: 0.17241 ( N = 783 )\n#> \n#> Deviance: 671.391\n\npredict(mda_fit, new_data = example_test, type = \"prob\") %>%\n bind_cols(example_test %>% select(Class))\n#> # A tibble: 8 × 3\n#> .pred_Class1 .pred_Class2 Class \n#> <dbl> <dbl> <fct> \n#> 1 0.679 0.321 Class1\n#> 2 0.690 0.310 Class1\n#> 3 0.384 0.616 Class2\n#> 4 0.300 0.700 Class1\n#> 5 0.0262 0.974 Class2\n#> 6 0.405 0.595 Class2\n#> 7 0.793 0.207 Class1\n#> 8 0.0949 0.905 Class2\n\npredict(mda_fit, new_data = example_test) %>% \n bind_cols(example_test %>% select(Class))\n#> # A tibble: 8 × 2\n#> .pred_class Class \n#> <fct> <fct> \n#> 1 Class1 Class1\n#> 2 Class1 Class1\n#> 3 Class2 Class2\n#> 4 Class2 Class1\n#> 5 Class2 Class2\n#> 6 Class2 Class2\n#> 7 Class1 Class1\n#> 8 Class2 Class2" + }, + { + "objectID": "learn/develop/models/index.html#add-an-engine", + "href": 
"learn/develop/models/index.html#add-an-engine", + "title": "How to build a parsnip model", + "section": "Add an engine", + "text": "Add an engine\nThe process for adding an engine to an existing model is almost the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. The code for the rlm() function in MASS is pretty similar to lm(), so we can copy that code and change the package/function names:\n\nset_model_engine(\"linear_reg\", \"regression\", eng = \"rlm\")\nset_dependency(\"linear_reg\", eng = \"rlm\", pkg = \"MASS\")\n\nset_fit(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n value = list(\n interface = \"formula\",\n protect = c(\"formula\", \"data\", \"weights\"),\n func = c(pkg = \"MASS\", fun = \"rlm\"),\n defaults = list()\n )\n)\n\nset_encoding(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"traditional\",\n compute_intercept = TRUE,\n remove_intercept = TRUE,\n allow_sparse_x = FALSE\n )\n)\n\nset_pred(\n model = \"linear_reg\",\n eng = \"rlm\",\n mode = \"regression\",\n type = \"numeric\",\n value = list(\n pre = NULL,\n post = NULL,\n func = c(fun = \"predict\"),\n args =\n list(\n object = expr(object$fit),\n newdata = expr(new_data),\n type = \"response\"\n )\n )\n)\n\n# testing:\nlinear_reg() %>% \n set_engine(\"rlm\") %>% \n fit(mpg ~ ., data = mtcars)\n#> parsnip model object\n#> \n#> Call:\n#> rlm(formula = mpg ~ ., data = data)\n#> Converged in 8 iterations\n#> \n#> Coefficients:\n#> (Intercept) cyl disp hp drat wt \n#> 17.82250038 -0.27878615 0.01593890 -0.02536343 0.46391132 -4.14355431 \n#> qsec vs am gear carb \n#> 0.65307203 0.24975463 1.43412689 0.85943158 -0.01078897 \n#> \n#> Degrees of freedom: 32 total; 21 residual\n#> Scale estimate: 2.15" + }, + { + "objectID": 
"learn/develop/models/index.html#add-parsnip-models-to-another-package", + "href": "learn/develop/models/index.html#add-parsnip-models-to-another-package", + "title": "How to build a parsnip model", + "section": "Add parsnip models to another package", + "text": "Add parsnip models to another package\nThe process here is almost the same. All of the previous functions are still required but their execution is a little different.\nFor parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the “Depends” category.\nThe first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. For our example here, this might look like:\n\nmake_discrim_mixture_mda <- function() {\n parsnip::set_new_model(\"discrim_mixture\")\n\n parsnip::set_model_mode(\"discrim_mixture\", \"classification\")\n\n # and so one...\n}\n\nThis function is then executed when your package is loaded:\n\n.onLoad <- function(libname, pkgname) {\n # This defines discrim_mixture in the model database\n make_discrim_mixture_mda()\n}\n\nFor an example package that uses parsnip definitions, take a look at the discrim package.\n\n\n\n\n\n\nWarning\n\n\n\nTo use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. set_new_model() and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing.\n\n\nIt is also important for parallel processing support to list the home package as a dependency. If the discrim_mixture() function lived in a package called mixedup, include the line:\nset_dependency(\"discrim_mixture\", eng = \"mda\", pkg = \"mixedup\")\nParallel processing requires this explicit dependency setting. 
When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip’s internal database (and produce an error)." + }, + { + "objectID": "learn/develop/models/index.html#your-model-tuning-parameters-and-you", + "href": "learn/develop/models/index.html#your-model-tuning-parameters-and-you", + "title": "How to build a parsnip model", + "section": "Your model, tuning parameters, and you", + "text": "Your model, tuning parameters, and you\nThe tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. discrim_mixture() has one main tuning parameter: sub_classes. To work with tune it is helpful (but not required) to use an S3 method called tunable() to define which arguments should be tuned and how values of those arguments should be generated.\ntunable() takes the model specification as its argument and returns a tibble with columns:\n\nname: The name of the argument.\ncall_info: A list that describes how to call a function that returns a dials parameter object.\nsource: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just \"model_spec\".\ncomponent: A character string with more information about the source. For models, this is just the name of the function (e.g. \"discrim_mixture\").\ncomponent_id: A character string to indicate where a unique identifier is for the object. For a model, this is indicates the type of model argument (e.g. “main”).\n\nThe main piece of information that requires some detail is call_info. This is a list column in the tibble. 
Each element of the list is a list that describes the package and function that can be used to create a dials parameter object.\nFor example, for a nearest-neighbors neighbors parameter, this value is just:\n\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n#> # Nearest Neighbors (quantitative)\n#> Range: [1, 10]\n\nFor discrim_mixture(), a dials object is needed that returns an integer that is the number of sub-classes that should be create. We can create a dials parameter function for this:\n\nsub_classes <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(sub_classes = \"# Sub-Classes\"),\n finalize = NULL\n )\n}\n\nIf this were in the dials package, we could use:\n\ntunable.discrim_mixture <- function(x, ...) {\n tibble::tibble(\n name = c(\"sub_classes\"),\n call_info = list(list(pkg = NULL, fun = \"sub_classes\")),\n source = \"model_spec\",\n component = \"discrim_mixture\",\n component_id = \"main\"\n )\n}\n\nOnce this method is in place, the tuning functions can be used:\n\nmda_spec <- \n discrim_mixture(sub_classes = tune()) %>% \n set_engine(\"mda\")\n\nset.seed(452)\ncv <- vfold_cv(example_train)\nmda_tune_res <- mda_spec %>%\n tune_grid(Class ~ ., cv, grid = 4)\nshow_best(mda_tune_res, metric = \"roc_auc\")\n#> # A tibble: 4 × 7\n#> sub_classes .metric .estimator mean n std_err .config \n#> <int> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 2 roc_auc binary 0.890 10 0.0143 Preprocessor1_Model3\n#> 2 3 roc_auc binary 0.889 10 0.0142 Preprocessor1_Model4\n#> 3 6 roc_auc binary 0.884 10 0.0147 Preprocessor1_Model2\n#> 4 8 roc_auc binary 0.881 10 0.0146 Preprocessor1_Model1" + }, + { + "objectID": "learn/develop/models/index.html#pro-tips-what-ifs-exceptions-faq-and-minutiae", + "href": 
"learn/develop/models/index.html#pro-tips-what-ifs-exceptions-faq-and-minutiae", + "title": "How to build a parsnip model", + "section": "Pro-tips, what-ifs, exceptions, FAQ, and minutiae", + "text": "Pro-tips, what-ifs, exceptions, FAQ, and minutiae\nThere are various things that came to mind while developing this resource.\nDo I have to return a simple vector for predict and predict_class?\nPreviously, when discussing the pred information:\n\nFor pred, the model requires an unnamed numeric vector output (usually).\n\nThere are some models (e.g. glmnet, plsr, Cubist, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called values and have extra columns for any parameters that define the different sub-models.\nFor example, if I fit a linear regression model via glmnet and get four values of the regularization parameter (lambda):\n\nlinear_reg() %>%\n set_engine(\"glmnet\", nlambda = 4) %>% \n fit(mpg ~ ., data = mtcars) %>%\n multi_predict(new_data = mtcars[1:3, -1])\n\nHowever, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (lambda, in this case).\nWhat do I do about how my model handles factors or categorical data?\nSome modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using model.matrix()), and some do not. Some examples of models that do not create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a model.matrix()-like default encoding for categorical data used in a model formula, but you can change this encoding using set_encoding(). 
For example, you can set predictor encodings that say, “leave my data alone,” and keep factors as is:\n\nset_encoding(\n model = \"decision_tree\",\n eng = \"rpart\",\n mode = \"regression\",\n options = list(\n predictor_indicators = \"none\",\n compute_intercept = FALSE,\n remove_intercept = FALSE\n )\n)\n\n\n\n\n\n\n\nNote\n\n\n\nThere are three options for predictor_indicators: - “none” (do not expand factor predictors) - “traditional” (apply the standard model.matrix() encoding) - “one_hot” (create the complete set including the baseline level for all factors)\n\n\nTo learn more about encoding categorical predictors, check out this blog post.\nWhat is the defaults slot and why do I need it?\nYou might want to set defaults that can be overridden by the user. For example, for logistic regression with glm, it make sense to default family = binomial. However, if someone wants to use a different link function, they should be able to do that. For that model/engine definition, it has:\n\ndefaults = list(family = expr(binomial))\n\nSo that is the default:\n\nlogistic_reg() %>% translate(engine = \"glm\")\n\n# but you can change it:\n\nlogistic_reg() %>%\n set_engine(\"glm\", family = expr(binomial(link = \"probit\"))) %>% \n translate()\n\nThat’s what defaults are for.\nNote that we wrapped binomial inside of expr(). If we didn’t, it would substitute the results of executing binomial() inside of the expression (and that’s a mess).\nWhat if I want more complex defaults?\nThe translate function can be used to check values or set defaults once the model’s mode is known. To do this, you can create a model-specific S3 method that first calls the general method (translate.model_spec()) and then makes modifications or conducts error traps.\nFor example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. 
Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check:\n\n# Simplified version\ntranslate.rand_forest <- function (x, engine, ...){\n # Run the general method to get the real arguments in place\n x <- translate.default(x, engine, ...)\n \n # Check and see if they make sense for the engine and/or mode:\n if (x$engine == \"ranger\") {\n if (any(names(x$method$fit$args) == \"importance\")) \n if (is.logical(x$method$fit$args$importance)) \n rlang::abort(\"`importance` should be a character value. See ?ranger::ranger.\")\n }\n x\n}\n\nAs another example, nnet::nnet() has an option for the final layer to be linear (called linout). If mode = \"regression\", that should probably be set to TRUE. You couldn’t do this with the args (described above) since you need the function translated first.\nMy model fit requires more than one function call. So….?\nThe best course of action is to write wrapper so that it can be one call. This was the case with xgboost and keras.\nWhy would I preprocess my data?\nThere might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.)\nThis would not include making dummy variables and model.matrix stuff. The parsnip infrastructure already does that for you.\nWhy would I post-process my predictions?\nWhat comes back from some R functions may be somewhat… arcane or problematic. As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (narrator: they don’t). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. 
You need to convert these to a rectangular data set.\nAnother example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure.\nThese are the types of problems that the post-processor will solve.\nAre there other modes?\nNot yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don’t make direct predictions of the outcome.\nIf you have a suggestion, please add a GitHub issue to discuss it." + }, + { + "objectID": "learn/develop/models/index.html#session-info", + "href": "learn/develop/models/index.html#session-info", + "title": "How to build a parsnip model", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> mda * 0.5-3 2022-05-05 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> 
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/develop/parameters/index.html", + "href": "learn/develop/parameters/index.html", + "title": "How to create a tuning parameter function", + "section": "", + "text": "To use code in this article, you will need to install the following packages: dials and scales.\nSome models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using new_quant_param() or new_qual_param(), respectively. This article is a guide to creating new parameters." + }, + { + "objectID": "learn/develop/parameters/index.html#introduction", + "href": "learn/develop/parameters/index.html#introduction", + "title": "How to create a tuning parameter function", + "section": "", + "text": "To use code in this article, you will need to install the following packages: dials and scales.\nSome models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using new_quant_param() or new_qual_param(), respectively. This article is a guide to creating new parameters." 
+ }, + { + "objectID": "learn/develop/parameters/index.html#quantitative-parameters", + "href": "learn/develop/parameters/index.html#quantitative-parameters", + "title": "How to create a tuning parameter function", + "section": "Quantitative parameters", + "text": "Quantitative parameters\nAs an example, let’s consider the multivariate adaptive regression spline (MARS) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method.\nMARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by earth::earth() or the number of final terms can be set by the user. The parsnip function mars() has a parameter called num_terms that defines this.\nWhat if we want to create a parameter for the number of initial terms included in the model. There is no argument in parsnip::mars() for this but we will make one now. The argument name in earth::earth() is nk, which is not very descriptive. Our parameter will be called num_initial_terms.\nWe use the new_quant_param() function since this is a numeric parameter. The main two arguments to a numeric parameter function are range and trans.\nThe range specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is\n\nmin(200, max(20, 2 * ncol(x))) + 1\n\nwhere x is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let’s specify an upper limit of 10 but this will be discussed more in the next section.\nThe other argument is trans, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a penalty parameter that tends to range between zero and some upper bound (let’s say 1). 
The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units.\nThe trans parameter accepts a transformation object from the scales package. For example:\n\nlibrary(scales)\nlsf.str(\"package:scales\", pattern = \"_trans$\")\n#> asn_trans : function () \n#> atanh_trans : function () \n#> boxcox_trans : function (p, offset = 0) \n#> compose_trans : function (...) \n#> date_trans : function () \n#> exp_trans : function (base = exp(1)) \n#> hms_trans : function () \n#> identity_trans : function () \n#> log_trans : function (base = exp(1)) \n#> log10_trans : function () \n#> log1p_trans : function () \n#> log2_trans : function () \n#> logit_trans : function () \n#> modulus_trans : function (p, offset = 1) \n#> probability_trans : function (distribution, ...) \n#> probit_trans : function () \n#> pseudo_log_trans : function (sigma = 1, base = exp(1)) \n#> reciprocal_trans : function () \n#> reverse_trans : function () \n#> sqrt_trans : function () \n#> time_trans : function (tz = NULL) \n#> yj_trans : function (p)\nscales::log10_trans()\n#> Transformer: log-10 [1e-100, Inf]\n\nA value of NULL means that no transformation should be used.\nA quantitative parameter function should have these two arguments and, in the function body, a call new_quant_param(). There are a few arguments to this function:\n\nlibrary(tidymodels)\nargs(new_quant_param)\n#> function (type = c(\"double\", \"integer\"), range = NULL, inclusive = NULL, \n#> default = deprecated(), trans = NULL, values = NULL, label = NULL, \n#> finalize = NULL, ..., call = caller_env()) \n#> NULL\n\n\nPossible types are double precision and integers. 
The value of type should agree with the values of range in the function definition.\nIt’s OK for our tuning to include the minimum or maximum, so we’ll use c(TRUE, TRUE) for inclusive. If the value cannot include one end of the range, set one or both of these values to FALSE.\nThe label should be a named character string where the name is the parameter name and the value represents what will be printed automatically.\nfinalize is an argument that can set parts of the range. This is discussed more below.\n\nHere’s an example of a basic quantitative parameter object:\n\nnum_initial_terms <- function(range = c(1L, 10L), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\n\nnum_initial_terms()\n#> # Initial MARS Terms (quantitative)\n#> Range: [1, 10]\n\n# Sample from the parameter:\nset.seed(4832856)\nnum_initial_terms() %>% value_sample(5)\n#> [1] 6 4 9 10 4\n\n\nFinalizing parameters\nIt might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of unknown() can be added. This will force the user to “finalize” the parameter object for their particular data set. Let’s redefine our function with an unknown() value:\n\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = NULL\n )\n}\nnum_initial_terms()\n\n# Can we sample? \nnum_initial_terms() %>% value_sample(5)\n\nThe finalize argument of num_initial_terms() can take a function that uses data to set the range. 
For example, the package already includes a few functions for finalization:\n\nlsf.str(\"package:dials\", pattern = \"^get_\")\n#> get_batch_sizes : function (object, x, frac = c(1/10, 1/3), ...) \n#> get_log_p : function (object, x, ...) \n#> get_n : function (object, x, log_vals = FALSE, ...) \n#> get_n_frac : function (object, x, log_vals = FALSE, frac = 1/3, ...) \n#> get_n_frac_range : function (object, x, log_vals = FALSE, frac = c(1/10, 5/10), ...) \n#> get_p : function (object, x, log_vals = FALSE, ...) \n#> get_rbf_range : function (object, x, seed = sample.int(10^5, 1), ...)\n\nThese functions generally take a data frame of predictors (in an argument called x) and add the range of the parameter object. Using the formula in the earth package, we might use:\n\nget_initial_mars_terms <- function(object, x) {\n upper_bound <- min(200, max(20, 2 * ncol(x))) + 1\n upper_bound <- as.integer(upper_bound)\n bounds <- range_get(object)\n bounds$upper <- upper_bound\n range_set(object, bounds)\n}\n\n# Use the mtcars are the finalize the upper bound: \nnum_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1])\n#> # Initial MARS Terms (quantitative)\n#> Range: [1, 21]\n\nOnce we add this function to the object, the general finalize() method can be used:\n\nnum_initial_terms <- function(range = c(1L, unknown()), trans = NULL) {\n new_quant_param(\n type = \"integer\",\n range = range,\n inclusive = c(TRUE, TRUE),\n trans = trans,\n label = c(num_initial_terms = \"# Initial MARS Terms\"),\n finalize = get_initial_mars_terms\n )\n}\n\nnum_initial_terms() %>% finalize(x = mtcars[, -1])\n#> # Initial MARS Terms (quantitative)\n#> Range: [1, 21]" + }, + { + "objectID": "learn/develop/parameters/index.html#qualitative-parameters", + "href": "learn/develop/parameters/index.html#qualitative-parameters", + "title": "How to create a tuning parameter function", + "section": "Qualitative parameters", + "text": "Qualitative parameters\nNow let’s look at an example of a 
qualitative parameter. If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. For example, in embedding methods, possible values might be min, max, mean, sum, or to not aggregate at all (“none”). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We’ll take “character” input (not “logical”), and we must specify the allowed values. By default we won’t aggregate.\n\naggregation <- function(values = c(\"none\", \"min\", \"max\", \"mean\", \"sum\")) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n\nWithin the dials package, the convention is to have the values contained in a separate vector whose name starts with values_. For example:\n\nvalues_aggregation <- c(\"none\", \"min\", \"max\", \"mean\", \"sum\")\naggregation <- function(values = values_aggregation) {\n new_qual_param(\n type = \"character\",\n values = values,\n # By default, the first value is selected as default. 
We'll specify that to\n # make it clear.\n default = \"none\",\n label = c(aggregation = \"Aggregation Method\")\n )\n}\n\nThis step may not make sense if you are using the function in a script and not keeping it within a package.\nWe can use our aggregation parameters with dials functions.\n\naggregation()\n#> Warning: The `default` argument of `new_qual_param()` is deprecated as of\n#> dials 1.1.0.\n#> Aggregation Method (qualitative)\n#> 5 possible values include:\n#> 'none', 'min', 'max', 'mean' and 'sum'\naggregation() %>% value_sample(3)\n#> [1] \"min\" \"sum\" \"mean\"" + }, + { + "objectID": "learn/develop/parameters/index.html#session-info", + "href": "learn/develop/parameters/index.html#session-info", + "title": "How to create a tuning parameter function", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 
[1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/develop/recipes/index.html", + "href": "learn/develop/recipes/index.html", + "title": "Create your own recipe step function", + "section": "", + "text": "To use code in this article, you will need to install the following packages: modeldata and tidymodels.\nThere are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages can be found here. However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for centering or PCA to start.\nFor check operations (e.g. check_class()), the process is very similar. Notes on this are available at the end of this article.\nThe general process to follow is to:\n\nDefine a step constructor function.\nCreate the minimal S3 methods for prep(), bake(), and print().\nOptionally add some extra methods to work with other tidymodels packages, such as tunable() and tidy().\n\nAs an example, we will create a step for converting data into percentiles." + }, + { + "objectID": "learn/develop/recipes/index.html#introduction", + "href": "learn/develop/recipes/index.html#introduction", + "title": "Create your own recipe step function", + "section": "", + "text": "To use code in this article, you will need to install the following packages: modeldata and tidymodels.\nThere are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages can be found here. 
However, you might need to define your own preprocessing operations; this article describes how to do that. If you are looking for good examples of existing steps, we suggest looking at the code for centering or PCA to start.\nFor check operations (e.g. check_class()), the process is very similar. Notes on this are available at the end of this article.\nThe general process to follow is to:\n\nDefine a step constructor function.\nCreate the minimal S3 methods for prep(), bake(), and print().\nOptionally add some extra methods to work with other tidymodels packages, such as tunable() and tidy().\n\nAs an example, we will create a step for converting data into percentiles." + }, + { + "objectID": "learn/develop/recipes/index.html#a-new-step-definition", + "href": "learn/develop/recipes/index.html#a-new-step-definition", + "title": "Create your own recipe step function", + "section": "A new step definition", + "text": "A new step definition\nLet’s create a step that replaces the value of a variable with its percentile from the training set. The example data we’ll use is from the modeldata package:\n\nlibrary(modeldata)\ndata(biomass)\nstr(biomass)\n#> 'data.frame': 536 obs. 
of 8 variables:\n#> $ sample : chr \"Akhrot Shell\" \"Alabama Oak Wood Waste\" \"Alder\" \"Alfalfa\" ...\n#> $ dataset : chr \"Training\" \"Training\" \"Training\" \"Training\" ...\n#> $ carbon : num 49.8 49.5 47.8 45.1 46.8 ...\n#> $ hydrogen: num 5.64 5.7 5.8 4.97 5.4 5.75 5.99 5.7 5.5 5.9 ...\n#> $ oxygen : num 42.9 41.3 46.2 35.6 40.7 ...\n#> $ nitrogen: num 0.41 0.2 0.11 3.3 1 2.04 2.68 1.7 0.8 1.2 ...\n#> $ sulfur : num 0 0 0.02 0.16 0.02 0.1 0.2 0.2 0 0.1 ...\n#> $ HHV : num 20 19.2 18.3 18.2 18.4 ...\n\nbiomass_tr <- biomass[biomass$dataset == \"Training\",]\nbiomass_te <- biomass[biomass$dataset == \"Testing\",]\n\nTo illustrate the transformation with the carbon variable, note the training set distribution of this variable with a vertical line below for the first value of the test set.\n\nlibrary(ggplot2)\ntheme_set(theme_bw())\nggplot(biomass_tr, aes(x = carbon)) + \n geom_histogram(binwidth = 5, col = \"blue\", fill = \"blue\", alpha = .5) + \n geom_vline(xintercept = biomass_te$carbon[1], lty = 2)\n\n\n\n\n\n\n\n\nBased on the training set, 42.1% of the data are less than a value of 46.35. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values.\nOur new step will do this computation for any numeric variables of interest. We will call this new recipe step step_percentile(). The code below is designed for illustration and not speed or best practices. We’ve left out a lot of error trapping that we would want in a real implementation." + }, + { + "objectID": "learn/develop/recipes/index.html#create-the-function", + "href": "learn/develop/recipes/index.html#create-the-function", + "title": "Create your own recipe step function", + "section": "Create the function", + "text": "Create the function\nTo start, there is a user-facing function. Let’s call that step_percentile(). 
This is just a simple wrapper around a constructor function, which defines the rules for any step object that defines a percentile transformation. We’ll call this constructor step_percentile_new().\nThe function step_percentile() takes the same arguments as your function and simply adds it to a new recipe. The ... signifies the variable selectors that can be used.\n\nstep_percentile <- function(\n recipe, \n ..., \n role = NA, \n trained = FALSE, \n ref_dist = NULL,\n options = list(probs = (0:100)/100, names = TRUE),\n skip = FALSE,\n id = rand_id(\"percentile\")\n ) {\n\n ## The variable selectors are not immediately evaluated by using\n ## the `quos()` function in `rlang`. `ellipse_check()` captures \n ## the values and also checks to make sure that they are not empty. \n terms <- ellipse_check(...) \n\n add_step(\n recipe, \n step_percentile_new(\n terms = terms, \n trained = trained,\n role = role, \n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n )\n}\n\nYou should always keep the first four arguments (recipe though trained) the same as listed above. Some notes:\n\nthe role argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using role = NA will leave the existing role intact.\ntrained is set by the package when the estimation step has been run. You should default your function definition’s argument to FALSE.\nskip is a logical. Whenever a recipe is prepped, each step is trained and then baked. However, there are some steps that should not be applied when a call to bake() is used. For example, if a step is applied to the variables with roles of “outcomes”, these data would not be available for new samples.\nid is a character string that can be used to identify steps in package code. 
rand_id() will create an ID that has the prefix and a random character sequence.\n\nWe can estimate the percentiles of new data points based on the percentiles from the training set with approx(). Our step_percentile contains a ref_dist object to store these percentiles (pre-computed from the training set in prep()) for later use in bake().\nWe will use stats::quantile() to compute the grid. However, we might also want to have control over the granularity of this grid, so the options argument will be used to define how that calculation is done. We could use the ellipses (aka ...) so that any options passed to step_percentile() that are not one of its arguments will then be passed to stats::quantile(). However, we recommend making a separate list object with the options and use these inside the function because ... is already used to define the variable selection.\nIt is also important to consider if there are any main arguments to the step. For example, for spline-related steps such as step_ns(), users typically want to adjust the argument for the degrees of freedom in the spline (e.g. splines::ns(x, df)). Rather than letting users add df to the options argument:\n\nAllow the important arguments to be main arguments to the step function.\nFollow the tidymodels conventions for naming arguments. Whenever possible, avoid jargon and keep common argument names.\n\nThere are benefits to following these principles (as shown below)." 
+ }, + { + "objectID": "learn/develop/recipes/index.html#initialize-a-new-object", + "href": "learn/develop/recipes/index.html#initialize-a-new-object", + "title": "Create your own recipe step function", + "section": "Initialize a new object", + "text": "Initialize a new object\nNow, the constructor function can be created.\nThe function cascade is:\nstep_percentile() calls recipes::add_step()\n└──> recipes::add_step() calls step_percentile_new()\n └──> step_percentile_new() calls recipes::step()\nstep() is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using subclass = \"percentile\" will set the class of new objects to \"step_percentile\".\n\nstep_percentile_new <- \n function(terms, role, trained, ref_dist, options, skip, id) {\n step(\n subclass = \"percentile\", \n terms = terms,\n role = role,\n trained = trained,\n ref_dist = ref_dist,\n options = options,\n skip = skip,\n id = id\n )\n }\n\nThis constructor function should have no default argument values. Defaults should be set in the user-facing step object." + }, + { + "objectID": "learn/develop/recipes/index.html#create-the-prep-method", + "href": "learn/develop/recipes/index.html#create-the-prep-method", + "title": "Create your own recipe step function", + "section": "Create the prep method", + "text": "Create the prep method\nYou will need to create a new prep() method for your step’s class. To do this, three arguments that the method should have are:\nfunction(x, training, info = NULL)\nwhere\n\nx will be the step_percentile object,\ntraining will be a tibble that has the training set data, and\ninfo will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific prep() method so it may not have the variables from the original data. 
The columns in this tibble are variable (the variable name), type (currently either “numeric” or “nominal”), role (defining the variable’s role), and source (either “original” or “derived” depending on where it originated).\n\nYou can define other arguments as well.\nThe first thing that you might want to do in the prep() function is to translate the specification listed in the terms argument to column names in the current data. There is a function called recipes_eval_select() that can be used to obtain this.\n\n\n\n\n\n\nWarning\n\n\n\nThe recipes_eval_select() function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps.\n\n\n\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info) \n # TODO finish the rest of the function\n}\n\nAfter this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See recipes::check_type() to do this for basic types.\nOnce we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run rlang::exec() to splice in any extra arguments contained in the options list to the call to quantile():\n\nget_train_pctl <- function(x, args = NULL) {\n res <- rlang::exec(\"quantile\", x = x, !!!args)\n # Remove duplicate percentile values\n res[!duplicated(res)]\n}\n\n# For example:\nget_train_pctl(biomass_tr$carbon, list(probs = 0:1))\n#> 0% 100% \n#> 14.61 97.18\nget_train_pctl(biomass_tr$carbon)\n#> 0% 25% 50% 75% 100% \n#> 14.610 44.715 47.100 49.725 97.180\n\nNow, the prep() method can be created:\n\nprep.step_percentile <- function(x, training, info = NULL, ...) {\n col_names <- recipes_eval_select(x$terms, training, info)\n ## You can add error trapping for non-numeric data here and so on. 
\n \n ## We'll use the names later so make sure they are available\n if (x$options$names == FALSE) {\n rlang::abort(\"`names` should be set to TRUE\")\n }\n \n if (!any(names(x$options) == \"probs\")) {\n x$options$probs <- (0:100)/100\n } else {\n x$options$probs <- sort(unique(x$options$probs))\n }\n \n # Compute percentile grid\n ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options)\n\n ## Use the constructor function to return the updated object. \n ## Note that `trained` is now set to TRUE\n \n step_percentile_new(\n terms = x$terms, \n trained = TRUE,\n role = x$role, \n ref_dist = ref_dist,\n options = x$options,\n skip = x$skip,\n id = x$id\n )\n}\n\nWe suggest favoring rlang::abort() and rlang::warn() over stop() and warning(). The former can be used for better traceback results." + }, + { + "objectID": "learn/develop/recipes/index.html#create-the-bake-method", + "href": "learn/develop/recipes/index.html#create-the-bake-method", + "title": "Create your own recipe step function", + "section": "Create the bake method", + "text": "Create the bake method\nRemember that the prep() function does not apply the step to the data; it only estimates any required values such as ref_dist. We will need to create a new method for our step_percentile() class. The minimum arguments for this are\nfunction(object, new_data, ...)\nwhere object is the updated step function that has been through the corresponding prep() code and new_data is a tibble of data to be processed.\nHere is the code to convert the new data to percentiles. 
The input data (x below) comes in as a numeric vector and the output is a vector of approximate percentiles:\n\npctl_by_approx <- function(x, ref) {\n # In case duplicates were removed, get the percentiles from\n # the names of the reference object\n grid <- as.numeric(gsub(\"%$\", \"\", names(ref))) \n approx(x = ref, y = grid, xout = x)$y/100\n}\n\nThese computations are done column-wise using purrr::map2_dfc() to modify the new data in-place:\n\nbake.step_percentile <- function(object, new_data, ...) {\n ## For illustration (and not speed), we will loop through the affected variables\n ## and do the computations\n vars <- names(object$ref_dist)\n \n new_data[, vars] <-\n purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx)\n \n ## Always convert to tibbles on the way out\n tibble::as_tibble(new_data)\n}\n\n\n\n\n\n\n\nNote\n\n\n\nYou need to import recipes::prep() and recipes::bake() to create your own step function in a package." + }, + { + "objectID": "learn/develop/recipes/index.html#run-the-example", + "href": "learn/develop/recipes/index.html#run-the-example", + "title": "Create your own recipe step function", + "section": "Run the example", + "text": "Run the example\nLet’s use the example data to make sure that it works:\n\nrec_obj <- \n recipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\")) %>%\n prep(training = biomass_tr)\n\nbiomass_te %>% select(ends_with(\"gen\")) %>% slice(1:2)\nbake(rec_obj, biomass_te %>% slice(1:2), ends_with(\"gen\"))\n\n# Checking to get approximate result: \nmean(biomass_tr$hydrogen <= biomass_te$hydrogen[1])\nmean(biomass_tr$oxygen <= biomass_te$oxygen[1])\n\nThe plot below shows how the original hydrogen percentiles line up with the estimated values:\n\nhydrogen_values <- \n bake(rec_obj, biomass_te, hydrogen) %>% \n bind_cols(biomass_te %>% select(original = hydrogen))\n\nggplot(biomass_tr, aes(x = hydrogen)) + \n # Plot the empirical distribution function of the \n # hydrogen training 
set values as a black line\n stat_ecdf() + \n # Overlay the estimated percentiles for the new data: \n geom_point(data = hydrogen_values, \n aes(x = original, y = hydrogen), \n col = \"red\", alpha = .5, cex = 2) + \n labs(x = \"New Hydrogen Values\", y = \"Percentile Based on Training Set\")\n\nThese line up very nicely!" + }, + { + "objectID": "learn/develop/recipes/index.html#custom-check-operations", + "href": "learn/develop/recipes/index.html#custom-check-operations", + "title": "Create your own recipe step function", + "section": "Custom check operations", + "text": "Custom check operations\nThe process here is exactly the same as steps; the internal functions have a similar naming convention:\n\nadd_check() instead of add_step()\ncheck() instead of step(), and so on.\n\nIt is strongly recommended that:\n\nThe operations start with check_ (i.e. check_range() and check_range_new())\nThe check uses rlang::abort(paste0(...)) when the conditions are not met\nThe original data are returned (unaltered) by the check when the conditions are satisfied." + }, + { + "objectID": "learn/develop/recipes/index.html#other-step-methods", + "href": "learn/develop/recipes/index.html#other-step-methods", + "title": "Create your own recipe step function", + "section": "Other step methods", + "text": "Other step methods\nThere are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set.\n\nA print method\nIf you don’t add a print method for step_percentile, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called printer() that should be useful in most cases. We are using it here for the custom print method for step_percentile. It requires the original terms specification and the column names this specification is evaluated to by prep(). 
For the former, our step object is structured so that the list object ref_dist has the names of the selected variables:\n\nprint.step_percentile <-\n function(x, width = max(20, options()$width - 35), ...) {\n cat(\"Percentile transformation on \", sep = \"\")\n printer(\n # Names before prep (could be selectors)\n untr_obj = x$terms,\n # Names after prep:\n tr_obj = names(x$ref_dist),\n # Has it been prepped? \n trained = x$trained,\n # An estimate of how many characters to print on a line: \n width = width\n )\n invisible(x)\n }\n\n# Results before `prep()`:\nrecipe(HHV ~ ., data = biomass_tr) %>%\n step_percentile(ends_with(\"gen\"))\n\n# Results after `prep()`: \nrec_obj\n\n\n\nMethods for declaring required packages\nSome recipe steps use functions from other packages. When this is the case, the step_*() function should check to see if the package is installed. The function recipes::recipes_pkg_check() will do this. For example:\n> recipes::recipes_pkg_check(\"some_package\")\n1 package is needed for this step and is not installed. (some_package). Start \na clean R session then run: install.packages(\"some_package\")\nThere is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the hypothetical package, this might look like:\n\nrequired_pkgs.step_hypothetical <- function(x, ...) {\n c(\"hypothetical\", \"myrecipespkg\")\n}\n\nIn this example, myrecipespkg is the package where the step resides (if it is in a package).\nThe reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. 
If the home package for a recipe step is not loaded in the worker processes, the prep() methods cannot be found and an error occurs.\nIf this S3 method is used for your step, you can rely on this for checking the installation:\n\nrecipes::recipes_pkg_check(required_pkgs.step_hypothetical())\n\nIf you’d like an example of this in a package, please take a look at the embed or themis package.\n\n\nA tidy method\nThe broom::tidy() method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values.\nWhen the recipe has been prepped, those data are in the list ref_dist. A small function can be used to reformat that data into a tibble. It is customary to return the main values as value:\n\nformat_pctl <- function(x) {\n tibble::tibble(\n value = unname(x),\n percentile = as.numeric(gsub(\"%$\", \"\", names(x))) \n )\n}\n\n# For example: \npctl_step_object <- rec_obj$steps[[1]]\npctl_step_object\nformat_pctl(pctl_step_object$ref_dist[[\"hydrogen\"]])\n\nThe tidy method could return these values for each selected column. Before prep(), missing values can be used as placeholders.\n\ntidy.step_percentile <- function(x, ...) {\n if (is_trained(x)) {\n res <- map_dfr(x$ref_dist, format_pctl, .id = \"term\")\n }\n else {\n term_names <- sel2char(x$terms)\n res <-\n tibble(\n terms = term_names,\n value = rlang::na_dbl,\n percentile = rlang::na_dbl\n )\n }\n # Always return the step id: \n res$id <- x$id\n res\n}\n\ntidy(rec_obj, number = 1)\n\n\n\nMethods for tuning parameters\nThe tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn’t really have any tunable parameters, so we will demonstrate using step_poly(), which returns a polynomial expansion of selected columns. 
Its function definition has the arguments:\n\nargs(step_poly)\n\nThe argument degree is tunable.\nTo work with tune it is helpful (but not required) to use an S3 method called tunable() to define which arguments should be tuned and how values of those arguments should be generated.\ntunable() takes the step object as its argument and returns a tibble with columns:\n\nname: The name of the argument.\ncall_info: A list that describes how to call a function that returns a dials parameter object.\nsource: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just \"recipe\".\ncomponent: A character string with more information about the source. For recipes, this is just the name of the step (e.g. \"step_poly\").\ncomponent_id: A character string to indicate where a unique identifier is for the object. For recipes, this is just the id value of the step object.\n\nThe main piece of information that requires some detail is call_info. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object.\nFor example, for a nearest-neighbors neighbors parameter, this value is just:\n\ninfo <- list(pkg = \"dials\", fun = \"neighbors\")\n\n# FYI: how it is used under-the-hood: \nnew_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg)\nrlang::eval_tidy(new_param_call)\n\nFor step_poly(), a dials object is needed that returns an integer that is the number of new columns to create. 
It turns out that there are a few different types of tuning parameters related to degree:\n> lsf.str(\"package:dials\", pattern = \"degree\")\ndegree : function (range = c(1, 3), trans = NULL) \ndegree_int : function (range = c(1L, 3L), trans = NULL) \nprod_degree : function (range = c(1L, 2L), trans = NULL) \nspline_degree : function (range = c(3L, 10L), trans = NULL) \nLooking at the range values, some return doubles and others return integers. For our problem, degree_int() would be a good choice.\nFor step_poly() the tunable() S3 method could be:\n\ntunable.step_poly <- function (x, ...) {\n tibble::tibble(\n name = c(\"degree\"),\n call_info = list(list(pkg = \"dials\", fun = \"degree_int\")),\n source = \"recipe\",\n component = \"step_poly\",\n component_id = x$id\n )\n}" + }, + { + "objectID": "learn/develop/recipes/index.html#session-info", + "href": "learn/develop/recipes/index.html#session-info", + "title": "Create your own recipe step function", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 
[1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/index.html", + "href": "learn/index.html", + "title": "Learn", + "section": "", + "text": "After you know what you need to get started with tidymodels, you can learn more and go further. Find articles here to help you solve specific problems using the tidymodels framework.\n\n\n\n\n\n\n\n\n\n\n\n\nA predictive modeling case study\n\n\n\nmodel fitting\n\n\ntuning\n\n\nparsnip\n\n\nrecipes\n\n\nrsample\n\n\nworkflows\n\n\ntune\n\n\n\nDevelop, from beginning to end, a predictive model using best practices.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nBootstrap resampling and tidy regression models\n\n\n\nstatistical analysis\n\n\nbootstraping\n\n\ntidying results\n\n\nconfidence intervals\n\n\n\nApply bootstrap resampling to estimate uncertainty in model parameters.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nBuild a model\n\n\n\nmodel fitting\n\n\nparsnip\n\n\nbroom\n\n\n\nGet started by learning how to specify and train a model using tidymodels.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nClassification models using a neural network\n\n\n\nmodel fitting\n\n\ntorch\n\n\nneural networks\n\n\n\nTrain a classification model and evaluate its performance.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCorrelation and regression fundamentals with tidy data principles\n\n\n\nstatistical analysis\n\n\ncorrelation\n\n\ntidying results\n\n\n\nAnalyze the results of correlation tests and simple regression models for many 
data sets at once.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate your own broom tidier methods\n\n\n\ndeveloper tools\n\n\n\nWrite tidy(), glance(), and augment() methods for new model objects.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate your own recipe step function\n\n\n\ndeveloper tools\n\n\n\nWrite a new recipe step for data preprocessing.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreating case weights based on time\n\n\n\nmodel fitting\n\n\ncase weights\n\n\ntime series\n\n\n\nCreate models that use coefficients, extract them from fitted models, and visualize them.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCustom performance metrics\n\n\n\ndeveloper tools\n\n\n\nCreate a new performance metric and integrate it with yardstick functions.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nEvaluate your model with resampling\n\n\n\nresampling\n\n\nrsample\n\n\nparsnip\n\n\ntune\n\n\nworkflows\n\n\nyardstick\n\n\n\nMeasure model performance by generating different versions of the training data through resampling.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHow to build a parsnip model\n\n\n\ndeveloper tools\n\n\n\nCreate a parsnip model function from an existing model implementation.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHow to create a tuning parameter function\n\n\n\ndeveloper tools\n\n\n\nBuild functions to use in tuning both quantitative and qualitative parameters.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHypothesis testing using resampling and tidy data\n\n\n\nstatistical analysis\n\n\nhypothesis testing\n\n\nbootstraping\n\n\n\nPerform common hypothesis tests for statistical inference using flexible functions.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nIterative Bayesian optimization of a classification model\n\n\n\nmodel tuning\n\n\nBayesian optimization\n\n\nSVMs\n\n\n\nIdentify the best hyperparameters for a model using Bayesian optimization of iterative search.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nK-means clustering with tidy data principles\n\n\n\nstatistical analysis\n\n\nclustering\n\n\ntidying results\n\n\n\nSummarize clustering 
characteristics and estimate the best number of clusters for a data set.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nModel tuning via grid search\n\n\n\nmodel tuning\n\n\nSVMs\n\n\n\nChoose hyperparameters for a model by training on a grid of many possible parameter values.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nModeling time series with tidy resampling\n\n\n\nmodel fitting\n\n\ntime series\n\n\n\nCalculate performance estimates for time series forecasts using resampling.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nMultivariate analysis using partial least squares\n\n\n\npre-processing\n\n\nmultivariate analysis\n\n\npartial least squares\n\n\n\nBuild and fit a predictive model with more than one outcome.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nNested resampling\n\n\n\nnested resampling\n\n\nSVMs\n\n\n\nEstimate the best hyperparameters for a model using nested resampling.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPreprocess your data with recipes\n\n\n\npre-processing\n\n\nrecipes\n\n\nparsnip\n\n\nworkflows\n\n\nyardstick\n\n\nbroom\n\n\n\nPrepare data for modeling with modular preprocessing steps.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nRegression models two ways\n\n\n\nmodel fitting\n\n\nrandom forests\n\n\nlinear regression\n\n\n\nCreate and train different kinds of regression models with different computational engines.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nStatistical analysis of contingency tables\n\n\n\nstatistical analysis\n\n\nanalysis of tables\n\n\nhypothesis testing\n\n\n\nUse tests of independence and goodness of fit to analyze tables of counts.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSubsampling for class imbalances\n\n\n\nmodel fitting\n\n\npre-processing\n\n\nclass imbalances\n\n\ndiscriminant analysis\n\n\n\nImprove model performance in imbalanced data sets through undersampling or oversampling.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nTune model parameters\n\n\n\ntuning\n\n\nrsample\n\n\nparsnip\n\n\ntune\n\n\ndials\n\n\nworkflows\n\n\nyardstick\n\n\n\nEstimate the best values for hyperparameters that cannot be 
learned directly during model training.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nTuning text models\n\n\n\nmodel tuning\n\n\ntext analysis\n\n\nlogistic regression\n\n\nBayesian optimization\n\n\nextracting results\n\n\n\nPrepare text data for predictive modeling and tune with both grid and iterative search.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nWorking with model coefficients\n\n\n\nmodel fitting\n\n\ntidying results\n\n\nlinear regression\n\n\nmodel tuning\n\n\n\nCreate models that use coefficients, extract them from fitted models, and visualize them.\n\n\n\n\n\n\n\n\n\n\n\n\nNo matching items" + }, + { + "objectID": "learn/models/coefficients/index.html", + "href": "learn/models/coefficients/index.html", + "title": "Working with model coefficients", + "section": "", + "text": "There are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients.\nThis article describes how to retrieve the estimated coefficients from models fit using tidymodels. To use code in this article, you will need to install the following packages: glmnet and tidymodels." + }, + { + "objectID": "learn/models/coefficients/index.html#introduction", + "href": "learn/models/coefficients/index.html#introduction", + "title": "Working with model coefficients", + "section": "", + "text": "There are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. 
When we work with models that use weights or coefficients, we often want to examine the estimated coefficients.\nThis article describes how to retrieve the estimated coefficients from models fit using tidymodels. To use code in this article, you will need to install the following packages: glmnet and tidymodels." + }, + { + "objectID": "learn/models/coefficients/index.html#linear-regression", + "href": "learn/models/coefficients/index.html#linear-regression", + "title": "Working with model coefficients", + "section": "Linear regression", + "text": "Linear regression\nLet’s start with a linear regression model:\n\\[\\hat{y} = \\hat{\\beta}_0 + \\hat{\\beta}_1x_1 + \\ldots + \\hat{\\beta}_px_p\\]\nThe \\(\\beta\\) values are the coefficients and the \\(x_j\\) are model predictors, or features.\nLet’s use the Chicago train data where we predict the ridership at the Clark and Lake station (column name: ridership) with the previous ridership data 14 days prior at three of the stations.\nThe data are in the modeldata package:\n\nlibrary(tidymodels)\ntidymodels_prefer()\ntheme_set(theme_bw())\n\ndata(Chicago)\n\nChicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem)\n\n\nA single model\nLet’s start by fitting only a single parsnip model object. We’ll create a model specification using linear_reg().\n\n\n\n\n\n\nNote\n\n\n\nThe default engine is \"lm\" so no call to set_engine() is required.\n\n\nThe fit() function estimates the model coefficients, given a formula and data set.\n\nlm_spec <- linear_reg()\nlm_fit <- fit(lm_spec, ridership ~ ., data = Chicago)\nlm_fit\n#> parsnip model object\n#> \n#> \n#> Call:\n#> stats::lm(formula = ridership ~ ., data = data)\n#> \n#> Coefficients:\n#> (Intercept) Clark_Lake Austin Harlem \n#> 1.6778 0.9035 0.6123 -0.5550\n\nThe best way to retrieve the fitted parameters is to use the tidy() method. 
This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names:\n\ntidy(lm_fit)\n#> # A tibble: 4 × 5\n#> term estimate std.error statistic p.value\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 1.68 0.156 10.7 1.11e- 26\n#> 2 Clark_Lake 0.904 0.0280 32.3 5.14e-210\n#> 3 Austin 0.612 0.320 1.91 5.59e- 2\n#> 4 Harlem -0.555 0.165 -3.36 7.85e- 4\n\nWe’ll use this function in subsequent sections.\n\n\nResampled or tuned models\nThe tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the bootstrap to resample the data. This is a standard resampling approach when evaluating the uncertainty in statistical estimates.\nWe’ll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates).\n\nset.seed(123)\nbt <- bootstraps(Chicago, times = 5)\n\nWith resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function fit_resamples() is the recommended approach for doing so.\n\n\n\n\n\n\nWarning\n\n\n\nThe fit_resamples() function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to fit_resamples() that can save the model object or any other aspect of the fit.\n\n\nThis function takes a single argument that represents the fitted workflow object (even if you don’t give fit_resamples() a workflow).\nFrom this, we can extract the model fit. There are two “levels” of model objects that are available:\n\nThe parsnip model object, which wraps the underlying model object. We retrieve this using the extract_fit_parsnip() function.\nThe underlying model object (a.k.a. 
the engine fit) via the extract_fit_engine().\n\nWe’ll use the latter option and then tidy this model object as we did in the previous section. Let’s add this to the control function so that we can re-use it.\n\nget_lm_coefs <- function(x) {\n x %>% \n # get the lm model object\n extract_fit_engine() %>% \n # transform its format\n tidy()\n}\ntidy_ctrl <- control_grid(extract = get_lm_coefs)\n\nThis argument is then passed to fit_resamples():\n\nlm_res <- \n lm_spec %>% \n fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl)\nlm_res\n#> # Resampling results\n#> # Bootstrap sampling \n#> # A tibble: 5 × 5\n#> splits id .metrics .notes .extracts\n#> <list> <chr> <list> <list> <list> \n#> 1 <split [5698/2076]> Bootstrap1 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> \n#> 2 <split [5698/2098]> Bootstrap2 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> \n#> 3 <split [5698/2064]> Bootstrap3 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> \n#> 4 <split [5698/2082]> Bootstrap4 <tibble [2 × 4]> <tibble [0 × 3]> <tibble> \n#> 5 <split [5698/2088]> Bootstrap5 <tibble [2 × 4]> <tibble [0 × 3]> <tibble>\n\nNote that there is a .extracts column in our resampling results. This object contains the output of our get_lm_coefs() function for each resample. The structure of the elements of this column is a little complex. 
Let’s start by looking at the first element (which corresponds to the first resample):\n\nlm_res$.extracts[[1]]\n#> # A tibble: 1 × 2\n#> .extracts .config \n#> <list> <chr> \n#> 1 <tibble [4 × 5]> Preprocessor1_Model1\n\nThere is another column in this element called .extracts that has the results of the tidy() function call:\n\nlm_res$.extracts[[1]]$.extracts[[1]]\n#> # A tibble: 4 × 5\n#> term estimate std.error statistic p.value\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 1.40 0.157 8.90 7.23e- 19\n#> 2 Clark_Lake 0.842 0.0280 30.1 2.39e-184\n#> 3 Austin 1.46 0.320 4.54 5.70e- 6\n#> 4 Harlem -0.637 0.163 -3.92 9.01e- 5\n\nThese nested columns can be flattened via the purrr unnest() function:\n\nlm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) \n#> # A tibble: 5 × 3\n#> id .extracts .config \n#> <chr> <list> <chr> \n#> 1 Bootstrap1 <tibble [4 × 5]> Preprocessor1_Model1\n#> 2 Bootstrap2 <tibble [4 × 5]> Preprocessor1_Model1\n#> 3 Bootstrap3 <tibble [4 × 5]> Preprocessor1_Model1\n#> 4 Bootstrap4 <tibble [4 × 5]> Preprocessor1_Model1\n#> 5 Bootstrap5 <tibble [4 × 5]> Preprocessor1_Model1\n\nWe still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format:\n\nlm_coefs <- \n lm_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n unnest(.extracts)\n\nlm_coefs %>% select(id, term, estimate, p.value)\n#> # A tibble: 20 × 4\n#> id term estimate p.value\n#> <chr> <chr> <dbl> <dbl>\n#> 1 Bootstrap1 (Intercept) 1.40 7.23e- 19\n#> 2 Bootstrap1 Clark_Lake 0.842 2.39e-184\n#> 3 Bootstrap1 Austin 1.46 5.70e- 6\n#> 4 Bootstrap1 Harlem -0.637 9.01e- 5\n#> 5 Bootstrap2 (Intercept) 1.69 2.87e- 28\n#> 6 Bootstrap2 Clark_Lake 0.911 1.06e-219\n#> 7 Bootstrap2 Austin 0.595 5.93e- 2\n#> 8 Bootstrap2 Harlem -0.580 3.88e- 4\n#> 9 Bootstrap3 (Intercept) 1.27 3.43e- 16\n#> 10 Bootstrap3 Clark_Lake 0.859 5.03e-194\n#> 11 Bootstrap3 Austin 1.09 6.77e- 4\n#> 12 Bootstrap3 Harlem -0.470 4.34e- 3\n#> 13 
Bootstrap4 (Intercept) 1.95 2.91e- 34\n#> 14 Bootstrap4 Clark_Lake 0.974 1.47e-233\n#> 15 Bootstrap4 Austin -0.116 7.21e- 1\n#> 16 Bootstrap4 Harlem -0.620 2.11e- 4\n#> 17 Bootstrap5 (Intercept) 1.87 1.98e- 33\n#> 18 Bootstrap5 Clark_Lake 0.901 1.16e-210\n#> 19 Bootstrap5 Austin 0.494 1.15e- 1\n#> 20 Bootstrap5 Harlem -0.512 1.73e- 3\n\nThat’s better! Now, let’s plot the model coefficients for each resample:\n\nlm_coefs %>%\n filter(term != \"(Intercept)\") %>% \n ggplot(aes(x = term, y = estimate, group = id, col = id)) + \n geom_hline(yintercept = 0, lty = 3) + \n geom_line(alpha = 0.3, lwd = 1.2) + \n labs(y = \"Coefficient\", x = NULL) +\n theme(legend.position = \"top\")\n\n\n\n\n\n\n\n\nThere seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two.\nLooking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases." + }, + { + "objectID": "learn/models/coefficients/index.html#more-complex-a-glmnet-model", + "href": "learn/models/coefficients/index.html#more-complex-a-glmnet-model", + "title": "Working with model coefficients", + "section": "More complex: a glmnet model", + "text": "More complex: a glmnet model\nThe glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set.\nThere are two types of penalization that this model uses:\n\nLasso (a.k.a. \\(L_1\\)) penalties can shrink the model terms so much that they are absolute zero (i.e. 
their effect is entirely removed from the model).\nWeight decay (a.k.a ridge regression or \\(L_2\\)) uses a different type of penalty that is most useful for highly correlated predictors.\n\nThe glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification:\n\nglmnet_spec <- \n linear_reg(penalty = 0.1, mixture = 0.95) %>% \n set_engine(\"glmnet\")\n\nhas a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high).\n\n\n\n\n\n\nNote\n\n\n\nModels with regularization require that predictors are all on the same scale. The ridership at our three stations are very different, but glmnet automatically centers and scales the data. You can use recipes to center and scale your data yourself.\n\n\nLet’s combine the model specification with a formula in a model workflow() and then fit the model to the data:\n\nglmnet_wflow <- \n workflow() %>% \n add_model(glmnet_spec) %>% \n add_formula(ridership ~ .)\n\nglmnet_fit <- fit(glmnet_wflow, Chicago)\nglmnet_fit\n#> ══ Workflow [trained] ════════════════════════════════════════════════\n#> Preprocessor: Formula\n#> Model: linear_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> ridership ~ .\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> \n#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = \"gaussian\", alpha = ~0.95) \n#> \n#> Df %Dev Lambda\n#> 1 0 0.00 6.1040\n#> 2 1 12.75 5.5620\n#> 3 1 23.45 5.0680\n#> 4 1 32.43 4.6180\n#> 5 1 39.95 4.2070\n#> 6 1 46.25 3.8340\n#> 7 1 51.53 3.4930\n#> 8 1 55.94 3.1830\n#> 9 1 59.62 2.9000\n#> 10 1 62.70 2.6420\n#> 11 2 65.28 2.4080\n#> 12 2 67.44 2.1940\n#> 13 2 69.23 1.9990\n#> 14 2 70.72 1.8210\n#> 15 2 71.96 1.6600\n#> 16 2 73.00 1.5120\n#> 17 2 73.86 1.3780\n#> 18 2 74.57 1.2550\n#> 19 2 75.17 1.1440\n#> 20 2 75.66 1.0420\n#> 21 2 76.07 0.9496\n#> 22 2 
76.42 0.8653\n#> 23 2 76.70 0.7884\n#> 24 2 76.94 0.7184\n#> 25 2 77.13 0.6545\n#> 26 2 77.30 0.5964\n#> 27 2 77.43 0.5434\n#> 28 2 77.55 0.4951\n#> 29 2 77.64 0.4512\n#> 30 2 77.72 0.4111\n#> 31 2 77.78 0.3746\n#> 32 2 77.84 0.3413\n#> 33 2 77.88 0.3110\n#> 34 2 77.92 0.2833\n#> 35 2 77.95 0.2582\n#> 36 2 77.98 0.2352\n#> 37 2 78.00 0.2143\n#> 38 2 78.01 0.1953\n#> 39 2 78.03 0.1779\n#> 40 2 78.04 0.1621\n#> 41 2 78.05 0.1477\n#> 42 2 78.06 0.1346\n#> 43 2 78.07 0.1226\n#> 44 2 78.07 0.1118\n#> 45 2 78.08 0.1018\n#> 46 2 78.08 0.0928\n#> \n#> ...\n#> and 9 more lines.\n\nIn this output, the term lambda is used to represent the penalty.\nNote that the output shows many values of the penalty despite our specification of penalty = 0.1. It turns out that this model fits a “path” of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object.\nLet’s look at two different approaches to obtaining the coefficients. Both will use the tidy() method. One will tidy a glmnet object and the other will tidy a tidymodels object.\n\nUsing glmnet penalty values\nThis glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are 55 penalties available. 
To get the set of penalties produced for this data set, we can extract the engine fit and tidy:\n\nglmnet_fit %>% \n extract_fit_engine() %>% \n tidy() %>% \n rename(penalty = lambda) %>% # <- for consistent naming\n filter(term != \"(Intercept)\")\n#> # A tibble: 99 × 5\n#> term step estimate penalty dev.ratio\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 Clark_Lake 2 0.0753 5.56 0.127\n#> 2 Clark_Lake 3 0.145 5.07 0.234\n#> 3 Clark_Lake 4 0.208 4.62 0.324\n#> 4 Clark_Lake 5 0.266 4.21 0.400\n#> 5 Clark_Lake 6 0.319 3.83 0.463\n#> 6 Clark_Lake 7 0.368 3.49 0.515\n#> 7 Clark_Lake 8 0.413 3.18 0.559\n#> 8 Clark_Lake 9 0.454 2.90 0.596\n#> 9 Clark_Lake 10 0.491 2.64 0.627\n#> 10 Clark_Lake 11 0.526 2.41 0.653\n#> # ℹ 89 more rows\n\nThis works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! The underlying package has functions that use interpolation to produce coefficients for this specific value, but the tidy() method for glmnet objects does not use it.\n\n\nUsing specific penalty values\nIf we run the tidy() method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified:\n\ntidy(glmnet_fit)\n#> # A tibble: 4 × 3\n#> term estimate penalty\n#> <chr> <dbl> <dbl>\n#> 1 (Intercept) 1.69 0.1\n#> 2 Clark_Lake 0.846 0.1\n#> 3 Austin 0.271 0.1\n#> 4 Harlem 0 0.1\n\nFor any another (single) penalty, we can use an additional argument:\n\ntidy(glmnet_fit, penalty = 5.5620) # A value from above\n#> # A tibble: 4 × 3\n#> term estimate penalty\n#> <chr> <dbl> <dbl>\n#> 1 (Intercept) 12.6 5.56\n#> 2 Clark_Lake 0.0753 5.56\n#> 3 Austin 0 5.56\n#> 4 Harlem 0 5.56\n\nThe reason for having two tidy() methods is that, with tidymodels, the focus is on using a specific penalty value.\n\n\nTuning a glmnet model\nIf we know a priori acceptable values for penalty and mixture, we can use the fit_resamples() function as we did before with linear regression. 
Otherwise, we can tune those parameters with the tidymodels tune_*() functions.\nLet’s tune our glmnet model over both parameters with this grid:\n\npen_vals <- 10^seq(-3, 0, length.out = 10)\ngrid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0))\n\nHere is where more glmnet-related complexity comes in: we know that each resample and each value of mixture will probably produce a different set of penalty values contained in the model object. How can we look at the coefficients at the specific penalty values that we are using to tune?\nThe approach that we suggest is to use the special path_values option for glmnet. Details are described in the technical documentation about glmnet and tidymodels but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture).\nWe can pass these as an engine argument and then update our previous workflow object:\n\nglmnet_tune_spec <- \n linear_reg(penalty = tune(), mixture = tune()) %>% \n set_engine(\"glmnet\", path_values = pen_vals)\n\nglmnet_wflow <- \n glmnet_wflow %>% \n update_model(glmnet_tune_spec)\n\nNow we will use an extraction function similar to when we used ordinary least squares. 
We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty:\n\nget_glmnet_coefs <- function(x) {\n x %>% \n extract_fit_engine() %>% \n tidy(return_zeros = TRUE) %>% \n rename(penalty = lambda)\n}\nparsnip_ctrl <- control_grid(extract = get_glmnet_coefs)\n\nglmnet_res <- \n glmnet_wflow %>% \n tune_grid(\n resamples = bt,\n grid = grid,\n control = parsnip_ctrl\n )\nglmnet_res\n#> # Tuning results\n#> # Bootstrap sampling \n#> # A tibble: 5 × 5\n#> splits id .metrics .notes .extracts\n#> <list> <chr> <list> <list> <list> \n#> 1 <split [5698/2076]> Bootstrap1 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> \n#> 2 <split [5698/2098]> Bootstrap2 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> \n#> 3 <split [5698/2064]> Bootstrap3 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> \n#> 4 <split [5698/2082]> Bootstrap4 <tibble [40 × 6]> <tibble [0 × 3]> <tibble> \n#> 5 <split [5698/2088]> Bootstrap5 <tibble [40 × 6]> <tibble [0 × 3]> <tibble>\n\nAs noted before, the elements of the main .extracts column have an embedded list column with the results of get_glmnet_coefs():\n\nglmnet_res$.extracts[[1]] %>% head()\n#> # A tibble: 6 × 4\n#> penalty mixture .extracts .config \n#> <dbl> <dbl> <list> <chr> \n#> 1 1 0.1 <tibble [40 × 5]> Preprocessor1_Model01\n#> 2 1 0.1 <tibble [40 × 5]> Preprocessor1_Model02\n#> 3 1 0.1 <tibble [40 × 5]> Preprocessor1_Model03\n#> 4 1 0.1 <tibble [40 × 5]> Preprocessor1_Model04\n#> 5 1 0.1 <tibble [40 × 5]> Preprocessor1_Model05\n#> 6 1 0.1 <tibble [40 × 5]> Preprocessor1_Model06\n\nglmnet_res$.extracts[[1]]$.extracts[[1]] %>% head()\n#> # A tibble: 6 × 5\n#> term step estimate penalty dev.ratio\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 1 0.568 1 0.769\n#> 2 (Intercept) 2 0.432 0.464 0.775\n#> 3 (Intercept) 3 0.607 0.215 0.779\n#> 4 (Intercept) 4 0.846 0.1 0.781\n#> 5 (Intercept) 5 1.06 0.0464 0.782\n#> 6 (Intercept) 6 1.22 0.0215 0.783\n\nAs before, we’ll have to use a double unnest(). 
Since the penalty value is in both the top-level and lower-level .extracts, we’ll use select() to get rid of the first version (but keep mixture):\n\nglmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% # <- removes the first penalty column\n unnest(.extracts)\n\nBut wait! We know that each glmnet fit contains all of the coefficients. This means, for a specific resample and value of mixture, the results are the same:\n\nall.equal(\n # First bootstrap, first `mixture`, first `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[1]],\n # First bootstrap, first `mixture`, second `penalty`\n glmnet_res$.extracts[[1]]$.extracts[[2]]\n)\n#> [1] TRUE\n\nFor this reason, we’ll add a slice(1) when grouping by id and mixture. This will get rid of the replicated results.\n\nglmnet_coefs <- \n glmnet_res %>% \n select(id, .extracts) %>% \n unnest(.extracts) %>% \n select(id, mixture, .extracts) %>% \n group_by(id, mixture) %>% # ┐\n slice(1) %>% # │ Remove the redundant results\n ungroup() %>% # ┘\n unnest(.extracts)\n\nglmnet_coefs %>% \n select(id, penalty, mixture, term, estimate) %>% \n filter(term != \"(Intercept)\")\n#> # A tibble: 300 × 5\n#> id penalty mixture term estimate\n#> <chr> <dbl> <dbl> <chr> <dbl>\n#> 1 Bootstrap1 1 0.1 Clark_Lake 0.391\n#> 2 Bootstrap1 0.464 0.1 Clark_Lake 0.485\n#> 3 Bootstrap1 0.215 0.1 Clark_Lake 0.590\n#> 4 Bootstrap1 0.1 0.1 Clark_Lake 0.680\n#> 5 Bootstrap1 0.0464 0.1 Clark_Lake 0.746\n#> 6 Bootstrap1 0.0215 0.1 Clark_Lake 0.793\n#> 7 Bootstrap1 0.01 0.1 Clark_Lake 0.817\n#> 8 Bootstrap1 0.00464 0.1 Clark_Lake 0.828\n#> 9 Bootstrap1 0.00215 0.1 Clark_Lake 0.834\n#> 10 Bootstrap1 0.001 0.1 Clark_Lake 0.837\n#> # ℹ 290 more rows\n\nNow we have the coefficients. 
Let’s look at how they behave as more regularization is used:\n\nglmnet_coefs %>% \n filter(term != \"(Intercept)\") %>% \n mutate(mixture = format(mixture)) %>% \n ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + \n geom_hline(yintercept = 0, lty = 3) +\n geom_line(alpha = 0.5, lwd = 1.2) + \n facet_wrap(~ term) + \n scale_x_log10() +\n scale_color_brewer(palette = \"Accent\") +\n labs(y = \"coefficient\") +\n theme(legend.position = \"top\")\n\n\n\n\n\n\n\n\nNotice a couple of things:\n\nWith a pure lasso model (i.e., mixture = 1), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases.\nThe Harlem predictor is either quickly selected out of the model or goes from negative to positive." + }, + { + "objectID": "learn/models/coefficients/index.html#session-info", + "href": "learn/models/coefficients/index.html#session-info", + "title": "Working with model coefficients", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> glmnet * 4.1-7 2023-03-23 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> 
purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/models/parsnip-nnet/index.html", + "href": "learn/models/parsnip-nnet/index.html", + "title": "Classification models using a neural network", + "section": "", + "text": "To use code in this article, you will need to install the following packages: AppliedPredictiveModeling, brulee, and tidymodels. You will also need the python torch library installed (see ?torch::install_torch()).\nWe can create classification models with the tidymodels package parsnip to predict categorical quantities or class labels. Here, let’s fit a single classification model using a neural network and evaluate using a validation set. While the tune package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage." + }, + { + "objectID": "learn/models/parsnip-nnet/index.html#introduction", + "href": "learn/models/parsnip-nnet/index.html#introduction", + "title": "Classification models using a neural network", + "section": "", + "text": "To use code in this article, you will need to install the following packages: AppliedPredictiveModeling, brulee, and tidymodels. 
You will also need the python torch library installed (see ?torch::install_torch()).\nWe can create classification models with the tidymodels package parsnip to predict categorical quantities or class labels. Here, let’s fit a single classification model using a neural network and evaluate using a validation set. While the tune package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage." + }, + { + "objectID": "learn/models/parsnip-nnet/index.html#fitting-a-neural-network", + "href": "learn/models/parsnip-nnet/index.html#fitting-a-neural-network", + "title": "Classification models using a neural network", + "section": "Fitting a neural network", + "text": "Fitting a neural network\nLet’s fit a model to a small, two predictor classification data set. The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered.\n\nlibrary(AppliedPredictiveModeling)\n\nset.seed(321)\ncls_train <- quadBoundaryFunc(2000) %>% select(A = X1, B = X2, class)\ncls_val <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\ncls_test <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class)\n\nA plot of the data shows two right-skewed predictors:\n\nggplot(cls_train, aes(x = A, y = B, col = class)) + \n geom_point(alpha = 1 / 4, cex = 3) + \n coord_fixed()\n\n\n\n\n\n\n\n\nLet’s use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be more symmetric (via the step_BoxCox() function) and on a common scale (using step_normalize()). 
We can use recipes to do so:\n\nbiv_rec <- \n recipe(class ~ ., data = cls_train) %>%\n step_normalize(all_predictors())\n\nThis recipe is not directly executed; the steps will be estimated when the model is fit.\nWe can use the brulee package to fit a model with 5 hidden units and a 10% dropout rate, to regularize the model:\n\nnnet_spec <- \n mlp(epochs = 1000, hidden_units = 10, penalty = 0.01, learn_rate = 0.1) %>% \n set_engine(\"brulee\", validation = 0) %>% \n set_mode(\"classification\")\n\nnnet_wflow <- \n biv_rec %>% \n workflow(nnet_spec)\n\nset.seed(987)\nnnet_fit <- fit(nnet_wflow, cls_train)\nnnet_fit %>% extract_fit_engine()\n#> Multilayer perceptron\n#> \n#> relu activation\n#> 10 hidden units, 52 model parameters\n#> 2,000 samples, 2 features, 2 classes \n#> class weights Class1=1, Class2=1 \n#> weight decay: 0.01 \n#> dropout proportion: 0 \n#> batch size: 2000 \n#> learn rate: 0.1 \n#> training set loss after 1000 epochs: 0.375" + }, + { + "objectID": "learn/models/parsnip-nnet/index.html#model-performance", + "href": "learn/models/parsnip-nnet/index.html#model-performance", + "title": "Classification models using a neural network", + "section": "Model performance", + "text": "Model performance\nIn parsnip, the predict() function can be used to characterize performance on the validation set. 
Since parsnip always produces tibble outputs, these can just be column bound to the original data:\n\nval_results <- \n cls_val %>%\n bind_cols(\n predict(nnet_fit, new_data = cls_val),\n predict(nnet_fit, new_data = cls_val, type = \"prob\")\n )\nval_results %>% slice(1:5)\n#> A B class .pred_class .pred_Class1 .pred_Class2\n#> 1 0.7632082 -0.04012164 Class2 Class2 0.06255509 0.93744493\n#> 2 0.9823745 -0.16911637 Class2 Class2 0.05721300 0.94278705\n#> 3 1.0558147 0.52817699 Class2 Class2 0.10368267 0.89631736\n#> 4 1.2424507 1.10902951 Class2 Class2 0.34966809 0.65033191\n#> 5 1.5889815 2.71047720 Class1 Class1 0.97951710 0.02048291\n\nval_results %>% roc_auc(truth = class, .pred_Class1)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 roc_auc binary 0.957\n\nval_results %>% accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 accuracy binary 0.91\n\nval_results %>% conf_mat(truth = class, .pred_class)\n#> Truth\n#> Prediction Class1 Class2\n#> Class1 175 18\n#> Class2 27 280\n\nLet’s also create a grid to get a visual sense of the class boundary for the test set.\n\na_rng <- range(cls_train$A)\nb_rng <- range(cls_train$B)\nx_grid <-\n expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100),\n B = seq(b_rng[1], b_rng[2], length.out = 100))\n\n\n# Make predictions using the transformed predictors but \n# attach them to the predictors in the original units: \nx_grid <- \n x_grid %>% \n bind_cols(predict(nnet_fit, x_grid, type = \"prob\"))\n\nggplot(x_grid, aes(x = A, y = B)) + \n geom_point(data = cls_test, aes(col = class), alpha = 1 / 2, cex = 3) +\n geom_contour(aes(z = .pred_Class1), breaks = .5, col = \"black\", linewidth = 1) + \n coord_fixed()" + }, + { + "objectID": "learn/models/parsnip-nnet/index.html#session-info", + "href": "learn/models/parsnip-nnet/index.html#session-info", + "title": "Classification models using a neural network", + "section": 
"Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> AppliedPredictiveModeling * 1.1-7 2018-05-22 [1] CRAN (R 4.3.0)\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> brulee 0.2.0 2022-09-19 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/models/parsnip-ranger-glmnet/index.html", + "href": "learn/models/parsnip-ranger-glmnet/index.html", + "title": "Regression models two ways", + "section": "", + "text": "To use code in this article, you will need to install the following packages: glmnet, randomForest, ranger, and tidymodels.\nWe can 
create regression models with the tidymodels package parsnip to predict continuous or numeric quantities. Here, let’s first fit a random forest model, which does not require all numeric input (see discussion here) and discuss how to use fit() and fit_xy(), as well as data descriptors.\nSecond, let’s fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip." + }, + { + "objectID": "learn/models/parsnip-ranger-glmnet/index.html#introduction", + "href": "learn/models/parsnip-ranger-glmnet/index.html#introduction", + "title": "Regression models two ways", + "section": "", + "text": "To use code in this article, you will need to install the following packages: glmnet, randomForest, ranger, and tidymodels.\nWe can create regression models with the tidymodels package parsnip to predict continuous or numeric quantities. Here, let’s first fit a random forest model, which does not require all numeric input (see discussion here) and discuss how to use fit() and fit_xy(), as well as data descriptors.\nSecond, let’s fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip." + }, + { + "objectID": "learn/models/parsnip-ranger-glmnet/index.html#the-ames-housing-data", + "href": "learn/models/parsnip-ranger-glmnet/index.html#the-ames-housing-data", + "title": "Regression models two ways", + "section": "The Ames housing data", + "text": "The Ames housing data\nWe’ll use the Ames housing data set to demonstrate how to create regression models using parsnip. 
First, set up the data set and create a simple training/test set split:\n\nlibrary(tidymodels)\n\ndata(ames)\n\nset.seed(4595)\ndata_split <- initial_split(ames, strata = \"Sale_Price\", prop = 0.75)\n\names_train <- training(data_split)\names_test <- testing(data_split)\n\nThe use of the test set here is only for illustration; normally in a data analysis these data would be saved to the very end after many models have been evaluated." + }, + { + "objectID": "learn/models/parsnip-ranger-glmnet/index.html#random-forest", + "href": "learn/models/parsnip-ranger-glmnet/index.html#random-forest", + "title": "Regression models two ways", + "section": "Random forest", + "text": "Random forest\nWe’ll start by fitting a random forest model to a small set of parameters. Let’s create a model with the predictors Longitude, Latitude, Lot_Area, Neighborhood, and Year_Sold. A simple random forest model can be specified via:\n\nrf_defaults <- rand_forest(mode = \"regression\")\nrf_defaults\n#> Random Forest Model Specification (regression)\n#> \n#> Computational engine: ranger\n\nThe model will be fit with the ranger package by default. Since we didn’t add any extra arguments to fit, many of the arguments will be set to their defaults from the function ranger::ranger(). 
The help pages for the model function describe the default parameters and you can also use the translate() function to check out such details.\nThe parsnip package provides two different interfaces to fit a model:\n\nthe formula interface (fit()), and\nthe non-formula interface (fit_xy()).\n\nLet’s start with the non-formula interface:\n\npreds <- c(\"Longitude\", \"Latitude\", \"Lot_Area\", \"Neighborhood\", \"Year_Sold\")\n\nrf_xy_fit <- \n rf_defaults %>%\n set_engine(\"ranger\") %>%\n fit_xy(\n x = ames_train[, preds],\n y = log10(ames_train$Sale_Price)\n )\n\nrf_xy_fit\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) \n#> \n#> Type: Regression \n#> Number of trees: 500 \n#> Sample size: 2197 \n#> Number of independent variables: 5 \n#> Mtry: 2 \n#> Target node size: 5 \n#> Variable importance mode: none \n#> Splitrule: variance \n#> OOB prediction error (MSE): 0.008500188 \n#> R squared (OOB): 0.7239116\n\nThe non-formula interface doesn’t do anything to the predictors before passing them to the underlying model function. This particular model does not require indicator variables (sometimes called “dummy variables”) to be created prior to fitting the model. 
Note that the output shows “Number of independent variables: 5”.\nFor regression models, we can use the basic predict() method, which returns a tibble with a column named .pred:\n\ntest_results <- \n ames_test %>%\n select(Sale_Price) %>%\n mutate(Sale_Price = log10(Sale_Price)) %>%\n bind_cols(\n predict(rf_xy_fit, new_data = ames_test[, preds])\n )\ntest_results %>% slice(1:5)\n#> # A tibble: 5 × 2\n#> Sale_Price .pred\n#> <dbl> <dbl>\n#> 1 5.39 5.25\n#> 2 5.28 5.29\n#> 3 5.23 5.26\n#> 4 5.21 5.30\n#> 5 5.60 5.51\n\n# summarize performance\ntest_results %>% metrics(truth = Sale_Price, estimate = .pred) \n#> # A tibble: 3 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 rmse standard 0.0945\n#> 2 rsq standard 0.733 \n#> 3 mae standard 0.0629\n\nNote that:\n\nIf the model required indicator variables, we would have to create them manually prior to using fit() (perhaps using the recipes package).\nWe had to manually log the outcome prior to modeling.\n\nNow, for illustration, let’s use the formula method using some new parameter values:\n\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~3, x), num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) \n#> \n#> Type: Regression \n#> Number of trees: 1000 \n#> Sample size: 2197 \n#> Number of independent variables: 5 \n#> Mtry: 3 \n#> Target node size: 5 \n#> Variable importance mode: none \n#> Splitrule: variance \n#> OOB prediction error (MSE): 0.008402569 \n#> R squared (OOB): 0.7270823\n\nSuppose that we would like to use the randomForest package instead of ranger. 
To do so, the only part of the syntax that needs to change is the set_engine() argument:\n\nrand_forest(mode = \"regression\", mtry = 3, trees = 1000) %>%\n set_engine(\"randomForest\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n#> parsnip model object\n#> \n#> \n#> Call:\n#> randomForest(x = maybe_data_frame(x), y = y, ntree = ~1000, mtry = min_cols(~3, x)) \n#> Type of random forest: regression\n#> Number of trees: 1000\n#> No. of variables tried at each split: 3\n#> \n#> Mean of squared residuals: 0.008472074\n#> % Var explained: 72.47\n\nLook at the formula code that was printed out; one function uses the argument name ntree and the other uses num.trees. The parsnip models don’t require you to know the specific names of the main arguments.\nNow suppose that we want to modify the value of mtry based on the number of predictors in the data. Usually, a good default value is floor(sqrt(num_predictors)) but a pure bagging model requires an mtry value equal to the total number of parameters. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code.\nWhen the model it being fit by parsnip, data descriptors are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using rand_forest()), the values of the arguments that you give it are immediately evaluated unless you delay them. 
To delay the evaluation of any argument, you can used rlang::expr() to make an expression.\nTwo relevant data descriptors for our example model are:\n\n.preds(): the number of predictor variables in the data set that are associated with the predictors prior to dummy variable creation.\n.cols(): the number of predictor columns after dummy variables (or other encodings) are created.\n\nSince ranger won’t create indicator values, .preds() would be appropriate for mtry for a bagging model.\nFor example, let’s use an expression with the .preds() descriptor to fit a bagging model:\n\nrand_forest(mode = \"regression\", mtry = .preds(), trees = 1000) %>%\n set_engine(\"ranger\") %>%\n fit(\n log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold,\n data = ames_train\n )\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, mtry = min_cols(~.preds(), x), num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1)) \n#> \n#> Type: Regression \n#> Number of trees: 1000 \n#> Sample size: 2197 \n#> Number of independent variables: 5 \n#> Mtry: 5 \n#> Target node size: 5 \n#> Variable importance mode: none \n#> Splitrule: variance \n#> OOB prediction error (MSE): 0.00867085 \n#> R squared (OOB): 0.7183685" + }, + { + "objectID": "learn/models/parsnip-ranger-glmnet/index.html#regularized-regression", + "href": "learn/models/parsnip-ranger-glmnet/index.html#regularized-regression", + "title": "Regression models two ways", + "section": "Regularized regression", + "text": "Regularized regression\nA linear model might work for this data set as well. We can use the linear_reg() parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let’s use the former here. 
The glmnet package only implements a non-formula method, but parsnip will allow either one to be used.\nWhen regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won’t do that automatically so we will need to do this ourselves. We’ll use the recipes package for these steps.\n\nnorm_recipe <- \n recipe(\n Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, \n data = ames_train\n ) %>%\n step_other(Neighborhood) %>% \n step_dummy(all_nominal()) %>%\n step_center(all_predictors()) %>%\n step_scale(all_predictors()) %>%\n step_log(Sale_Price, base = 10) %>% \n # estimate the means and standard deviations\n prep(training = ames_train, retain = TRUE)\n\n# Now let's fit the model using the processed version of the data\n\nglmn_fit <- \n linear_reg(penalty = 0.001, mixture = 0.5) %>% \n set_engine(\"glmnet\") %>%\n fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL))\nglmn_fit\n#> parsnip model object\n#> \n#> \n#> Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = \"gaussian\", alpha = ~0.5) \n#> \n#> Df %Dev Lambda\n#> 1 0 0.00 0.138300\n#> 2 1 1.96 0.126000\n#> 3 1 3.72 0.114800\n#> 4 1 5.28 0.104600\n#> 5 2 7.07 0.095320\n#> 6 3 9.64 0.086850\n#> 7 4 12.58 0.079140\n#> 8 5 15.45 0.072110\n#> 9 5 17.93 0.065700\n#> 10 7 20.81 0.059860\n#> 11 7 23.51 0.054550\n#> 12 7 25.82 0.049700\n#> 13 8 28.20 0.045290\n#> 14 8 30.31 0.041260\n#> 15 8 32.12 0.037600\n#> 16 8 33.66 0.034260\n#> 17 8 34.97 0.031210\n#> 18 8 36.08 0.028440\n#> 19 8 37.02 0.025910\n#> 20 9 37.90 0.023610\n#> 21 9 38.65 0.021510\n#> 22 9 39.29 0.019600\n#> 23 9 39.83 0.017860\n#> 24 9 40.28 0.016270\n#> 25 10 40.68 0.014830\n#> 26 11 41.06 0.013510\n#> 27 11 41.38 0.012310\n#> 28 11 41.65 0.011220\n#> 29 11 41.88 0.010220\n#> 30 12 42.09 0.009313\n#> 31 12 42.27 0.008486\n#> 32 12 42.43 0.007732\n#> 33 12 42.56 0.007045\n#> 34 12 42.66 0.006419\n#> 35 12 42.75 0.005849\n#> 36 12 42.83 
0.005329\n#> 37 12 42.90 0.004856\n#> 38 12 42.95 0.004424\n#> 39 12 42.99 0.004031\n#> 40 12 43.03 0.003673\n#> 41 12 43.06 0.003347\n#> 42 12 43.09 0.003050\n#> 43 12 43.11 0.002779\n#> 44 12 43.13 0.002532\n#> 45 12 43.15 0.002307\n#> 46 12 43.16 0.002102\n#> 47 12 43.17 0.001915\n#> 48 12 43.18 0.001745\n#> 49 12 43.19 0.001590\n#> 50 12 43.19 0.001449\n#> 51 12 43.20 0.001320\n#> 52 12 43.20 0.001203\n#> 53 12 43.21 0.001096\n#> 54 12 43.21 0.000999\n#> 55 12 43.21 0.000910\n#> 56 12 43.21 0.000829\n#> 57 12 43.22 0.000755\n#> 58 12 43.22 0.000688\n#> 59 12 43.22 0.000627\n#> 60 12 43.22 0.000571\n#> 61 12 43.22 0.000521\n#> 62 12 43.22 0.000474\n#> 63 12 43.22 0.000432\n#> 64 12 43.22 0.000394\n#> 65 12 43.22 0.000359\n\nIf penalty were not specified, all of the lambda values would be computed.\nTo get the predictions for this specific value of lambda (aka penalty):\n\n# First, get the processed version of the test set predictors:\ntest_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors())\n\ntest_results <- \n test_results %>%\n rename(`random forest` = .pred) %>%\n bind_cols(\n predict(glmn_fit, new_data = test_normalized) %>%\n rename(glmnet = .pred)\n )\ntest_results\n#> # A tibble: 733 × 3\n#> Sale_Price `random forest` glmnet\n#> <dbl> <dbl> <dbl>\n#> 1 5.39 5.25 5.16\n#> 2 5.28 5.29 5.27\n#> 3 5.23 5.26 5.24\n#> 4 5.21 5.30 5.24\n#> 5 5.60 5.51 5.24\n#> 6 5.32 5.29 5.26\n#> 7 5.17 5.14 5.18\n#> 8 5.06 5.13 5.17\n#> 9 4.98 5.01 5.18\n#> 10 5.11 5.14 5.19\n#> # ℹ 723 more rows\n\ntest_results %>% metrics(truth = Sale_Price, estimate = glmnet) \n#> # A tibble: 3 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 rmse standard 0.142 \n#> 2 rsq standard 0.391 \n#> 3 mae standard 0.0979\n\ntest_results %>% \n gather(model, prediction, -Sale_Price) %>% \n ggplot(aes(x = prediction, y = Sale_Price)) + \n geom_abline(col = \"green\", lty = 2) + \n geom_point(alpha = .4) + \n facet_wrap(~model) + \n 
coord_fixed()\n\n\n\n\n\n\n\n\nThis final plot compares the performance of the random forest and regularized regression models." + }, + { + "objectID": "learn/models/parsnip-ranger-glmnet/index.html#session-info", + "href": "learn/models/parsnip-ranger-glmnet/index.html#session-info", + "title": "Regression models two ways", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> glmnet * 4.1-7 2023-03-23 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> randomForest * 4.7-1.1 2022-05-23 [1] CRAN (R 4.3.0)\n#> ranger * 0.15.1 2023-04-03 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> 
────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/models/pls/index.html", + "href": "learn/models/pls/index.html", + "title": "Multivariate analysis using partial least squares", + "section": "", + "text": "To use code in this article, you will need to install the following packages: modeldata, pls, and tidymodels.\n“Multivariate analysis” usually refers to multiple outcomes being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns y1 and y2 representing two outcomes to be predicted. The lm() function would look something like:\n\nlm(cbind(y1, y2) ~ ., data = dat)\n\nThis cbind() call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes.\nThe data that we’ll use has three outcomes. From ?modeldata::meats:\n\n“These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents.\n\n\n“For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is -log10 of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry.”\n\nThe goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here.\nTo start, let’s take the two data matrices (called endpoints and absorp) and bind them together in a data frame:\n\nlibrary(modeldata)\ndata(meats)\n\nThe three outcomes have fairly high correlations also." 
+ }, + { + "objectID": "learn/models/pls/index.html#introduction", + "href": "learn/models/pls/index.html#introduction", + "title": "Multivariate analysis using partial least squares", + "section": "", + "text": "To use code in this article, you will need to install the following packages: modeldata, pls, and tidymodels.\n“Multivariate analysis” usually refers to multiple outcomes being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns y1 and y2 representing two outcomes to be predicted. The lm() function would look something like:\n\nlm(cbind(y1, y2) ~ ., data = dat)\n\nThis cbind() call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes.\nThe data that we’ll use has three outcomes. From ?modeldata::meats:\n\n“These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents.\n\n\n“For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is -log10 of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry.”\n\nThe goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here.\nTo start, let’s take the two data matrices (called endpoints and absorp) and bind them together in a data frame:\n\nlibrary(modeldata)\ndata(meats)\n\nThe three outcomes have fairly high correlations also." 
+ }, + { + "objectID": "learn/models/pls/index.html#preprocessing-the-data", + "href": "learn/models/pls/index.html#preprocessing-the-data", + "title": "Multivariate analysis using partial least squares", + "section": "Preprocessing the data", + "text": "Preprocessing the data\nIf the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved latent variables that are derived in a manner similar to principal component analysis (PCA).\nPLS, unlike PCA, also incorporates the outcome data when creating the PLS components. Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS chases variation of the predictors and outcomes.\nSince we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables.\nMany base R functions that deal with multivariate outcomes using a formula require the use of cbind() on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically “added” together on the left-hand side:\n\nnorm_rec <- \n recipe(water + fat + protein ~ ., data = meats) %>%\n step_normalize(everything()) \n\nBefore we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the predictors and each of the outcomes. This allows an informed choice to be made based on the level of evidence that the situation requires.\nSince the data set isn’t large, let’s use resampling to measure these proportions. 
With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions.\nThe folds can be created using the rsample package and the recipe can be estimated for each resample using the prepper() function:\n\nset.seed(57343)\nfolds <- vfold_cv(meats, repeats = 10)\n\nfolds <- \n folds %>%\n mutate(recipes = map(splits, prepper, recipe = norm_rec))" + }, + { + "objectID": "learn/models/pls/index.html#partial-least-squares", + "href": "learn/models/pls/index.html#partial-least-squares", + "title": "Multivariate analysis using partial least squares", + "section": "Partial least squares", + "text": "Partial least squares\nThe complicated parts for moving forward are:\n\nFormatting the predictors and outcomes into the format that the pls package requires, and\nEstimating the proportions.\n\nFor the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used retain = TRUE when prepping the recipes, we can bake() with new_data = NULl to get the processed data back out. To save the data as a matrix, the option composition = \"matrix\" will avoid saving the data as tibbles and use the required format.\nThe pls package expects a simple formula to specify the model, but each side of the formula should represent a matrix. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to “protect” the two matrices using I() when adding them to the data frame.\nThe calculation for the proportion of variance explained is straightforward for the predictors; the function pls::explvar() will compute that. For the outcomes, the process is more complicated. 
A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below).\nThe function get_var_explained() shown here will do all these computations and return a data frame with columns components, source (for the predictors, water, etc), and the proportion of variance that is explained by the components.\n\nlibrary(pls)\n\nget_var_explained <- function(recipe, ...) {\n \n # Extract the predictors and outcomes into their own matrices\n y_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_outcomes())\n x_mat <- bake(recipe, new_data = NULL, composition = \"matrix\", all_predictors())\n \n # The pls package prefers the data in a data frame where the outcome\n # and predictors are in _matrices_. To make sure this is formatted\n # properly, use the `I()` function to inhibit `data.frame()` from making\n # all the individual columns. `pls_format` should have two columns.\n pls_format <- data.frame(\n endpoints = I(y_mat),\n measurements = I(x_mat)\n )\n # Fit the model\n mod <- plsr(endpoints ~ measurements, data = pls_format)\n \n # Get the proportion of the predictor variance that is explained\n # by the model for different number of components. \n xve <- explvar(mod)/100 \n\n # To do the same for the outcome, it is more complex. This code \n # was extracted from pls:::summary.mvr. 
\n explained <- \n drop(pls::R2(mod, estimate = \"train\", intercept = FALSE)$val) %>% \n # transpose so that components are in rows\n t() %>% \n as_tibble() %>%\n # Add the predictor proportions\n mutate(predictors = cumsum(xve) %>% as.vector(),\n components = seq_along(xve)) %>%\n # Put into a tidy format that is tall\n pivot_longer(\n cols = c(-components),\n names_to = \"source\",\n values_to = \"proportion\"\n )\n}\n\nWe compute this data frame for each resample and save the results in the different columns.\n\nfolds <- \n folds %>%\n mutate(var = map(recipes, get_var_explained),\n var = unname(var))\n\nTo extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let’s filter the data and compute the average proportion.\n\nvariance_data <- \n bind_rows(folds[[\"var\"]]) %>%\n filter(components <= 15) %>%\n group_by(components, source) %>%\n summarize(proportion = mean(proportion))\n#> `summarise()` has grouped output by 'components'. You can override\n#> using the `.groups` argument.\n\nThe plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data.\n\nggplot(variance_data, aes(x = components, y = proportion, col = source)) + \n geom_line(alpha = 0.5, size = 1.2) + \n geom_point() \n#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.\n#> ℹ Please use `linewidth` instead." 
+ }, + { + "objectID": "learn/models/pls/index.html#session-info", + "href": "learn/models/pls/index.html#session-info", + "title": "Multivariate analysis using partial least squares", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> pls * 2.8-1 2022-07-16 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/models/sub-sampling/index.html", + "href": "learn/models/sub-sampling/index.html", + "title": "Subsampling for class 
imbalances", + "section": "", + "text": "To use code in this article, you will need to install the following packages: discrim, klaR, readr, ROSE, themis, and tidymodels.\nSubsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance.\nThis article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in Kuhn and Johnson (2019) for more information on these metrics." + }, + { + "objectID": "learn/models/sub-sampling/index.html#introduction", + "href": "learn/models/sub-sampling/index.html#introduction", + "title": "Subsampling for class imbalances", + "section": "", + "text": "To use code in this article, you will need to install the following packages: discrim, klaR, readr, ROSE, themis, and tidymodels.\nSubsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance.\nThis article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. 
See Section 3.2.2 in Kuhn and Johnson (2019) for more information on these metrics." + }, + { + "objectID": "learn/models/sub-sampling/index.html#simulated-data", + "href": "learn/models/sub-sampling/index.html#simulated-data", + "title": "Subsampling for class imbalances", + "section": "Simulated data", + "text": "Simulated data\nConsider a two-class problem where the first class has a very low rate of occurrence. The data were simulated and can be imported into R using the code below:\n\nimbal_data <- \n readr::read_csv(\"https://bit.ly/imbal_data\") %>% \n mutate(Class = factor(Class))\ndim(imbal_data)\n#> [1] 1200 16\ntable(imbal_data$Class)\n#> \n#> Class1 Class2 \n#> 60 1140\n\nIf “Class1” is the event of interest, it is very likely that a classification model would be able to achieve very good specificity since almost all of the data are of the second class. Sensitivity, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class.\nOne result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance." + }, + { + "objectID": "learn/models/sub-sampling/index.html#subsampling-the-data", + "href": "learn/models/sub-sampling/index.html#subsampling-the-data", + "title": "Subsampling for class imbalances", + "section": "Subsampling the data", + "text": "Subsampling the data\nOne way to alleviate this issue is to subsample the data. There are a number of ways to do this but the most simple one is to sample down (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. 
In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are better calibrated, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise.\nLet’s explore subsampling using themis::step_rose() in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from Menardi, G. and Torelli, N. (2014). This is an example of an oversampling strategy, rather than undersampling.\nIn terms of workflow:\n\nIt is extremely important that subsampling occurs inside of resampling. Otherwise, the resampling process can produce poor estimates of model performance.\nThe subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen “in the wild” and, for this reason, the skip argument to step_downsample() and other subsampling recipes steps has a default of TRUE.\n\nHere is a simple recipe implementing oversampling:\n\nlibrary(tidymodels)\nlibrary(themis)\nimbal_rec <- \n recipe(Class ~ ., data = imbal_data) %>%\n step_rose(Class)\n\nFor a model, let’s use a quadratic discriminant analysis (QDA) model. 
From the discrim package, this model can be specified using:\n\nlibrary(discrim)\nqda_mod <- \n discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% \n set_engine(\"klaR\")\n\nTo keep these objects bound together, they can be combined in a workflow:\n\nqda_rose_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_recipe(imbal_rec)\nqda_rose_wflw\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: discrim_regularized()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 1 Recipe Step\n#> \n#> • step_rose()\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Regularized Discriminant Model Specification (classification)\n#> \n#> Main Arguments:\n#> frac_common_cov = 0\n#> frac_identity = 0\n#> \n#> Computational engine: klaR" + }, + { + "objectID": "learn/models/sub-sampling/index.html#model-performance", + "href": "learn/models/sub-sampling/index.html#model-performance", + "title": "Subsampling for class imbalances", + "section": "Model performance", + "text": "Model performance\nStratified, repeated 10-fold cross-validation is used to resample the model:\n\nset.seed(5732)\ncv_folds <- vfold_cv(imbal_data, strata = \"Class\", repeats = 5)\n\nTo measure model performance, let’s use two metrics:\n\nThe area under the ROC curve is an overall assessment of performance across all cutoffs. Values near one indicate very good results while values near 0.5 would imply that the model is very poor.\nThe J index (a.k.a. Youden’s J statistic) is sensitivity + specificity - 1. Values near one are once again best.\n\nIf a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the J index would be lower for models with pathological distributions for the class probabilities. 
The yardstick package will be used to compute these metrics.\n\ncls_metrics <- metric_set(roc_auc, j_index)\n\nNow, we train the models and generate the results using tune::fit_resamples():\n\nset.seed(2180)\nqda_rose_res <- fit_resamples(\n qda_rose_wflw, \n resamples = cv_folds, \n metrics = cls_metrics\n)\n\ncollect_metrics(qda_rose_res)\n#> # A tibble: 2 × 6\n#> .metric .estimator mean n std_err .config \n#> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 j_index binary 0.749 50 0.0234 Preprocessor1_Model1\n#> 2 roc_auc binary 0.949 50 0.00510 Preprocessor1_Model1\n\nWhat do the results look like without using ROSE? We can create another workflow and fit the QDA model along the same resamples:\n\nqda_wflw <- \n workflow() %>% \n add_model(qda_mod) %>% \n add_formula(Class ~ .)\n\nset.seed(2180)\nqda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics)\ncollect_metrics(qda_only_res)\n#> # A tibble: 2 × 6\n#> .metric .estimator mean n std_err .config \n#> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 j_index binary 0.250 50 0.0288 Preprocessor1_Model1\n#> 2 roc_auc binary 0.953 50 0.00479 Preprocessor1_Model1\n\nIt looks like ROSE helped a lot, especially with the J-index. 
Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity.\nLet’s plot the metrics for each resample to see how the individual results changed.\n\nno_sampling <- \n qda_only_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"no_sampling\")\n\nwith_sampling <- \n qda_rose_res %>% \n collect_metrics(summarize = FALSE) %>% \n dplyr::select(-.estimator) %>% \n mutate(sampling = \"rose\")\n\nbind_rows(no_sampling, with_sampling) %>% \n mutate(label = paste(id2, id)) %>% \n ggplot(aes(x = sampling, y = .estimate, group = label)) + \n geom_line(alpha = .4) + \n facet_wrap(~ .metric, scales = \"free_y\")\n\n\n\n\n\n\n\n\nThis visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions." + }, + { + "objectID": "learn/models/sub-sampling/index.html#session-info", + "href": "learn/models/sub-sampling/index.html#session-info", + "title": "Subsampling for class imbalances", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> discrim * 1.0.1 2023-03-08 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 
1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> klaR * 1.7-2 2023-03-17 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> ROSE * 0.0-4 2021-06-14 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> themis * 1.0.1 2023-04-14 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/models/time-series/index.html", + "href": "learn/models/time-series/index.html", + "title": "Modeling time series with tidy resampling", + "section": "", + "text": "To use code in this article, you will need to install the following packages: forecast, sweep, tidymodels, timetk, and zoo.\n“Demo Week: Tidy Forecasting with sweep” is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using rolling forecast origin resampling." + }, + { + "objectID": "learn/models/time-series/index.html#introduction", + "href": "learn/models/time-series/index.html#introduction", + "title": "Modeling time series with tidy resampling", + "section": "", + "text": "To use code in this article, you will need to install the following packages: forecast, sweep, tidymodels, timetk, and zoo.\n“Demo Week: Tidy Forecasting with sweep” is an excellent article that uses tidy methods with time series. 
This article uses their analysis with rsample to find performance estimates for future observations using rolling forecast origin resampling." + }, + { + "objectID": "learn/models/time-series/index.html#example-data", + "href": "learn/models/time-series/index.html#example-data", + "title": "Modeling time series with tidy resampling", + "section": "Example data", + "text": "Example data\nThe data for this article are sales of alcoholic beverages originally from the Federal Reserve Bank of St. Louis website.\n\nlibrary(tidymodels)\nlibrary(modeldata)\ndata(\"drinks\")\nglimpse(drinks)\n#> Rows: 309\n#> Columns: 2\n#> $ date <date> 1992-01-01, 1992-02-01, 1992-03-01, 1992-04-01, 1992-0…\n#> $ S4248SM144NCEN <dbl> 3459, 3458, 4002, 4564, 4221, 4529, 4466, 4137, 4126, 4…\n\nEach row represents one month of sales (in millions of US dollars)." + }, + { + "objectID": "learn/models/time-series/index.html#time-series-resampling", + "href": "learn/models/time-series/index.html#time-series-resampling", + "title": "Modeling time series with tidy resampling", + "section": "Time series resampling", + "text": "Time series resampling\nSuppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. 
To set up this resampling scheme:\n\nroll_rs <- rolling_origin(\n drinks, \n initial = 12 * 20, \n assess = 12,\n cumulative = FALSE\n )\n\nnrow(roll_rs)\n#> [1] 58\n\nroll_rs\n#> # Rolling origin forecast resampling \n#> # A tibble: 58 × 2\n#> splits id \n#> <list> <chr> \n#> 1 <split [240/12]> Slice01\n#> 2 <split [240/12]> Slice02\n#> 3 <split [240/12]> Slice03\n#> 4 <split [240/12]> Slice04\n#> 5 <split [240/12]> Slice05\n#> 6 <split [240/12]> Slice06\n#> 7 <split [240/12]> Slice07\n#> 8 <split [240/12]> Slice08\n#> 9 <split [240/12]> Slice09\n#> 10 <split [240/12]> Slice10\n#> # ℹ 48 more rows\n\nEach split element contains the information about that resample:\n\nroll_rs$splits[[1]]\n#> <Analysis/Assess/Total>\n#> <240/12/309>\n\nFor plotting, let’s index each split by the first day of the assessment set:\n\nget_date <- function(x) {\n min(assessment(x)$date)\n}\n\nstart_date <- map(roll_rs$splits, get_date)\nroll_rs$start_date <- do.call(\"c\", start_date)\nhead(roll_rs$start_date)\n#> [1] \"2012-01-01\" \"2012-02-01\" \"2012-03-01\" \"2012-04-01\" \"2012-05-01\"\n#> [6] \"2012-06-01\"\n\nThis resampling scheme has 58 splits of the data so that there will be 58 ARIMA models that are fit. To create the models, we use the auto.arima() function from the forecast package. The rsample functions analysis() and assessment() return a data frame, so another step converts the data to a ts object called mod_dat using a function in the timetk package.\n\nlibrary(forecast) # for `auto.arima`\nlibrary(timetk) # for `tk_ts`\nlibrary(zoo) # for `as.yearmon`\n\nfit_model <- function(x, ...) 
{\n # suggested by Matt Dancho:\n x %>%\n analysis() %>%\n # Since the first day changes over resamples, adjust it\n # based on the first date value in the data frame \n tk_ts(start = .$date[[1]] %>% as.yearmon(), \n frequency = 12, \n silent = TRUE) %>%\n auto.arima(...)\n}\n\nSave each model in a new column:\n\nroll_rs$arima <- map(roll_rs$splits, fit_model)\n\n# For example:\nroll_rs$arima[[1]]\n#> Series: . \n#> ARIMA(4,1,1)(0,1,2)[12] \n#> \n#> Coefficients:\n#> ar1 ar2 ar3 ar4 ma1 sma1 sma2\n#> -0.1852 -0.0238 0.3577 -0.1517 -0.8311 -0.193 -0.3244\n#> s.e. 0.1466 0.1656 0.1440 0.0809 0.1377 0.067 0.0640\n#> \n#> sigma^2 = 72198: log likelihood = -1591.15\n#> AIC=3198.3 AICc=3198.97 BIC=3225.7\n\n(There are some warnings produced by these regarding extra columns in the data that can be ignored.)" + }, + { + "objectID": "learn/models/time-series/index.html#model-performance", + "href": "learn/models/time-series/index.html#model-performance", + "title": "Modeling time series with tidy resampling", + "section": "Model performance", + "text": "Model performance\nUsing the model fits, let’s measure performance in two ways:\n\nInterpolation error will measure how well the model fits to the data that were used to create the model. This is most likely optimistic since no holdout method is used.\nExtrapolation or forecast error evaluates the performance of the model on the data from the following year (that were not used in the model fit).\n\nIn each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the Arima object. To make things easy, let’s use the sweep package’s sw_glance() function:\n\nlibrary(sweep)\n\nroll_rs$interpolation <- map_dbl(\n roll_rs$arima,\n function(x) \n sw_glance(x)[[\"MAPE\"]]\n )\n\nsummary(roll_rs$interpolation)\n#> Min. 1st Qu. Median Mean 3rd Qu. Max. 
\n#> 2.841 2.921 2.950 2.947 2.969 3.135\n\nFor the extrapolation error, the model and split objects are required. Using these:\n\nget_extrap <- function(split, mod) {\n n <- nrow(assessment(split))\n # Get assessment data\n pred_dat <- assessment(split) %>%\n mutate(\n pred = as.vector(forecast(mod, h = n)$mean),\n pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100\n )\n mean(abs(pred_dat$pct_error))\n}\n\nroll_rs$extrapolation <- \n map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap)\n\nsummary(roll_rs$extrapolation)\n#> Min. 1st Qu. Median Mean 3rd Qu. Max. \n#> 2.371 3.231 3.629 3.654 4.113 5.453\n\nWhat do these error estimates look like over time?\n\nroll_rs %>%\n select(interpolation, extrapolation, start_date) %>%\n pivot_longer(cols = matches(\"ation\"), names_to = \"error\", values_to = \"MAPE\") %>%\n ggplot(aes(x = start_date, y = MAPE, col = error)) + \n geom_point() + \n geom_line()\n\n\n\n\n\n\n\n\nIt is likely that the interpolation error is an underestimate to some degree, as mentioned above.\nIt is also worth noting that rolling_origin() can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days.\nThe example below demonstrates this idea by splitting drinks into a nested set of 26 years, and rolling over years rather than months. 
Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month.\n\n# The idea is to nest by the period to roll over,\n# which in this case is the year.\nroll_rs_annual <- drinks %>%\n mutate(year = as.POSIXlt(date)$year + 1900) %>%\n nest(data = c(date, S4248SM144NCEN)) %>%\n rolling_origin(\n initial = 20, \n assess = 1, \n cumulative = FALSE\n )\n\nanalysis(roll_rs_annual$splits[[1]])\n#> # A tibble: 20 × 2\n#> year data \n#> <dbl> <list> \n#> 1 1992 <tibble [12 × 2]>\n#> 2 1993 <tibble [12 × 2]>\n#> 3 1994 <tibble [12 × 2]>\n#> 4 1995 <tibble [12 × 2]>\n#> 5 1996 <tibble [12 × 2]>\n#> 6 1997 <tibble [12 × 2]>\n#> 7 1998 <tibble [12 × 2]>\n#> 8 1999 <tibble [12 × 2]>\n#> 9 2000 <tibble [12 × 2]>\n#> 10 2001 <tibble [12 × 2]>\n#> 11 2002 <tibble [12 × 2]>\n#> 12 2003 <tibble [12 × 2]>\n#> 13 2004 <tibble [12 × 2]>\n#> 14 2005 <tibble [12 × 2]>\n#> 15 2006 <tibble [12 × 2]>\n#> 16 2007 <tibble [12 × 2]>\n#> 17 2008 <tibble [12 × 2]>\n#> 18 2009 <tibble [12 × 2]>\n#> 19 2010 <tibble [12 × 2]>\n#> 20 2011 <tibble [12 × 2]>\n\nThe workflow to access these calendar slices is to use bind_rows() to join each analysis set together.\n\nmutate(\n roll_rs_annual,\n extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data))\n)\n#> # Rolling origin forecast resampling \n#> # A tibble: 6 × 3\n#> splits id extracted_slice \n#> <list> <chr> <list> \n#> 1 <split [20/1]> Slice1 <tibble [240 × 2]>\n#> 2 <split [20/1]> Slice2 <tibble [240 × 2]>\n#> 3 <split [20/1]> Slice3 <tibble [240 × 2]>\n#> 4 <split [20/1]> Slice4 <tibble [240 × 2]>\n#> 5 <split [20/1]> Slice5 <tibble [240 × 2]>\n#> 6 <split [20/1]> Slice6 <tibble [240 × 2]>" + }, + { + "objectID": "learn/models/time-series/index.html#session-info", + "href": "learn/models/time-series/index.html#session-info", + "title": "Modeling time series with tidy resampling", + "section": "Session information", + "text": "Session 
information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> forecast * 8.21 2023-02-27 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> sweep * 0.2.4 2023-05-25 [1] Github (business-science/sweep@d0327bc)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> timetk * 2.8.3 2023-03-30 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> zoo * 1.8-12 2023-04-13 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/statistics/bootstrap/index.html", + "href": "learn/statistics/bootstrap/index.html", + "title": "Bootstrap resampling and tidy regression models", + "section": "", + "text": "This article only requires the tidymodels package.\nCombining fitted 
models in a tidy way is useful for performing bootstrapping or permutation tests. These approaches have been explored before, for instance by Andrew MacDonald here, and Hadley has explored efficient support for bootstrapping as a potential enhancement to dplyr. The tidymodels package broom fits naturally with dplyr in performing these analyses.\nBootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate.\nLet’s say we want to fit a nonlinear model to the weight/mileage relationship in the mtcars data set.\n\nlibrary(tidymodels)\n\nggplot(mtcars, aes(mpg, wt)) + \n geom_point()\n\n\n\n\n\n\n\n\nWe might use the method of nonlinear least squares (via the nls() function) to fit a model.\n\nnlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0))\nsummary(nlsfit)\n#> \n#> Formula: mpg ~ k/wt + b\n#> \n#> Parameters:\n#> Estimate Std. Error t value Pr(>|t|) \n#> k 45.829 4.249 10.786 7.64e-12 ***\n#> b 4.386 1.536 2.855 0.00774 ** \n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n#> \n#> Residual standard error: 2.774 on 30 degrees of freedom\n#> \n#> Number of iterations to convergence: 1 \n#> Achieved convergence tolerance: 6.813e-09\n\nggplot(mtcars, aes(wt, mpg)) +\n geom_point() +\n geom_line(aes(y = predict(nlsfit)))\n\n\n\n\n\n\n\n\nWhile this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data." 
+ }, + { + "objectID": "learn/statistics/bootstrap/index.html#introduction", + "href": "learn/statistics/bootstrap/index.html#introduction", + "title": "Bootstrap resampling and tidy regression models", + "section": "", + "text": "This article only requires the tidymodels package.\nCombining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. These approaches have been explored before, for instance by Andrew MacDonald here, and Hadley has explored efficient support for bootstrapping as a potential enhancement to dplyr. The tidymodels package broom fits naturally with dplyr in performing these analyses.\nBootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate.\nLet’s say we want to fit a nonlinear model to the weight/mileage relationship in the mtcars data set.\n\nlibrary(tidymodels)\n\nggplot(mtcars, aes(mpg, wt)) + \n geom_point()\n\n\n\n\n\n\n\n\nWe might use the method of nonlinear least squares (via the nls() function) to fit a model.\n\nnlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0))\nsummary(nlsfit)\n#> \n#> Formula: mpg ~ k/wt + b\n#> \n#> Parameters:\n#> Estimate Std. Error t value Pr(>|t|) \n#> k 45.829 4.249 10.786 7.64e-12 ***\n#> b 4.386 1.536 2.855 0.00774 ** \n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n#> \n#> Residual standard error: 2.774 on 30 degrees of freedom\n#> \n#> Number of iterations to convergence: 1 \n#> Achieved convergence tolerance: 6.813e-09\n\nggplot(mtcars, aes(wt, mpg)) +\n geom_point() +\n geom_line(aes(y = predict(nlsfit)))\n\n\n\n\n\n\n\n\nWhile this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. 
Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data." + }, + { + "objectID": "learn/statistics/bootstrap/index.html#bootstrapping-models", + "href": "learn/statistics/bootstrap/index.html#bootstrapping-models", + "title": "Bootstrap resampling and tidy regression models", + "section": "Bootstrapping models", + "text": "Bootstrapping models\nWe can use the bootstraps() function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an rset, which is a data frame with a column of rsplit objects.\nAn rsplit object has two main components: an analysis data set and an assessment data set, accessible via analysis(rsplit) and assessment(rsplit) respectively. For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples.\n\nset.seed(27)\nboots <- bootstraps(mtcars, times = 2000, apparent = TRUE)\nboots\n#> # Bootstrap sampling with apparent sample \n#> # A tibble: 2,001 × 2\n#> splits id \n#> <list> <chr> \n#> 1 <split [32/13]> Bootstrap0001\n#> 2 <split [32/10]> Bootstrap0002\n#> 3 <split [32/13]> Bootstrap0003\n#> 4 <split [32/11]> Bootstrap0004\n#> 5 <split [32/9]> Bootstrap0005\n#> 6 <split [32/10]> Bootstrap0006\n#> 7 <split [32/11]> Bootstrap0007\n#> 8 <split [32/13]> Bootstrap0008\n#> 9 <split [32/11]> Bootstrap0009\n#> 10 <split [32/11]> Bootstrap0010\n#> # ℹ 1,991 more rows\n\nLet’s create a helper function to fit an nls() model on each bootstrap sample, and then use purrr::map() to apply this function to all the bootstrap samples at once. 
Similarly, we create a column of tidy coefficient information by unnesting.\n\nfit_nls_on_bootstrap <- function(split) {\n nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0))\n}\n\nboot_models <-\n boots %>% \n mutate(model = map(splits, fit_nls_on_bootstrap),\n coef_info = map(model, tidy))\n\nboot_coefs <- \n boot_models %>% \n unnest(coef_info)\n\nThe unnested coefficient information contains a summary of each replication combined in a single data frame:\n\nboot_coefs\n#> # A tibble: 4,002 × 8\n#> splits id model term estimate std.error statistic p.value\n#> <list> <chr> <lis> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 <split [32/13]> Bootstrap0… <nls> k 42.1 4.05 10.4 1.91e-11\n#> 2 <split [32/13]> Bootstrap0… <nls> b 5.39 1.43 3.78 6.93e- 4\n#> 3 <split [32/10]> Bootstrap0… <nls> k 49.9 5.66 8.82 7.82e-10\n#> 4 <split [32/10]> Bootstrap0… <nls> b 3.73 1.92 1.94 6.13e- 2\n#> 5 <split [32/13]> Bootstrap0… <nls> k 37.8 2.68 14.1 9.01e-15\n#> 6 <split [32/13]> Bootstrap0… <nls> b 6.73 1.17 5.75 2.78e- 6\n#> 7 <split [32/11]> Bootstrap0… <nls> k 45.6 4.45 10.2 2.70e-11\n#> 8 <split [32/11]> Bootstrap0… <nls> b 4.75 1.62 2.93 6.38e- 3\n#> 9 <split [32/9]> Bootstrap0… <nls> k 43.6 4.63 9.41 1.85e-10\n#> 10 <split [32/9]> Bootstrap0… <nls> b 5.89 1.68 3.51 1.44e- 3\n#> # ℹ 3,992 more rows" + }, + { + "objectID": "learn/statistics/bootstrap/index.html#confidence-intervals", + "href": "learn/statistics/bootstrap/index.html#confidence-intervals", + "title": "Bootstrap resampling and tidy regression models", + "section": "Confidence intervals", + "text": "Confidence intervals\nWe can then calculate confidence intervals (using what is called the percentile method):\n\npercentile_intervals <- int_pctl(boot_models, coef_info)\npercentile_intervals\n#> # A tibble: 2 × 6\n#> term .lower .estimate .upper .alpha .method \n#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> \n#> 1 b 0.0475 4.12 7.31 0.05 percentile\n#> 2 k 37.6 46.7 59.8 0.05 percentile\n\nOr we can use histograms to 
get a more detailed idea of the uncertainty in each estimate:\n\nggplot(boot_coefs, aes(estimate)) +\n geom_histogram(bins = 30) +\n facet_wrap( ~ term, scales = \"free\") +\n geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = \"blue\") +\n geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = \"blue\")\n\n\n\n\n\n\n\n\nThe rsample package also has functions for other types of confidence intervals." + }, + { + "objectID": "learn/statistics/bootstrap/index.html#possible-model-fits", + "href": "learn/statistics/bootstrap/index.html#possible-model-fits", + "title": "Bootstrap resampling and tidy regression models", + "section": "Possible model fits", + "text": "Possible model fits\nWe can use augment() to visualize the uncertainty in the fitted curve. Since there are so many bootstrap samples, we’ll only show a sample of the model fits in our visualization:\n\nboot_aug <- \n boot_models %>% \n sample_n(200) %>% \n mutate(augmented = map(model, augment)) %>% \n unnest(augmented)\n\nboot_aug\n#> # A tibble: 6,400 × 8\n#> splits id model coef_info mpg wt .fitted .resid\n#> <list> <chr> <list> <list> <dbl> <dbl> <dbl> <dbl>\n#> 1 <split [32/11]> Bootstrap1644 <nls> <tibble> 16.4 4.07 15.6 0.829\n#> 2 <split [32/11]> Bootstrap1644 <nls> <tibble> 19.7 2.77 21.9 -2.21 \n#> 3 <split [32/11]> Bootstrap1644 <nls> <tibble> 19.2 3.84 16.4 2.84 \n#> 4 <split [32/11]> Bootstrap1644 <nls> <tibble> 21.4 2.78 21.8 -0.437\n#> 5 <split [32/11]> Bootstrap1644 <nls> <tibble> 26 2.14 27.8 -1.75 \n#> 6 <split [32/11]> Bootstrap1644 <nls> <tibble> 33.9 1.84 32.0 1.88 \n#> 7 <split [32/11]> Bootstrap1644 <nls> <tibble> 32.4 2.2 27.0 5.35 \n#> 8 <split [32/11]> Bootstrap1644 <nls> <tibble> 30.4 1.62 36.1 -5.70 \n#> 9 <split [32/11]> Bootstrap1644 <nls> <tibble> 21.5 2.46 24.4 -2.86 \n#> 10 <split [32/11]> Bootstrap1644 <nls> <tibble> 26 2.14 27.8 -1.75 \n#> # ℹ 6,390 more rows\n\n\nggplot(boot_aug, aes(wt, mpg)) +\n geom_line(aes(y = .fitted, group = id), 
alpha = .2, col = \"blue\") +\n geom_point()\n\n\n\n\n\n\n\n\nWith only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the tidy() and augment() functions works for many statistical outputs. As another example, we could use smooth.spline(), which fits a cubic smoothing spline to data:\n\nfit_spline_on_bootstrap <- function(split) {\n data <- analysis(split)\n smooth.spline(data$wt, data$mpg, df = 4)\n}\n\nboot_splines <- \n boots %>% \n sample_n(200) %>% \n mutate(spline = map(splits, fit_spline_on_bootstrap),\n aug_train = map(spline, augment))\n\nsplines_aug <- \n boot_splines %>% \n unnest(aug_train)\n\nggplot(splines_aug, aes(x, y)) +\n geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = \"blue\") +\n geom_point()" + }, + { + "objectID": "learn/statistics/bootstrap/index.html#session-info", + "href": "learn/statistics/bootstrap/index.html#session-info", + "title": "Bootstrap resampling and tidy regression models", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN 
(R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/statistics/infer/index.html", + "href": "learn/statistics/infer/index.html", + "title": "Hypothesis testing using resampling and tidy data", + "section": "", + "text": "This article only requires the tidymodels package.\nThe tidymodels package infer implements an expressive grammar to perform statistical inference that coheres with the tidyverse design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs.\nRegardless of which hypothesis test we’re using, we’re still asking the same kind of question:\n\nIs the effect or difference in our observed data real, or due to chance?\n\nTo answer this question, we start by assuming that the observed data came from some world where “nothing is going on” (i.e. the observed effect was simply due to random chance), and call this assumption our null hypothesis. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the alternate hypothesis, which supposes that the effect present in the observed data is actually due to the fact that “something is going on.”) We then calculate a test statistic from our data that describes the observed effect. 
We can use this test statistic to calculate a p-value, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined significance level \\(\\alpha\\), then we can reject our null hypothesis.\nIf you are new to hypothesis testing, take a look at\n\nSection 9.2 of Statistical Inference via Data Science\nThe American Statistical Association’s recent statement on p-values\n\nThe workflow of this package is designed around these ideas. Starting from some data set,\n\nspecify() allows you to specify the variable, or relationship between variables, that you’re interested in,\nhypothesize() allows you to declare the null hypothesis,\ngenerate() allows you to generate data reflecting the null hypothesis, and\ncalculate() allows you to calculate a distribution of statistics from the generated data to form the null distribution.\n\nThroughout this vignette, we make use of gss, a data set available in infer containing a sample of 500 observations of 11 variables from the General Social Survey.\n\nlibrary(tidymodels) # Includes the infer package\n\n# load in the data set\ndata(gss)\n\n# take a look at its structure\ndplyr::glimpse(gss)\n#> Rows: 500\n#> Columns: 11\n#> $ year <dbl> 2014, 1994, 1998, 1996, 1994, 1996, 1990, 2016, 2000, 1998, 20…\n#> $ age <dbl> 36, 34, 24, 42, 31, 32, 48, 36, 30, 33, 21, 30, 38, 49, 25, 56…\n#> $ sex <fct> male, female, male, male, male, female, female, female, female…\n#> $ college <fct> degree, no degree, degree, no degree, degree, no degree, no de…\n#> $ partyid <fct> ind, rep, ind, ind, rep, rep, dem, ind, rep, dem, dem, ind, de…\n#> $ hompop <dbl> 3, 4, 1, 4, 2, 4, 2, 1, 5, 2, 4, 3, 4, 4, 2, 2, 3, 2, 1, 2, 5,…\n#> $ hours <dbl> 50, 31, 40, 40, 40, 53, 32, 20, 40, 40, 23, 52, 38, 72, 48, 40…\n#> $ income <ord> $25000 or more, $20000 - 24999, $25000 or more, $25000 or more…\n#> $ class <fct> middle class, working class, working class, working class, mid…\n#> $ 
finrela <fct> below average, below average, below average, above average, ab…\n#> $ weight <dbl> 0.8960034, 1.0825000, 0.5501000, 1.0864000, 1.0825000, 1.08640…\n\nEach row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See ?gss for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let’s suppose that this data set is a representative sample of a population we want to learn about: American adults." + }, + { + "objectID": "learn/statistics/infer/index.html#introduction", + "href": "learn/statistics/infer/index.html#introduction", + "title": "Hypothesis testing using resampling and tidy data", + "section": "", + "text": "This article only requires the tidymodels package.\nThe tidymodels package infer implements an expressive grammar to perform statistical inference that coheres with the tidyverse design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs.\nRegardless of which hypothesis test we’re using, we’re still asking the same kind of question:\n\nIs the effect or difference in our observed data real, or due to chance?\n\nTo answer this question, we start by assuming that the observed data came from some world where “nothing is going on” (i.e. the observed effect was simply due to random chance), and call this assumption our null hypothesis. 
(In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the alternate hypothesis, which supposes that the effect present in the observed data is actually due to the fact that “something is going on.”) We then calculate a test statistic from our data that describes the observed effect. We can use this test statistic to calculate a p-value, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined significance level \\(\\alpha\\), then we can reject our null hypothesis.\nIf you are new to hypothesis testing, take a look at\n\nSection 9.2 of Statistical Inference via Data Science\nThe American Statistical Association’s recent statement on p-values\n\nThe workflow of this package is designed around these ideas. Starting from some data set,\n\nspecify() allows you to specify the variable, or relationship between variables, that you’re interested in,\nhypothesize() allows you to declare the null hypothesis,\ngenerate() allows you to generate data reflecting the null hypothesis, and\ncalculate() allows you to calculate a distribution of statistics from the generated data to form the null distribution.\n\nThroughout this vignette, we make use of gss, a data set available in infer containing a sample of 500 observations of 11 variables from the General Social Survey.\n\nlibrary(tidymodels) # Includes the infer package\n\n# load in the data set\ndata(gss)\n\n# take a look at its structure\ndplyr::glimpse(gss)\n#> Rows: 500\n#> Columns: 11\n#> $ year <dbl> 2014, 1994, 1998, 1996, 1994, 1996, 1990, 2016, 2000, 1998, 20…\n#> $ age <dbl> 36, 34, 24, 42, 31, 32, 48, 36, 30, 33, 21, 30, 38, 49, 25, 56…\n#> $ sex <fct> male, female, male, male, male, female, female, female, female…\n#> $ college <fct> degree, no degree, degree, no degree, degree, no degree, no de…\n#> $ partyid <fct> ind, rep, ind, ind, rep, rep, dem, ind, rep, dem, dem, ind, de…\n#> 
$ hompop <dbl> 3, 4, 1, 4, 2, 4, 2, 1, 5, 2, 4, 3, 4, 4, 2, 2, 3, 2, 1, 2, 5,…\n#> $ hours <dbl> 50, 31, 40, 40, 40, 53, 32, 20, 40, 40, 23, 52, 38, 72, 48, 40…\n#> $ income <ord> $25000 or more, $20000 - 24999, $25000 or more, $25000 or more…\n#> $ class <fct> middle class, working class, working class, working class, mid…\n#> $ finrela <fct> below average, below average, below average, above average, ab…\n#> $ weight <dbl> 0.8960034, 1.0825000, 0.5501000, 1.0864000, 1.0825000, 1.08640…\n\nEach row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See ?gss for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let’s suppose that this data set is a representative sample of a population we want to learn about: American adults." + }, + { + "objectID": "learn/statistics/infer/index.html#specify-variables", + "href": "learn/statistics/infer/index.html#specify-variables", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Specify variables", + "text": "Specify variables\nThe specify() function can be used to specify which of the variables in the data set you’re interested in. If you’re only interested in, say, the age of the respondents, you might write:\n\ngss %>%\n specify(response = age)\n#> Response: age (numeric)\n#> # A tibble: 500 × 1\n#> age\n#> <dbl>\n#> 1 36\n#> 2 34\n#> 3 24\n#> 4 42\n#> 5 31\n#> 6 32\n#> 7 48\n#> 8 36\n#> 9 30\n#> 10 33\n#> # ℹ 490 more rows\n\nOn the front end, the output of specify() just looks like it selects off the columns in the dataframe that you’ve specified. 
What do we see if we check the class of this object, though?\n\ngss %>%\n specify(response = age) %>%\n class()\n#> [1] \"infer\" \"tbl_df\" \"tbl\" \"data.frame\"\n\nWe can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata.\nIf you’re interested in two variables (age and partyid, for example) you can specify() their relationship in one of two (equivalent) ways:\n\n# as a formula\ngss %>%\n specify(age ~ partyid)\n#> Response: age (numeric)\n#> Explanatory: partyid (factor)\n#> # A tibble: 500 × 2\n#> age partyid\n#> <dbl> <fct> \n#> 1 36 ind \n#> 2 34 rep \n#> 3 24 ind \n#> 4 42 ind \n#> 5 31 rep \n#> 6 32 rep \n#> 7 48 dem \n#> 8 36 ind \n#> 9 30 rep \n#> 10 33 dem \n#> # ℹ 490 more rows\n\n# with the named arguments\ngss %>%\n specify(response = age, explanatory = partyid)\n#> Response: age (numeric)\n#> Explanatory: partyid (factor)\n#> # A tibble: 500 × 2\n#> age partyid\n#> <dbl> <fct> \n#> 1 36 ind \n#> 2 34 rep \n#> 3 24 ind \n#> 4 42 ind \n#> 5 31 rep \n#> 6 32 rep \n#> 7 48 dem \n#> 8 36 ind \n#> 9 30 rep \n#> 10 33 dem \n#> # ℹ 490 more rows\n\nIf you’re doing inference on one proportion or a difference in proportions, you will need to use the success argument to specify which level of your response variable is a success. 
For instance, if you’re interested in the proportion of the population with a college degree, you might use the following code:\n\n# specifying for inference on proportions\ngss %>%\n specify(response = college, success = \"degree\")\n#> Response: college (factor)\n#> # A tibble: 500 × 1\n#> college \n#> <fct> \n#> 1 degree \n#> 2 no degree\n#> 3 degree \n#> 4 no degree\n#> 5 degree \n#> 6 no degree\n#> 7 no degree\n#> 8 degree \n#> 9 degree \n#> 10 no degree\n#> # ℹ 490 more rows" + }, + { + "objectID": "learn/statistics/infer/index.html#declare-the-hypothesis", + "href": "learn/statistics/infer/index.html#declare-the-hypothesis", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Declare the hypothesis", + "text": "Declare the hypothesis\nThe next step in the infer pipeline is often to declare a null hypothesis using hypothesize(). The first step is to supply one of “independence” or “point” to the null argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to hypothesize():\n\ngss %>%\n specify(college ~ partyid, success = \"degree\") %>%\n hypothesize(null = \"independence\")\n#> Response: college (factor)\n#> Explanatory: partyid (factor)\n#> Null Hypothesis: independence\n#> # A tibble: 500 × 2\n#> college partyid\n#> <fct> <fct> \n#> 1 degree ind \n#> 2 no degree rep \n#> 3 degree ind \n#> 4 no degree ind \n#> 5 degree rep \n#> 6 no degree rep \n#> 7 no degree dem \n#> 8 degree ind \n#> 9 degree rep \n#> 10 no degree dem \n#> # ℹ 490 more rows\n\nIf you’re doing inference on a point estimate, you will also need to provide one of p (the true proportion of successes, between 0 and 1), mu (the true mean), med (the true median), or sigma (the true standard deviation). 
For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write:\n\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40)\n#> Response: hours (numeric)\n#> Null Hypothesis: point\n#> # A tibble: 500 × 1\n#> hours\n#> <dbl>\n#> 1 50\n#> 2 31\n#> 3 40\n#> 4 40\n#> 5 40\n#> 6 53\n#> 7 32\n#> 8 20\n#> 9 40\n#> 10 40\n#> # ℹ 490 more rows\n\nAgain, from the front-end, the dataframe outputted from hypothesize() looks almost exactly the same as it did when it came out of specify(), but infer now “knows” your null hypothesis." + }, + { + "objectID": "learn/statistics/infer/index.html#generate-the-distribution", + "href": "learn/statistics/infer/index.html#generate-the-distribution", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Generate the distribution", + "text": "Generate the distribution\nOnce we’ve asserted our null hypothesis using hypothesize(), we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the type argument:\n\nbootstrap: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data.\n\npermute: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample.\n\nsimulate: A value will be sampled from a theoretical distribution with parameters specified in hypothesize() for each replicate. 
(This option is currently only applicable for testing point estimates.)\n\nContinuing on with our example above, about the average number of hours worked a week, we might write:\n\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\")\n#> Response: hours (numeric)\n#> Null Hypothesis: point\n#> # A tibble: 2,500,000 × 2\n#> # Groups: replicate [5,000]\n#> replicate hours\n#> <int> <dbl>\n#> 1 1 28.6 \n#> 2 1 8.62\n#> 3 1 38.6 \n#> 4 1 18.6 \n#> 5 1 6.62\n#> 6 1 38.6 \n#> 7 1 53.6 \n#> 8 1 38.6 \n#> 9 1 35.6 \n#> 10 1 28.6 \n#> # ℹ 2,499,990 more rows\n\nIn the above example, we take 5000 bootstrap samples to form our null distribution.\nTo generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age:\n\ngss %>%\n specify(partyid ~ age) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\")\n#> Response: partyid (factor)\n#> Explanatory: age (numeric)\n#> Null Hypothesis: independence\n#> # A tibble: 2,500,000 × 3\n#> # Groups: replicate [5,000]\n#> partyid age replicate\n#> <fct> <dbl> <int>\n#> 1 dem 36 1\n#> 2 ind 34 1\n#> 3 dem 24 1\n#> 4 dem 42 1\n#> 5 ind 31 1\n#> 6 ind 32 1\n#> 7 ind 48 1\n#> 8 rep 36 1\n#> 9 rep 30 1\n#> 10 dem 33 1\n#> # ℹ 2,499,990 more rows" + }, + { + "objectID": "learn/statistics/infer/index.html#calculate-statistics", + "href": "learn/statistics/infer/index.html#calculate-statistics", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Calculate statistics", + "text": "Calculate statistics\nDepending on whether you’re carrying out computation-based inference or theory-based inference, you will either supply 
calculate() with the output of generate() or hypothesize(), respectively. The function, for one, takes in a stat argument, which is currently one of \"mean\", \"median\", \"sum\", \"sd\", \"prop\", \"count\", \"diff in means\", \"diff in medians\", \"diff in props\", \"Chisq\", \"F\", \"t\", \"z\", \"slope\", or \"correlation\". For example, continuing our example above to calculate the null distribution of mean hours worked per week:\n\ngss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n#> Response: hours (numeric)\n#> Null Hypothesis: point\n#> # A tibble: 5,000 × 2\n#> replicate stat\n#> <int> <dbl>\n#> 1 1 40.9\n#> 2 2 40.3\n#> 3 3 39.2\n#> 4 4 39.2\n#> 5 5 39.3\n#> 6 6 39.4\n#> 7 7 40.7\n#> 8 8 41.0\n#> 9 9 39.2\n#> 10 10 40.2\n#> # ℹ 4,990 more rows\n\nThe output of calculate() here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you’re carrying out inference on differences in means, medians, or proportions, or \\(t\\) and \\(z\\) statistics, you will need to supply an order argument, giving the order in which the explanatory variables should be subtracted. 
For instance, to find the difference in mean age of those that have a college degree and those that don’t, we might write:\n\ngss %>%\n specify(age ~ college) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(\"diff in means\", order = c(\"degree\", \"no degree\"))\n#> Response: age (numeric)\n#> Explanatory: college (factor)\n#> Null Hypothesis: independence\n#> # A tibble: 5,000 × 2\n#> replicate stat\n#> <int> <dbl>\n#> 1 1 -0.796\n#> 2 2 -0.558\n#> 3 3 0.773\n#> 4 4 -0.390\n#> 5 5 -2.26 \n#> 6 6 -0.355\n#> 7 7 -1.11 \n#> 8 8 -0.628\n#> 9 9 1.14 \n#> 10 10 -0.928\n#> # ℹ 4,990 more rows" + }, + { + "objectID": "learn/statistics/infer/index.html#other-utilities", + "href": "learn/statistics/infer/index.html#other-utilities", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Other utilities", + "text": "Other utilities\nThe infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with visualize()), calculate p-values (with get_p_value()), and calculate confidence intervals (with get_confidence_interval()).\nTo illustrate, we’ll go back to the example of determining whether the mean number of hours worked per week is 40 hours.\n\n# find the point estimate\npoint_estimate <- gss %>%\n specify(response = hours) %>%\n calculate(stat = \"mean\")\n\n# generate a null distribution\nnull_dist <- gss %>%\n specify(response = hours) %>%\n hypothesize(null = \"point\", mu = 40) %>%\n generate(reps = 5000, type = \"bootstrap\") %>%\n calculate(stat = \"mean\")\n\n(Notice the warning: Removed 1244 rows containing missing values. This would be worth noting if you were actually carrying out this hypothesis test.)\nOur point estimate 41.382 seems pretty close to 40, but a little bit different. 
We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn’t 40.\nWe could initially just visualize the null distribution.\n\nnull_dist %>%\n visualize()\n\n\n\n\n\n\n\n\nWhere does our sample’s observed statistic lie on this distribution? We can use the obs_stat argument to specify this.\n\nnull_dist %>%\n visualize() +\n shade_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n\n\n\n\n\n\n\n\nNotice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the + operator to apply the shade_p_value() function. This is because visualize() outputs a plot object from ggplot2 instead of a dataframe, and the + operator is needed to add the p-value layer to the plot object.) The red bar looks like it’s slightly far out on the right tail of the null distribution, so observing a sample mean of 41.382 hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though?\n\n# get a two-tailed p-value\np_value <- null_dist %>%\n get_p_value(obs_stat = point_estimate, direction = \"two_sided\")\n\np_value\n#> # A tibble: 1 × 1\n#> p_value\n#> <dbl>\n#> 1 0.0416\n\nIt looks like the p-value is 0.0416, which is pretty small—if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (1.382 hours) from 40 would be 0.0416. This may or may not be statistically significantly different, depending on the significance level \\(\\alpha\\) you decided on before you ran this analysis. 
If you had set \\(\\alpha = .05\\), then this difference would be statistically significant, but if you had set \\(\\alpha = .01\\), then it would not be.\nTo get a confidence interval around our estimate, we can write:\n\n# start with the null distribution\nnull_dist %>%\n # calculate the confidence interval around the point estimate\n get_confidence_interval(point_estimate = point_estimate,\n # at the 95% confidence level\n level = .95,\n # using the standard error\n type = \"se\")\n#> # A tibble: 1 × 2\n#> lower_ci upper_ci\n#> <dbl> <dbl>\n#> 1 40.1 42.7\n\nAs you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level \\(\\alpha = .05\\)." + }, + { + "objectID": "learn/statistics/infer/index.html#theoretical-methods", + "href": "learn/statistics/infer/index.html#theoretical-methods", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Theoretical methods", + "text": "Theoretical methods\nThe infer package also provides functionality to use theoretical methods for \"Chisq\", \"F\" and \"t\" test statistics.\nGenerally, to find a null distribution using theory-based methods, use the same code that you would use to find the null distribution using randomization-based methods, but skip the generate() step. 
For example, if we wanted to find a null distribution for the relationship between age (age) and party identification (partyid) using randomization, we could write:\n\nnull_f_distn <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"F\")\n\nTo find the null distribution using theory-based methods, instead, skip the generate() step entirely:\n\nnull_f_distn_theoretical <- gss %>%\n specify(age ~ partyid) %>%\n hypothesize(null = \"independence\") %>%\n calculate(stat = \"F\")\n\nWe’ll calculate the observed statistic to make use of in the following visualizations; this procedure is the same, regardless of the methods used to find the null distribution.\n\nF_hat <- gss %>% \n specify(age ~ partyid) %>%\n calculate(stat = \"F\")\n\nNow, instead of just piping the null distribution into visualize(), as we would do if we wanted to visualize the randomization-based null distribution, we also need to provide method = \"theoretical\" to visualize().\n\nvisualize(null_f_distn_theoretical, method = \"theoretical\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n\n\n\n\n\n\n\n\nTo get a sense of how the theory-based and randomization-based null distributions relate, we can pipe the randomization-based null distribution into visualize() and also specify method = \"both\"\n\nvisualize(null_f_distn, method = \"both\") +\n shade_p_value(obs_stat = F_hat, direction = \"greater\")\n\n\n\n\n\n\n\n\nThat’s it! This vignette covers most all of the key functionality of infer. See help(package = \"infer\") for a full list of functions and vignettes." 
+ }, + { + "objectID": "learn/statistics/infer/index.html#session-info", + "href": "learn/statistics/infer/index.html#session-info", + "title": "Hypothesis testing using resampling and tidy data", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/statistics/k-means/index.html", + "href": "learn/statistics/k-means/index.html", + "title": "K-means clustering with tidy data principles", + "section": "", + "text": "This article only requires the tidymodels 
package.\nK-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions:\n\ntidy()\naugment()\nglance()\n\nLet’s start by generating some random two-dimensional data with three clusters. Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster:\n\nlibrary(tidymodels)\n\nset.seed(27)\n\ncenters <- tibble(\n cluster = factor(1:3), \n num_points = c(100, 150, 50), # number points in each cluster\n x1 = c(5, 0, -3), # x1 coordinate of cluster center\n x2 = c(-1, 1, -2) # x2 coordinate of cluster center\n)\n\nlabelled_points <- \n centers %>%\n mutate(\n x1 = map2(num_points, x1, rnorm),\n x2 = map2(num_points, x2, rnorm)\n ) %>% \n select(-num_points) %>% \n unnest(cols = c(x1, x2))\n\nggplot(labelled_points, aes(x1, x2, color = cluster)) +\n geom_point(alpha = 0.3)\n\n\n\n\n\n\n\n\nThis is an ideal case for k-means clustering." + }, + { + "objectID": "learn/statistics/k-means/index.html#introduction", + "href": "learn/statistics/k-means/index.html#introduction", + "title": "K-means clustering with tidy data principles", + "section": "", + "text": "This article only requires the tidymodels package.\nK-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions:\n\ntidy()\naugment()\nglance()\n\nLet’s start by generating some random two-dimensional data with three clusters. 
Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster:\n\nlibrary(tidymodels)\n\nset.seed(27)\n\ncenters <- tibble(\n cluster = factor(1:3), \n num_points = c(100, 150, 50), # number points in each cluster\n x1 = c(5, 0, -3), # x1 coordinate of cluster center\n x2 = c(-1, 1, -2) # x2 coordinate of cluster center\n)\n\nlabelled_points <- \n centers %>%\n mutate(\n x1 = map2(num_points, x1, rnorm),\n x2 = map2(num_points, x2, rnorm)\n ) %>% \n select(-num_points) %>% \n unnest(cols = c(x1, x2))\n\nggplot(labelled_points, aes(x1, x2, color = cluster)) +\n geom_point(alpha = 0.3)\n\n\n\n\n\n\n\n\nThis is an ideal case for k-means clustering." + }, + { + "objectID": "learn/statistics/k-means/index.html#how-does-k-means-work", + "href": "learn/statistics/k-means/index.html#how-does-k-means-work", + "title": "K-means clustering with tidy data principles", + "section": "How does K-means work?", + "text": "How does K-means work?\nRather than using equations, this short animation using the artwork of Allison Horst explains the clustering process:" + }, + { + "objectID": "learn/statistics/k-means/index.html#clustering-in-r", + "href": "learn/statistics/k-means/index.html#clustering-in-r", + "title": "K-means clustering with tidy data principles", + "section": "Clustering in R", + "text": "Clustering in R\nWe’ll use the built-in kmeans() function, which accepts a data frame with all numeric columns as it’s primary argument.\n\npoints <- \n labelled_points %>% \n select(-cluster)\n\nkclust <- kmeans(points, centers = 3)\nkclust\n#> K-means clustering with 3 clusters of sizes 148, 51, 101\n#> \n#> Cluster means:\n#> x1 x2\n#> 1 0.08853475 1.045461\n#> 2 -3.14292460 -2.000043\n#> 3 5.00401249 -1.045811\n#> \n#> Clustering vector:\n#> [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3\n#> [38] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3\n#> [75] 3 3 3 3 3 3 3 3 3 3 3 
3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1\n#> [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n#> [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n#> [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n#> [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2\n#> [260] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n#> [297] 2 2 2 2\n#> \n#> Within cluster sum of squares by cluster:\n#> [1] 298.9415 108.8112 243.2092\n#> (between_SS / total_SS = 82.5 %)\n#> \n#> Available components:\n#> \n#> [1] \"cluster\" \"centers\" \"totss\" \"withinss\" \"tot.withinss\"\n#> [6] \"betweenss\" \"size\" \"iter\" \"ifault\"\nsummary(kclust)\n#> Length Class Mode \n#> cluster 300 -none- numeric\n#> centers 6 -none- numeric\n#> totss 1 -none- numeric\n#> withinss 3 -none- numeric\n#> tot.withinss 1 -none- numeric\n#> betweenss 1 -none- numeric\n#> size 3 -none- numeric\n#> iter 1 -none- numeric\n#> ifault 1 -none- numeric\n\nThe output is a list of vectors, where each component has a different length. There’s one of length 300, the same as our original data set. There are two elements of length 3 (withinss and tot.withinss) and centers is a matrix with 3 rows. And then there are the elements of length 1: totss, tot.withinss, betweenss, and iter. (The value ifault indicates possible algorithm problems.)\nThese differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a different kind of information.\n\ncluster (300 values) contains information about each point\ncenters, withinss, and size (3 values) contain information about each cluster\ntotss, tot.withinss, betweenss, and iter (1 value) contain information about the full clustering\n\nWhich of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. 
Because they communicate entirely different information (not to mention there’s no straightforward way to combine them), they are extracted by separate functions. augment adds the point classifications to the original data set:\n\naugment(kclust, points)\n#> # A tibble: 300 × 3\n#> x1 x2 .cluster\n#> <dbl> <dbl> <fct> \n#> 1 6.91 -2.74 3 \n#> 2 6.14 -2.45 3 \n#> 3 4.24 -0.946 3 \n#> 4 3.54 0.287 3 \n#> 5 3.91 0.408 3 \n#> 6 5.30 -1.58 3 \n#> 7 5.01 -1.77 3 \n#> 8 6.16 -1.68 3 \n#> 9 7.13 -2.17 3 \n#> 10 5.24 -2.42 3 \n#> # ℹ 290 more rows\n\nThe tidy() function summarizes on a per-cluster level:\n\ntidy(kclust)\n#> # A tibble: 3 × 5\n#> x1 x2 size withinss cluster\n#> <dbl> <dbl> <int> <dbl> <fct> \n#> 1 0.0885 1.05 148 299. 1 \n#> 2 -3.14 -2.00 51 109. 2 \n#> 3 5.00 -1.05 101 243. 3\n\nAnd as it always does, the glance() function extracts a single-row summary:\n\nglance(kclust)\n#> # A tibble: 1 × 4\n#> totss tot.withinss betweenss iter\n#> <dbl> <dbl> <dbl> <int>\n#> 1 3724. 651. 3073. 2" + }, + { + "objectID": "learn/statistics/k-means/index.html#exploratory-clustering", + "href": "learn/statistics/k-means/index.html#exploratory-clustering", + "title": "K-means clustering with tidy data principles", + "section": "Exploratory clustering", + "text": "Exploratory clustering\nWhile these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like dplyr.\nLet’s say we want to explore the effect of different choices of k, from 1 to 9, on this clustering. 
First cluster the data 9 times, each using a different value of k, then create columns containing the tidied, glanced and augmented data:\n\nkclusts <- \n tibble(k = 1:9) %>%\n mutate(\n kclust = map(k, ~kmeans(points, .x)),\n tidied = map(kclust, tidy),\n glanced = map(kclust, glance),\n augmented = map(kclust, augment, points)\n )\n\nkclusts\n#> # A tibble: 9 × 5\n#> k kclust tidied glanced augmented \n#> <int> <list> <list> <list> <list> \n#> 1 1 <kmeans> <tibble [1 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 2 2 <kmeans> <tibble [2 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 3 3 <kmeans> <tibble [3 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 4 4 <kmeans> <tibble [4 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 5 5 <kmeans> <tibble [5 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 6 6 <kmeans> <tibble [6 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 7 7 <kmeans> <tibble [7 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 8 8 <kmeans> <tibble [8 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n#> 9 9 <kmeans> <tibble [9 × 5]> <tibble [1 × 4]> <tibble [300 × 3]>\n\nWe can turn these into three separate data sets each representing a different type of data: using tidy(), using augment(), and using glance(). Each of these goes into a separate data set as they represent different types of data.\n\nclusters <- \n kclusts %>%\n unnest(cols = c(tidied))\n\nassignments <- \n kclusts %>% \n unnest(cols = c(augmented))\n\nclusterings <- \n kclusts %>%\n unnest(cols = c(glanced))\n\nNow we can plot the original points using the data from augment(), with each point colored according to the predicted cluster.\n\np1 <- \n ggplot(assignments, aes(x = x1, y = x2)) +\n geom_point(aes(color = .cluster), alpha = 0.8) + \n facet_wrap(~ k)\np1\n\n\n\n\n\n\n\n\nAlready we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when k is too high or too low. 
We can then add the centers of the cluster using the data from tidy():\n\np2 <- p1 + geom_point(data = clusters, size = 10, shape = \"x\")\np2\n\n\n\n\n\n\n\n\nThe data from glance() fills a different but equally important purpose; it lets us view trends of some summary statistics across values of k. Of particular interest is the total within sum of squares, saved in the tot.withinss column.\n\nggplot(clusterings, aes(k, tot.withinss)) +\n geom_line() +\n geom_point()\n\n\n\n\n\n\n\n\nThis represents the variance within the clusters. It decreases as k increases, but notice a bend (or “elbow”) around k = 3. This bend indicates that additional clusters beyond the third have little value. (See here for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output." + }, + { + "objectID": "learn/statistics/k-means/index.html#session-info", + "href": "learn/statistics/k-means/index.html#session-info", + "title": "K-means clustering with tidy data principles", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 
1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/statistics/tidy-analysis/index.html", + "href": "learn/statistics/tidy-analysis/index.html", + "title": "Correlation and regression fundamentals with tidy data principles", + "section": "", + "text": "This article only requires the tidymodels package.\nWhile the tidymodels package broom is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the nest()/unnest() functions from tidyr and the map() function in purrr." + }, + { + "objectID": "learn/statistics/tidy-analysis/index.html#introduction", + "href": "learn/statistics/tidy-analysis/index.html#introduction", + "title": "Correlation and regression fundamentals with tidy data principles", + "section": "", + "text": "This article only requires the tidymodels package.\nWhile the tidymodels package broom is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. 
These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the nest()/unnest() functions from tidyr and the map() function in purrr." + }, + { + "objectID": "learn/statistics/tidy-analysis/index.html#correlation-analysis", + "href": "learn/statistics/tidy-analysis/index.html#correlation-analysis", + "title": "Correlation and regression fundamentals with tidy data principles", + "section": "Correlation analysis", + "text": "Correlation analysis\nLet’s demonstrate this with a simple data set, the built-in Orange. We start by coercing Orange to a tibble. This gives a nicer print method that will be especially useful later on when we start working with list-columns.\n\nlibrary(tidymodels)\n\ndata(Orange)\n\nOrange <- as_tibble(Orange)\nOrange\n#> # A tibble: 35 × 3\n#> Tree age circumference\n#> <ord> <dbl> <dbl>\n#> 1 1 118 30\n#> 2 1 484 58\n#> 3 1 664 87\n#> 4 1 1004 115\n#> 5 1 1231 120\n#> 6 1 1372 142\n#> 7 1 1582 145\n#> 8 2 118 33\n#> 9 2 484 69\n#> 10 2 664 111\n#> # ℹ 25 more rows\n\nThis contains 35 observations of three variables: Tree, age, and circumference. Tree is a factor with five levels describing five trees. As might be expected, age and circumference are correlated:\n\ncor(Orange$age, Orange$circumference)\n#> [1] 0.9135189\n\nlibrary(ggplot2)\n\nggplot(Orange, aes(age, circumference, color = Tree)) +\n geom_line()\n\n\n\n\n\n\n\n\nSuppose you want to test for correlations individually within each tree. 
You can do this with dplyr’s group_by:\n\nOrange %>% \n group_by(Tree) %>%\n summarize(correlation = cor(age, circumference))\n#> # A tibble: 5 × 2\n#> Tree correlation\n#> <ord> <dbl>\n#> 1 3 0.988\n#> 2 1 0.985\n#> 3 5 0.988\n#> 4 2 0.987\n#> 5 4 0.984\n\n(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees).\nSuppose that instead of simply estimating a correlation, we want to perform a hypothesis test with cor.test():\n\nct <- cor.test(Orange$age, Orange$circumference)\nct\n#> \n#> Pearson's product-moment correlation\n#> \n#> data: Orange$age and Orange$circumference\n#> t = 12.9, df = 33, p-value = 1.931e-14\n#> alternative hypothesis: true correlation is not equal to 0\n#> 95 percent confidence interval:\n#> 0.8342364 0.9557955\n#> sample estimates:\n#> cor \n#> 0.9135189\n\nThis test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. We can get this into a nicely organized tibble using the tidy() function:\n\ntidy(ct)\n#> # A tibble: 1 × 8\n#> estimate statistic p.value parameter conf.low conf.high method alternative\n#> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> <chr> \n#> 1 0.914 12.9 1.93e-14 33 0.834 0.956 Pearson'… two.sided\n\nOften, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a nest-map-unnest workflow. For example, suppose we want to perform correlation tests for each different tree. 
We start by nesting our data based on the group of interest:\n\nnested <- \n Orange %>% \n nest(data = c(age, circumference))\n\nThen we perform a correlation test for each nested tibble using purrr::map():\n\nnested %>% \n mutate(test = map(data, ~ cor.test(.x$age, .x$circumference)))\n#> # A tibble: 5 × 3\n#> Tree data test \n#> <ord> <list> <list> \n#> 1 1 <tibble [7 × 2]> <htest>\n#> 2 2 <tibble [7 × 2]> <htest>\n#> 3 3 <tibble [7 × 2]> <htest>\n#> 4 4 <tibble [7 × 2]> <htest>\n#> 5 5 <tibble [7 × 2]> <htest>\n\nThis results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with map().\n\nnested %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) \n#> # A tibble: 5 × 4\n#> Tree data test tidied \n#> <ord> <list> <list> <list> \n#> 1 1 <tibble [7 × 2]> <htest> <tibble [1 × 8]>\n#> 2 2 <tibble [7 × 2]> <htest> <tibble [1 × 8]>\n#> 3 3 <tibble [7 × 2]> <htest> <tibble [1 × 8]>\n#> 4 4 <tibble [7 × 2]> <htest> <tibble [1 × 8]>\n#> 5 5 <tibble [7 × 2]> <htest> <tibble [1 × 8]>\n\nFinally, we want to unnest the tidied data frames so we can see the results in a flat tibble. 
All together, this looks like:\n\nOrange %>% \n nest(data = c(age, circumference)) %>% \n mutate(\n test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col\n tidied = map(test, tidy)\n ) %>% \n unnest(cols = tidied) %>% \n select(-data, -test)\n#> # A tibble: 5 × 9\n#> Tree estimate statistic p.value parameter conf.low conf.high method \n#> <ord> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> \n#> 1 1 0.985 13.0 0.0000485 5 0.901 0.998 Pearson's pro…\n#> 2 2 0.987 13.9 0.0000343 5 0.914 0.998 Pearson's pro…\n#> 3 3 0.988 14.4 0.0000290 5 0.919 0.998 Pearson's pro…\n#> 4 4 0.984 12.5 0.0000573 5 0.895 0.998 Pearson's pro…\n#> 5 5 0.988 14.1 0.0000318 5 0.916 0.998 Pearson's pro…\n#> # ℹ 1 more variable: alternative <chr>" + }, + { + "objectID": "learn/statistics/tidy-analysis/index.html#regression-models", + "href": "learn/statistics/tidy-analysis/index.html#regression-models", + "title": "Correlation and regression fundamentals with tidy data principles", + "section": "Regression models", + "text": "Regression models\nThis type of workflow becomes even more useful when applied to regressions. Untidy output for a regression looks like:\n\nlm_fit <- lm(age ~ circumference, data = Orange)\nsummary(lm_fit)\n#> \n#> Call:\n#> lm(formula = age ~ circumference, data = Orange)\n#> \n#> Residuals:\n#> Min 1Q Median 3Q Max \n#> -317.88 -140.90 -17.20 96.54 471.16 \n#> \n#> Coefficients:\n#> Estimate Std. Error t value Pr(>|t|) \n#> (Intercept) 16.6036 78.1406 0.212 0.833 \n#> circumference 7.8160 0.6059 12.900 1.93e-14 ***\n#> ---\n#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1\n#> \n#> Residual standard error: 203.1 on 33 degrees of freedom\n#> Multiple R-squared: 0.8345, Adjusted R-squared: 0.8295 \n#> F-statistic: 166.4 on 1 and 33 DF, p-value: 1.931e-14\n\nWhen we tidy these results, we get multiple rows of output for each model:\n\ntidy(lm_fit)\n#> # A tibble: 2 × 5\n#> term estimate std.error statistic p.value\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 16.6 78.1 0.212 8.33e- 1\n#> 2 circumference 7.82 0.606 12.9 1.93e-14\n\nNow we can handle multiple regressions at once using exactly the same workflow as before:\n\nOrange %>%\n nest(data = c(-Tree)) %>% \n mutate(\n fit = map(data, ~ lm(age ~ circumference, data = .x)),\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n#> # A tibble: 10 × 6\n#> Tree term estimate std.error statistic p.value\n#> <ord> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 1 (Intercept) -265. 98.6 -2.68 0.0436 \n#> 2 1 circumference 11.9 0.919 13.0 0.0000485\n#> 3 2 (Intercept) -132. 83.1 -1.59 0.172 \n#> 4 2 circumference 7.80 0.560 13.9 0.0000343\n#> 5 3 (Intercept) -210. 85.3 -2.46 0.0574 \n#> 6 3 circumference 12.0 0.835 14.4 0.0000290\n#> 7 4 (Intercept) -76.5 88.3 -0.867 0.426 \n#> 8 4 circumference 7.17 0.572 12.5 0.0000573\n#> 9 5 (Intercept) -54.5 76.9 -0.709 0.510 \n#> 10 5 circumference 8.79 0.621 14.1 0.0000318\n\nYou can just as easily use multiple predictors in the regressions, as shown here on the mtcars dataset. We nest the data into automatic vs. 
manual cars (the am column), then perform the regression within each nested tibble.\n\ndata(mtcars)\nmtcars <- as_tibble(mtcars) # to play nicely with list-cols\nmtcars\n#> # A tibble: 32 × 11\n#> mpg cyl disp hp drat wt qsec vs am gear carb\n#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>\n#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4\n#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4\n#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1\n#> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1\n#> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2\n#> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1\n#> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4\n#> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2\n#> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2\n#> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4\n#> # ℹ 22 more rows\n\nmtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col\n tidied = map(fit, tidy)\n ) %>% \n unnest(tidied) %>% \n select(-data, -fit)\n#> # A tibble: 8 × 6\n#> am term estimate std.error statistic p.value\n#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 1 (Intercept) 4.28 3.46 1.24 0.247 \n#> 2 1 mpg -0.101 0.0294 -3.43 0.00750 \n#> 3 1 qsec 0.0398 0.151 0.264 0.798 \n#> 4 1 gear -0.0229 0.349 -0.0656 0.949 \n#> 5 0 (Intercept) 4.92 1.40 3.52 0.00309 \n#> 6 0 mpg -0.192 0.0443 -4.33 0.000591\n#> 7 0 qsec 0.0919 0.0983 0.935 0.365 \n#> 8 0 gear 0.147 0.368 0.398 0.696\n\nWhat if you want not just the tidy() output, but the augment() and glance() outputs as well, while still performing each regression only once? 
Since we’re using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs.\n\nregressions <- \n mtcars %>%\n nest(data = c(-am)) %>% \n mutate(\n fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)),\n tidied = map(fit, tidy),\n glanced = map(fit, glance),\n augmented = map(fit, augment)\n )\n\nregressions %>% \n select(tidied) %>% \n unnest(tidied)\n#> # A tibble: 8 × 5\n#> term estimate std.error statistic p.value\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 4.28 3.46 1.24 0.247 \n#> 2 mpg -0.101 0.0294 -3.43 0.00750 \n#> 3 qsec 0.0398 0.151 0.264 0.798 \n#> 4 gear -0.0229 0.349 -0.0656 0.949 \n#> 5 (Intercept) 4.92 1.40 3.52 0.00309 \n#> 6 mpg -0.192 0.0443 -4.33 0.000591\n#> 7 qsec 0.0919 0.0983 0.935 0.365 \n#> 8 gear 0.147 0.368 0.398 0.696\n\nregressions %>% \n select(glanced) %>% \n unnest(glanced)\n#> # A tibble: 2 × 12\n#> r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC\n#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>\n#> 1 0.833 0.778 0.291 15.0 0.000759 3 -0.00580 10.0 12.8\n#> 2 0.625 0.550 0.522 8.32 0.00170 3 -12.4 34.7 39.4\n#> # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>\n\nregressions %>% \n select(augmented) %>% \n unnest(augmented)\n#> # A tibble: 32 × 10\n#> wt mpg qsec gear .fitted .resid .hat .sigma .cooksd .std.resid\n#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>\n#> 1 2.62 21 16.5 4 2.73 -0.107 0.517 0.304 0.0744 -0.527 \n#> 2 2.88 21 17.0 4 2.75 0.126 0.273 0.304 0.0243 0.509 \n#> 3 2.32 22.8 18.6 4 2.63 -0.310 0.312 0.279 0.188 -1.29 \n#> 4 2.2 32.4 19.5 4 1.70 0.505 0.223 0.233 0.278 1.97 \n#> 5 1.62 30.4 18.5 4 1.86 -0.244 0.269 0.292 0.0889 -0.982 \n#> 6 1.84 33.9 19.9 4 1.56 0.274 0.286 0.286 0.125 1.12 \n#> 7 1.94 27.3 18.9 4 2.19 -0.253 0.151 0.293 0.0394 -0.942 \n#> 8 2.14 26 16.7 5 2.21 -0.0683 0.277 0.307 0.00732 -0.276 \n#> 9 1.51 30.4 16.9 5 1.77 -0.259 0.430 0.284 0.263 -1.18 \n#> 
10 3.17 15.8 14.5 5 3.15 0.0193 0.292 0.308 0.000644 0.0789\n#> # ℹ 22 more rows\n\nBy combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. This includes:\n\nsorting by p-value or estimate to find the most significant terms across all tests,\np-value histograms, and\nvolcano plots comparing p-values to effect size estimates.\n\nIn each of these cases, we can easily filter, facet, or distinguish based on the term column. In short, this makes the tools of tidy data analysis available for the results of data analysis and models, not just the inputs." + }, + { + "objectID": "learn/statistics/tidy-analysis/index.html#session-info", + "href": "learn/statistics/tidy-analysis/index.html#session-info", + "title": "Correlation and regression fundamentals with tidy data principles", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 
[1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/statistics/xtabs/index.html", + "href": "learn/statistics/xtabs/index.html", + "title": "Statistical analysis of contingency tables", + "section": "", + "text": "This article only requires that you have the tidymodels package installed.\nIn this vignette, we’ll walk through conducting a \\(\\chi^2\\) (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We’ll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we’ll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.\nThroughout this vignette, we’ll make use of the ad_data data set (available in the modeldata package, which is part of tidymodels). This data set is related to cognitive impairment in 333 patients from Craig-Schapiro et al (2011). See ?ad_data for more information on the variables included and their source. One of the main research questions in these data were how a person’s genetics related to the Apolipoprotein E gene affect their cognitive skills. 
The data shows:\n\nlibrary(tidymodels) # Includes the infer package\n\ndata(ad_data, package = \"modeldata\")\nad_data %>%\n select(Genotype, Class)\n#> # A tibble: 333 × 2\n#> Genotype Class \n#> <fct> <fct> \n#> 1 E3E3 Control \n#> 2 E3E4 Control \n#> 3 E3E4 Control \n#> 4 E3E4 Control \n#> 5 E3E3 Control \n#> 6 E4E4 Impaired\n#> 7 E2E3 Control \n#> 8 E2E3 Control \n#> 9 E3E3 Control \n#> 10 E2E3 Impaired\n#> # ℹ 323 more rows\n\nThe three main genetic variants are called E2, E3, and E4. The values in Genotype represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of “E2E4” means E2 from one parent and E4 from the other)." + }, + { + "objectID": "learn/statistics/xtabs/index.html#introduction", + "href": "learn/statistics/xtabs/index.html#introduction", + "title": "Statistical analysis of contingency tables", + "section": "", + "text": "This article only requires that you have the tidymodels package installed.\nIn this vignette, we’ll walk through conducting a \\(\\chi^2\\) (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We’ll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we’ll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution.\nThroughout this vignette, we’ll make use of the ad_data data set (available in the modeldata package, which is part of tidymodels). This data set is related to cognitive impairment in 333 patients from Craig-Schapiro et al (2011). See ?ad_data for more information on the variables included and their source. One of the main research questions in these data were how a person’s genetics related to the Apolipoprotein E gene affect their cognitive skills. 
The data shows:\n\nlibrary(tidymodels) # Includes the infer package\n\ndata(ad_data, package = \"modeldata\")\nad_data %>%\n select(Genotype, Class)\n#> # A tibble: 333 × 2\n#> Genotype Class \n#> <fct> <fct> \n#> 1 E3E3 Control \n#> 2 E3E4 Control \n#> 3 E3E4 Control \n#> 4 E3E4 Control \n#> 5 E3E3 Control \n#> 6 E4E4 Impaired\n#> 7 E2E3 Control \n#> 8 E2E3 Control \n#> 9 E3E3 Control \n#> 10 E2E3 Impaired\n#> # ℹ 323 more rows\n\nThe three main genetic variants are called E2, E3, and E4. The values in Genotype represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of “E2E4” means E2 from one parent and E4 from the other)." + }, + { + "objectID": "learn/statistics/xtabs/index.html#test-of-independence", + "href": "learn/statistics/xtabs/index.html#test-of-independence", + "title": "Statistical analysis of contingency tables", + "section": "Test of independence", + "text": "Test of independence\nTo carry out a chi-squared test of independence, we’ll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data:\n\n\n\n\n\n\n\n\n\nIf there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise?\nFirst, to calculate the observed statistic, we can use specify() and calculate().\n\n# calculate the observed statistic\nobserved_indep_statistic <- ad_data %>%\n specify(Genotype ~ Class) %>%\n calculate(stat = \"Chisq\")\n\nThe observed \\(\\chi^2\\) statistic is 21.5774809. 
Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics.\nWe can generate() the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person’s genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two.\n\n# generate the null distribution using randomization\nnull_distribution_simulated <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n generate(reps = 5000, type = \"permute\") %>%\n calculate(stat = \"Chisq\")\n\nNote that, in the line specify(Genotype ~ Class) above, we could use the equivalent syntax specify(response = Genotype, explanatory = Class). The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization.\n\n# generate the null distribution by theoretical approximation\nnull_distribution_theoretical <- ad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n # note that we skip the generation step here!\n calculate(stat = \"Chisq\")\n\nTo get a sense for what these distributions look like, and where our observed statistic falls, we can use visualize():\n\n# visualize the null distribution and test statistic!\nnull_distribution_simulated %>%\n visualize() + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n\n\n\n\n\n\n\n\nWe could also visualize the observed statistic against the theoretical null distribution. 
Note that we skip the generate() and calculate() steps when using the theoretical approach, and that we now need to provide method = \"theoretical\" to visualize().\n\n# visualize the theoretical null distribution and test statistic!\nad_data %>%\n specify(Genotype ~ Class) %>%\n hypothesize(null = \"independence\") %>%\n visualize(method = \"theoretical\") + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n\n\n\n\n\n\n\n\nTo visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into visualize(), and further provide method = \"both\".\n\n# visualize both null distributions and the test statistic!\nnull_distribution_simulated %>%\n visualize(method = \"both\") + \n shade_p_value(observed_indep_statistic,\n direction = \"greater\")\n\n\n\n\n\n\n\n\nEither way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value:\n\n# calculate the p value from the observed statistic and null distribution\np_value_independence <- null_distribution_simulated %>%\n get_p_value(obs_stat = observed_indep_statistic,\n direction = \"greater\")\n\np_value_independence\n#> # A tibble: 1 × 1\n#> p_value\n#> <dbl>\n#> 1 0.0008\n\nThus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than 21.5774809 is approximately 8^{-4}.\nNote that, equivalently to the steps shown above, the package supplies a wrapper function, chisq_test, to carry out Chi-Squared tests of independence on tidy data. 
The syntax goes like this:\n\nchisq_test(ad_data, Genotype ~ Class)\n#> # A tibble: 1 × 3\n#> statistic chisq_df p_value\n#> <dbl> <int> <dbl>\n#> 1 21.6 5 0.000630" + }, + { + "objectID": "learn/statistics/xtabs/index.html#goodness-of-fit", + "href": "learn/statistics/xtabs/index.html#goodness-of-fit", + "title": "Statistical analysis of contingency tables", + "section": "Goodness of fit", + "text": "Goodness of fit\nNow, moving on to a chi-squared goodness of fit test, we’ll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. For example, Song et al (2004) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known).\nThe rates of the meta-analysis and our observed data are:\n\n# Song, Y., Stampfer, M. J., & Liu, S. (2004). Meta-Analysis: Apolipoprotein E \n# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, \n# 141(2), 137.\nmeta_rates <- c(\"E2E2\" = 0.71, \"E2E3\" = 11.4, \"E2E4\" = 2.32,\n \"E3E3\" = 61.0, \"E3E4\" = 22.6, \"E4E4\" = 2.22)\nmeta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100%\n\nobs_rates <- table(ad_data$Genotype)/nrow(ad_data)\nround(cbind(obs_rates, meta_rates) * 100, 2)\n#> obs_rates meta_rates\n#> E2E2 0.60 0.71\n#> E2E3 11.11 11.37\n#> E2E4 2.40 2.31\n#> E3E3 50.15 60.85\n#> E3E4 31.83 22.54\n#> E4E4 3.90 2.21\n\nSuppose our null hypothesis is that Genotype follows the same frequency distribution as the meta-analysis. 
Lets now test whether this difference in distributions is statistically significant.\nFirst, to carry out this hypothesis test, we would calculate our observed statistic.\n\n# calculating the null distribution\nobserved_gof_statistic <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n calculate(stat = \"Chisq\")\n\nThe observed statistic is 23.3838483. Now, generating a null distribution, by just dropping in a call to generate():\n\n# generating a null distribution\nnull_distribution_gof <- ad_data %>%\n specify(response = Genotype) %>%\n hypothesize(null = \"point\", p = meta_rates) %>%\n generate(reps = 5000, type = \"simulate\") %>%\n calculate(stat = \"Chisq\")\n\nAgain, to get a sense for what these distributions look like, and where our observed statistic falls, we can use visualize():\n\n# visualize the null distribution and test statistic!\nnull_distribution_gof %>%\n visualize() + \n shade_p_value(observed_gof_statistic,\n direction = \"greater\")\n\n\n\n\n\n\n\n\nThis statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? Calculating the p-value:\n\n# calculate the p-value\np_value_gof <- null_distribution_gof %>%\n get_p_value(observed_gof_statistic,\n direction = \"greater\")\n\np_value_gof\n#> # A tibble: 1 × 1\n#> p_value\n#> <dbl>\n#> 1 0.0008\n\nThus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately 8^{-4}.\nAgain, equivalently to the steps shown above, the package supplies a wrapper function, chisq_test, to carry out chi-squared goodness of fit tests on tidy data. 
The syntax goes like this:\n\nchisq_test(ad_data, response = Genotype, p = meta_rates)\n#> # A tibble: 1 × 3\n#> statistic chisq_df p_value\n#> <dbl> <dbl> <dbl>\n#> 1 23.4 5 0.000285" + }, + { + "objectID": "learn/statistics/xtabs/index.html#session-info", + "href": "learn/statistics/xtabs/index.html#session-info", + "title": "Statistical analysis of contingency tables", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/work/bayes-opt/index.html", + 
"href": "learn/work/bayes-opt/index.html", + "title": "Iterative Bayesian optimization of a classification model", + "section": "", + "text": "To use code in this article, you will need to install the following packages: kernlab, modeldata, themis, and tidymodels.\nMany of the examples for model tuning focus on grid search. For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, iterative search can be used to analyze the existing tuning parameter results and then predict which tuning parameters to try next.\nThere are a variety of methods for iterative search and the focus in this article is on Bayesian optimization. For more information on this method, these resources might be helpful:\n\nPractical bayesian optimization of machine learning algorithms (2012). J Snoek, H Larochelle, and RP Adams. Advances in neural information.\nA Tutorial on Bayesian Optimization for Machine Learning (2018). R Adams.\nGaussian Processes for Machine Learning (2006). C E Rasmussen and C Williams.\nOther articles!" + }, + { + "objectID": "learn/work/bayes-opt/index.html#introduction", + "href": "learn/work/bayes-opt/index.html#introduction", + "title": "Iterative Bayesian optimization of a classification model", + "section": "", + "text": "To use code in this article, you will need to install the following packages: kernlab, modeldata, themis, and tidymodels.\nMany of the examples for model tuning focus on grid search. For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, iterative search can be used to analyze the existing tuning parameter results and then predict which tuning parameters to try next.\nThere are a variety of methods for iterative search and the focus in this article is on Bayesian optimization. For more information on this method, these resources might be helpful:\n\nPractical bayesian optimization of machine learning algorithms (2012). 
J Snoek, H Larochelle, and RP Adams. Advances in neural information.\nA Tutorial on Bayesian Optimization for Machine Learning (2018). R Adams.\nGaussian Processes for Machine Learning (2006). C E Rasmussen and C Williams.\nOther articles!" + }, + { + "objectID": "learn/work/bayes-opt/index.html#cell-segmenting-revisited", + "href": "learn/work/bayes-opt/index.html#cell-segmenting-revisited", + "title": "Iterative Bayesian optimization of a classification model", + "section": "Cell segmenting revisited", + "text": "Cell segmenting revisited\nTo demonstrate this approach to tuning models, let’s return to the cell segmentation data from the Getting Started article on resampling:\n\nlibrary(tidymodels)\nlibrary(modeldata)\n\n# Load data\ndata(cells)\n\nset.seed(2369)\ntr_te_split <- initial_split(cells %>% select(-case), prop = 3/4)\ncell_train <- training(tr_te_split)\ncell_test <- testing(tr_te_split)\n\nset.seed(1697)\nfolds <- vfold_cv(cell_train, v = 10)" + }, + { + "objectID": "learn/work/bayes-opt/index.html#the-tuning-scheme", + "href": "learn/work/bayes-opt/index.html#the-tuning-scheme", + "title": "Iterative Bayesian optimization of a classification model", + "section": "The tuning scheme", + "text": "The tuning scheme\nSince the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about 64% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. 
We can use a recipe for all this pre-processing, but the number of principal components will need to be tuned so that we have enough (but not too many) representations of the data.\n\nlibrary(themis)\n\ncell_pre_proc <-\n recipe(class ~ ., data = cell_train) %>%\n step_YeoJohnson(all_predictors()) %>%\n step_normalize(all_predictors()) %>%\n step_pca(all_predictors(), num_comp = tune()) %>%\n step_downsample(class)\n\nIn this analysis, we will use a support vector machine to model the data. Let’s use a radial basis function (RBF) kernel and tune its main parameter (\\(\\sigma\\)). Additionally, the main SVM parameter, the cost value, also needs optimization.\n\nsvm_mod <-\n svm_rbf(mode = \"classification\", cost = tune(), rbf_sigma = tune()) %>%\n set_engine(\"kernlab\")\n\nThese two objects (the recipe and model) will be combined into a single object via the workflow() function from the workflows package; this object will be used in the optimization process.\n\nsvm_wflow <-\n workflow() %>%\n add_model(svm_mod) %>%\n add_recipe(cell_pre_proc)\n\nFrom this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by:\n\nsvm_set <- extract_parameter_set_dials(svm_wflow)\nsvm_set\n#> Collection of 3 parameters for tuning\n#> \n#> identifier type object\n#> cost cost nparam[+]\n#> rbf_sigma rbf_sigma nparam[+]\n#> num_comp num_comp nparam[+]\n\nThe default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the update() function. Let’s constrain the search to one to twenty components by updating the num_comp parameter. 
Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all):\n\nsvm_set <- \n svm_set %>% \n update(num_comp = num_comp(c(0L, 20L)))" + }, + { + "objectID": "learn/work/bayes-opt/index.html#sequential-tuning", + "href": "learn/work/bayes-opt/index.html#sequential-tuning", + "title": "Iterative Bayesian optimization of a classification model", + "section": "Sequential tuning", + "text": "Sequential tuning\nBayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. The strategy used to define how these two statistical quantities are used is defined by an acquisition function.\nFor example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as \\(\\kappa\\)) is a value that can be used to make trade-offs between exploration and exploitation.\n\nExploration means that the search will consider candidates in untested space.\nExploitation focuses in areas where the previous best results occurred.\n\nThe variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity.\nWe’ll use another acquisition function, expected improvement, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. 
More information on these functions can be found in the package vignette for acquisition functions.\n\nset.seed(12)\nsearch_res <-\n svm_wflow %>% \n tune_bayes(\n resamples = folds,\n # To use non-default parameter ranges\n param_info = svm_set,\n # Generate five at semi-random to start\n initial = 5,\n iter = 50,\n # How to measure performance?\n metrics = metric_set(roc_auc),\n control = control_bayes(no_improve = 30, verbose = TRUE)\n )\n#> \n#> ❯ Generating a set of 5 initial parameter results\n#> ✓ Initialization complete\n#> \n#> \n#> ── Iteration 1 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8794 (@iter 0)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=29.2, rbf_sigma=0.707, num_comp=17\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.7883 (+/-0.0111)\n#> \n#> ── Iteration 2 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8794 (@iter 0)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.4, rbf_sigma=0.0087, num_comp=13\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results: roc_auc=0.8954 (+/-0.0101)\n#> \n#> ── Iteration 3 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0374, rbf_sigma=0.00425, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8749 (+/-0.0123)\n#> \n#> ── Iteration 4 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> 
i cost=28.8, rbf_sigma=0.00386, num_comp=4\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8738 (+/-0.012)\n#> \n#> ── Iteration 5 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=21.5, rbf_sigma=0.0738, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8523 (+/-0.0115)\n#> \n#> ── Iteration 6 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=24.1, rbf_sigma=0.0111, num_comp=18\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8829 (+/-0.0101)\n#> \n#> ── Iteration 7 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=1.48, rbf_sigma=0.00629, num_comp=13\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8801 (+/-0.0118)\n#> \n#> ── Iteration 8 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8954 (@iter 2)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=25.3, rbf_sigma=0.011, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results: roc_auc=0.8985 (+/-0.0102)\n#> \n#> ── Iteration 9 ───────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8985 (@iter 8)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted 
candidates\n#> i cost=14.8, rbf_sigma=0.628, num_comp=0\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.7852 (+/-0.0173)\n#> \n#> ── Iteration 10 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8985 (@iter 8)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.1, rbf_sigma=0.0102, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results: roc_auc=0.899 (+/-0.00955)\n#> \n#> ── Iteration 11 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.899 (@iter 10)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=25.1, rbf_sigma=0.0111, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results: roc_auc=0.8994 (+/-0.00996)\n#> \n#> ── Iteration 12 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=28.3, rbf_sigma=0.0118, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8989 (+/-0.00928)\n#> \n#> ── Iteration 13 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=28.5, rbf_sigma=0.0026, num_comp=19\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8879 (+/-0.00951)\n#> \n#> ── Iteration 14 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 
candidates\n#> i Predicted candidates\n#> i cost=30.4, rbf_sigma=0.00245, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.886 (+/-0.0111)\n#> \n#> ── Iteration 15 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.5, rbf_sigma=0.0179, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8964 (+/-0.00967)\n#> \n#> ── Iteration 16 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.1, rbf_sigma=0.00933, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8994 (+/-0.00968)\n#> \n#> ── Iteration 17 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8994 (@iter 11)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=27.3, rbf_sigma=0.00829, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ♥ Newest results: roc_auc=0.8995 (+/-0.00996)\n#> \n#> ── Iteration 18 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.7, rbf_sigma=0.00363, num_comp=12\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8909 (+/-0.00973)\n#> \n#> ── Iteration 19 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process 
model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=29.6, rbf_sigma=0.0119, num_comp=8\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8964 (+/-0.0104)\n#> \n#> ── Iteration 20 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=23.6, rbf_sigma=0.0121, num_comp=0\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8636 (+/-0.0122)\n#> \n#> ── Iteration 21 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=27.6, rbf_sigma=0.00824, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8993 (+/-0.00961)\n#> \n#> ── Iteration 22 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=27.6, rbf_sigma=0.00901, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8993 (+/-0.01)\n#> \n#> ── Iteration 23 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=24.1, rbf_sigma=0.0133, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8988 (+/-0.00935)\n#> \n#> ── Iteration 24 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ 
Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=18.8, rbf_sigma=0.00058, num_comp=20\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8784 (+/-0.0112)\n#> \n#> ── Iteration 25 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=29.3, rbf_sigma=0.00958, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8994 (+/-0.00959)\n#> \n#> ── Iteration 26 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=30.6, rbf_sigma=0.00841, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8993 (+/-0.00949)\n#> \n#> ── Iteration 27 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00169, rbf_sigma=0.0201, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8794 (+/-0.0119)\n#> \n#> ── Iteration 28 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0012, rbf_sigma=0.000867, num_comp=20\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.3452 (+/-0.116)\n#> \n#> ── Iteration 29 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i 
Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00128, rbf_sigma=0.0138, num_comp=4\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8671 (+/-0.0132)\n#> \n#> ── Iteration 30 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0319, rbf_sigma=0.0279, num_comp=9\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8805 (+/-0.0121)\n#> \n#> ── Iteration 31 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0011, rbf_sigma=0.00787, num_comp=8\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8762 (+/-0.0121)\n#> \n#> ── Iteration 32 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=7.06, rbf_sigma=0.00645, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.89 (+/-0.0102)\n#> \n#> ── Iteration 33 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.000998, rbf_sigma=0.305, num_comp=7\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8757 (+/-0.0126)\n#> \n#> ── Iteration 34 ──────────────────────────────────────────────────────\n#> \n#> i Current best: 
roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00615, rbf_sigma=0.0134, num_comp=8\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.877 (+/-0.0122)\n#> \n#> ── Iteration 35 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.208, rbf_sigma=0.00946, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8793 (+/-0.0122)\n#> \n#> ── Iteration 36 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=31.6, rbf_sigma=0.00481, num_comp=15\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8961 (+/-0.00885)\n#> \n#> ── Iteration 37 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00108, rbf_sigma=0.653, num_comp=11\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.3649 (+/-0.106)\n#> \n#> ── Iteration 38 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00156, rbf_sigma=0.13, num_comp=5\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8816 (+/-0.0121)\n#> \n#> ── Iteration 39 ──────────────────────────────────────────────────────\n#> 
\n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=7.03, rbf_sigma=0.235, num_comp=16\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8572 (+/-0.0117)\n#> \n#> ── Iteration 40 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00466, rbf_sigma=0.211, num_comp=1\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.7714 (+/-0.0105)\n#> \n#> ── Iteration 41 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.0357, rbf_sigma=0.00126, num_comp=1\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.7725 (+/-0.0106)\n#> \n#> ── Iteration 42 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=23.1, rbf_sigma=0.0332, num_comp=16\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8612 (+/-0.0137)\n#> \n#> ── Iteration 43 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=3.56, rbf_sigma=0.0294, num_comp=3\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8724 (+/-0.0126)\n#> \n#> ── Iteration 44 
──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00168, rbf_sigma=0.0337, num_comp=7\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8791 (+/-0.0123)\n#> \n#> ── Iteration 45 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00128, rbf_sigma=0.00258, num_comp=10\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.3444 (+/-0.114)\n#> \n#> ── Iteration 46 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.506, rbf_sigma=0.000548, num_comp=4\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.8653 (+/-0.0131)\n#> \n#> ── Iteration 47 ──────────────────────────────────────────────────────\n#> \n#> i Current best: roc_auc=0.8995 (@iter 17)\n#> i Gaussian process model\n#> ✓ Gaussian process model\n#> i Generating 5000 candidates\n#> i Predicted candidates\n#> i cost=0.00142, rbf_sigma=0.204, num_comp=18\n#> i Estimating performance\n#> ✓ Estimating performance\n#> ⓧ Newest results: roc_auc=0.3586 (+/-0.113)\n#> ! 
No improvement for 30 iterations; returning current results.\n\nThe resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number:\n\nsearch_res\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 480 × 5\n#> splits id .metrics .notes .iter\n#> <list> <chr> <list> <list> <int>\n#> 1 <split [1362/152]> Fold01 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 2 <split [1362/152]> Fold02 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 3 <split [1362/152]> Fold03 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 4 <split [1362/152]> Fold04 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 5 <split [1363/151]> Fold05 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 6 <split [1363/151]> Fold06 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 7 <split [1363/151]> Fold07 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 8 <split [1363/151]> Fold08 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 9 <split [1363/151]> Fold09 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 10 <split [1363/151]> Fold10 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> # ℹ 470 more rows\n\nAs with grid search, we can summarize the results over resamples:\n\nestimates <- \n collect_metrics(search_res) %>% \n arrange(.iter)\n\nestimates\n#> # A tibble: 52 × 10\n#> cost rbf_sigma num_comp .metric .estimator mean n std_err .config\n#> <dbl> <dbl> <int> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.00383 2.72e-6 17 roc_auc binary 0.348 10 0.114 Prepro…\n#> 2 0.250 1.55e-2 7 roc_auc binary 0.879 10 0.0122 Prepro…\n#> 3 0.0372 1.02e-9 3 roc_auc binary 0.242 10 0.0574 Prepro…\n#> 4 1.28 8.13e-8 8 roc_auc binary 0.344 10 0.114 Prepro…\n#> 5 10.3 1.37e-3 14 roc_auc binary 0.877 10 0.0117 Prepro…\n#> 6 29.2 7.07e-1 17 roc_auc binary 0.788 10 0.0111 Iter1 \n#> 7 30.4 8.70e-3 13 roc_auc binary 0.895 10 0.0101 Iter2 \n#> 8 0.0374 4.25e-3 11 roc_auc binary 0.875 10 0.0123 Iter3 \n#> 9 28.8 3.86e-3 4 roc_auc binary 0.874 10 0.0120 Iter4 \n#> 10 21.5 7.38e-2 11 roc_auc binary 0.852 10 0.0115 Iter5 \n#> # ℹ 42 more rows\n#> # ℹ 1 more 
variable: .iter <int>\n\nThe best performance of the initial set of candidate values was AUC = 0.8793995. The best results were achieved at iteration 17 with a corresponding AUC value of 0.8995344. The five best results are:\n\nshow_best(search_res, metric = \"roc_auc\")\n#> # A tibble: 5 × 10\n#> cost rbf_sigma num_comp .metric .estimator mean n std_err .config .iter\n#> <dbl> <dbl> <int> <chr> <chr> <dbl> <int> <dbl> <chr> <int>\n#> 1 27.3 0.00829 9 roc_auc binary 0.900 10 0.00996 Iter17 17\n#> 2 29.3 0.00958 10 roc_auc binary 0.899 10 0.00959 Iter25 25\n#> 3 25.1 0.0111 9 roc_auc binary 0.899 10 0.00996 Iter11 11\n#> 4 31.1 0.00933 10 roc_auc binary 0.899 10 0.00968 Iter16 16\n#> 5 27.6 0.00901 9 roc_auc binary 0.899 10 0.0100 Iter22 22\n\nA plot of the search iterations can be created via:\n\nautoplot(search_res, type = \"performance\")\n\n\n\n\n\n\n\n\nThere are many parameter combinations have roughly equivalent results.\nHow did the parameters change over iterations?\n\nautoplot(search_res, type = \"parameters\") + \n labs(x = \"Iterations\", y = NULL)" + }, + { + "objectID": "learn/work/bayes-opt/index.html#session-info", + "href": "learn/work/bayes-opt/index.html#session-info", + "title": "Iterative Bayesian optimization of a classification model", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 
2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> kernlab * 0.9-32 2023-01-31 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang * 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> themis * 1.0.1 2023-04-14 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/work/case-weights/index.html", + "href": "learn/work/case-weights/index.html", + "title": "Creating case weights based on time", + "section": "", + "text": "To use code in this article, you will need to install the following packages: tidymodels.\nThis article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others." + }, + { + "objectID": "learn/work/case-weights/index.html#introduction", + "href": "learn/work/case-weights/index.html#introduction", + "title": "Creating case weights based on time", + "section": "", + "text": "To use code in this article, you will need to install the following packages: tidymodels.\nThis article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others." 
+ }, + { + "objectID": "learn/work/case-weights/index.html#example-data", + "href": "learn/work/case-weights/index.html#example-data", + "title": "Creating case weights based on time", + "section": "Example Data", + "text": "Example Data\nTo demonstrate we will use the Chicago data from the modeldata package.\n\nlibrary(tidymodels)\ndata(Chicago)\n\nChicago <- Chicago %>%\n select(ridership, date, one_of(stations))\n\nFrom ?Chicago\n\nThese data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station.\n\n\nThe date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules.\n\nFor simplicity, we have limited our view to the date and station variables." + }, + { + "objectID": "learn/work/case-weights/index.html#creating-weights", + "href": "learn/work/case-weights/index.html#creating-weights", + "title": "Creating case weights based on time", + "section": "Creating weights", + "text": "Creating weights\nThis data set contains daily information from 2001-01-22 to 2016-08-28. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations appear a long time ago and not be as representative of the future as more recent observations.\nWe could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. 
Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past.\nWe need to decide on a way to calculate the case weights. The main thing constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when x = 0 and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function\n\\[ weight = base ^ x \\]\nwhere base is some constant and x is the number of days. To make sure that we select a reasonable base, we need to do some manual testing, starting with looking at how old the oldest observation is.\n\ndifftime(\"2016-01-01\", min(Chicago$date))\n#> Time difference of 5457.333 days\n\nUsing this information we can visualize the weight curve, to see if we like the value of base.\n\ntibble_days <- tibble(days = 0:5457)\n\ntibble_days %>%\n ggplot(aes(days)) +\n geom_function(fun = ~ 0.99 ^ .x)\n\n\n\n\n\n\n\n\nsetting base to 0.99 appears to be down weighted too much. Any observation more than a year old would have no influence.\nLet us try a few more values to find\n\nmap_dfr(\n c(0.99, 0.999, 0.9999),\n ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days)\n) %>%\n ggplot(aes(days, value, group = base, color = base)) +\n geom_line()\n\n\n\n\n\n\n\n\nFrom this, we could pick something around 0.999 since it gives a better balance. 
Let’s create a small function to help us encode this weight.\n\nweights_from_dates <- function(x, ref) {\n if_else(\n condition = x >= ref,\n true = 1, # <- Notice that I'm setting any future weight to 1.\n false = 0.999 ^ as.numeric(difftime(ref, x, units = \"days\"))\n )\n}\n\nWe then modify Chicago to add a weight column, explicitly making it an importance weight with importance_weight().\n\nChicago <- Chicago %>%\n mutate(weight = weights_from_dates(date, \"2016-01-01\"),\n weight = importance_weights(weight))\n\nThis approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don’t need to use days as the unit. Seconds, weeks, or years could be used as well." + }, + { + "objectID": "learn/work/case-weights/index.html#modeling", + "href": "learn/work/case-weights/index.html#modeling", + "title": "Creating case weights based on time", + "section": "Modeling", + "text": "Modeling\nWe start by splitting up our data into a training and testing set based on the day \"2016-01-01\". We added weights to the data set before splitting it so each set has weights.\n\nChicago_train <- Chicago %>% filter(date < \"2016-01-01\")\nChicago_test <- Chicago %>% filter(date >= \"2016-01-01\")\n\nNext, we are going to create a recipe. 
The weights won’t have any influence on the preprocessing since none of these operations are supervised and we are using importance weights.\n\nbase_recipe <-\n recipe(ridership ~ ., data = Chicago_train) %>%\n # Create date features\n step_date(date) %>%\n step_holiday(date, keep_original_cols = FALSE) %>%\n # Remove any columns with a single unique value\n step_zv(all_predictors()) %>%\n # Normalize all the numerical features\n step_normalize(all_numeric_predictors()) %>%\n # Perform PCA to reduce the correlation bet the stations\n step_pca(all_numeric_predictors(), threshold = 0.95)\n\nNext we need to build the rest of the workflow. We use a linear regression specification\n\nlm_spec <-\n linear_reg() %>%\n set_engine(\"lm\")\n\nand we add these together in the workflow. To activate the case weights, we use the add_case_weights() function to specify the name of the case weights being used.\n\nlm_wflow <-\n workflow() %>% \n add_case_weights(weight) %>%\n add_recipe(base_recipe) %>%\n add_model(lm_spec)\n\nlm_wflow\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: linear_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 5 Recipe Steps\n#> \n#> • step_date()\n#> • step_holiday()\n#> • step_zv()\n#> • step_normalize()\n#> • step_pca()\n#> \n#> ── Case Weights ──────────────────────────────────────────────────────\n#> weight\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Linear Regression Model Specification (regression)\n#> \n#> Computational engine: lm\n\nWith all that done we can fit the workflow with the usual syntax:\n\nlm_fit <- fit(lm_wflow, data = Chicago_train)\nlm_fit\n#> ══ Workflow [trained] ════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: linear_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 5 Recipe Steps\n#> \n#> • step_date()\n#> • 
step_holiday()\n#> • step_zv()\n#> • step_normalize()\n#> • step_pca()\n#> \n#> ── Case Weights ──────────────────────────────────────────────────────\n#> weight\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> \n#> Call:\n#> stats::lm(formula = ..y ~ ., data = data, weights = weights)\n#> \n#> Coefficients:\n#> (Intercept) date_dowMon date_dowTue date_dowWed date_dowThu \n#> 1.762599 13.307654 14.689027 14.620178 14.382313 \n#> date_dowFri date_dowSat date_monthFeb date_monthMar date_monthApr \n#> 13.695433 1.228233 0.364342 1.348229 1.409897 \n#> date_monthMay date_monthJun date_monthJul date_monthAug date_monthSep \n#> 1.188189 2.598296 2.219721 2.406998 1.932061 \n#> date_monthOct date_monthNov date_monthDec PC1 PC2 \n#> 2.655552 0.909007 -0.004751 0.073014 -1.591021 \n#> PC3 PC4 PC5 \n#> -0.608386 -0.205305 0.696010" + }, + { + "objectID": "learn/work/case-weights/index.html#session-info", + "href": "learn/work/case-weights/index.html#session-info", + "title": "Creating case weights based on time", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 
4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/work/nested-resampling/index.html", + "href": "learn/work/nested-resampling/index.html", + "title": "Nested resampling", + "section": "", + "text": "To use code in this article, you will need to install the following packages: furrr, kernlab, mlbench, scales, and tidymodels.\nIn this article, we discuss an alternative method for evaluating and tuning models, called nested resampling. While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance." + }, + { + "objectID": "learn/work/nested-resampling/index.html#introduction", + "href": "learn/work/nested-resampling/index.html#introduction", + "title": "Nested resampling", + "section": "", + "text": "To use code in this article, you will need to install the following packages: furrr, kernlab, mlbench, scales, and tidymodels.\nIn this article, we discuss an alternative method for evaluating and tuning models, called nested resampling. While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance." 
+ }, + { + "objectID": "learn/work/nested-resampling/index.html#resampling-models", + "href": "learn/work/nested-resampling/index.html#resampling-models", + "title": "Nested resampling", + "section": "Resampling models", + "text": "Resampling models\nA typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. In rsample, we use the term analysis set for the data that are used to fit the model and the term assessment set for the set used to compute performance:\n\n\n\n\n\n\n\n\n\nA common method for tuning models is grid search where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter.\nThe potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for optimization bias since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance.\nNested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An outer resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. 
This process occurs 10 times.\nOnce the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method’s assessment sets are a unbiased estimate of the model.\nWe will simulate some regression data to illustrate the methods. The mlbench package has a function mlbench::mlbench.friedman1() that can simulate a complex regression data structure from the original MARS publication. A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed.\n\nlibrary(mlbench)\nsim_data <- function(n) {\n tmp <- mlbench.friedman1(n, sd = 1)\n tmp <- cbind(tmp$x, tmp$y)\n tmp <- as.data.frame(tmp)\n names(tmp)[ncol(tmp)] <- \"y\"\n tmp\n}\n\nset.seed(9815)\ntrain_dat <- sim_data(100)\nlarge_dat <- sim_data(10^5)" + }, + { + "objectID": "learn/work/nested-resampling/index.html#nested-resampling", + "href": "learn/work/nested-resampling/index.html#nested-resampling", + "title": "Nested resampling", + "section": "Nested resampling", + "text": "Nested resampling\nTo get started, the types of resampling methods need to be specified. This isn’t a large data set, so 5 repeats of 10-fold cross validation will be used as the outer resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let’s use 25 iterations of the bootstrap. This means that there will eventually be 5 * 10 * 25 = 1250 models that are fit to the data per tuning parameter. 
These models will be discarded once the performance of the model has been quantified.\nTo create the tibble with the resampling specifications:\n\nlibrary(tidymodels)\nresults <- nested_cv(train_dat, \n outside = vfold_cv(repeats = 5), \n inside = bootstraps(times = 25))\nresults\n#> # Nested resampling:\n#> # outer: 10-fold cross-validation repeated 5 times\n#> # inner: Bootstrap sampling\n#> # A tibble: 50 × 4\n#> splits id id2 inner_resamples\n#> <list> <chr> <chr> <list> \n#> 1 <split [90/10]> Repeat1 Fold01 <boot [25 × 2]>\n#> 2 <split [90/10]> Repeat1 Fold02 <boot [25 × 2]>\n#> 3 <split [90/10]> Repeat1 Fold03 <boot [25 × 2]>\n#> 4 <split [90/10]> Repeat1 Fold04 <boot [25 × 2]>\n#> 5 <split [90/10]> Repeat1 Fold05 <boot [25 × 2]>\n#> 6 <split [90/10]> Repeat1 Fold06 <boot [25 × 2]>\n#> 7 <split [90/10]> Repeat1 Fold07 <boot [25 × 2]>\n#> 8 <split [90/10]> Repeat1 Fold08 <boot [25 × 2]>\n#> 9 <split [90/10]> Repeat1 Fold09 <boot [25 × 2]>\n#> 10 <split [90/10]> Repeat1 Fold10 <boot [25 × 2]>\n#> # ℹ 40 more rows\n\nThe splitting information for each resample is contained in the split objects. 
Focusing on the second fold of the first repeat:\n\nresults$splits[[2]]\n#> <Analysis/Assess/Total>\n#> <90/10/100>\n\n<90/10/100> indicates the number of observations in the analysis set, assessment set, and the original data.\nEach element of inner_resamples has its own tibble with the bootstrapping splits.\n\nresults$inner_resamples[[5]]\n#> # Bootstrap sampling \n#> # A tibble: 25 × 2\n#> splits id \n#> <list> <chr> \n#> 1 <split [90/31]> Bootstrap01\n#> 2 <split [90/33]> Bootstrap02\n#> 3 <split [90/37]> Bootstrap03\n#> 4 <split [90/31]> Bootstrap04\n#> 5 <split [90/32]> Bootstrap05\n#> 6 <split [90/32]> Bootstrap06\n#> 7 <split [90/36]> Bootstrap07\n#> 8 <split [90/34]> Bootstrap08\n#> 9 <split [90/29]> Bootstrap09\n#> 10 <split [90/31]> Bootstrap10\n#> # ℹ 15 more rows\n\nThese are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data:\n\nresults$inner_resamples[[5]]$splits[[1]]\n#> <Analysis/Assess/Total>\n#> <90/31/90>\n\nTo start, we need to define how the model will be created and measured. Let’s use a radial basis support vector machine model via the function kernlab::ksvm. This model is generally considered to have two tuning parameters: the SVM cost value and the kernel parameter sigma. For illustration purposes here, only the cost value will be tuned and the function kernlab::sigest will be used to estimate sigma during each model fit. This is automatically done by ksvm.\nAfter the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. One important note: for this model, it is critical to center and scale the predictors before computing dot products. 
We don’t do this operation here because mlbench.friedman1 simulates all of the predictors to be standardized uniform random variables.\nOur function to fit the model and compute the RMSE is:\n\nlibrary(kernlab)\n\n# `object` will be an `rsplit` object from our `results` tibble\n# `cost` is the tuning parameter\nsvm_rmse <- function(object, cost = 1) {\n y_col <- ncol(object$data)\n mod <- \n svm_rbf(mode = \"regression\", cost = cost) %>% \n set_engine(\"kernlab\") %>% \n fit(y ~ ., data = analysis(object))\n \n holdout_pred <- \n predict(mod, assessment(object) %>% dplyr::select(-y)) %>% \n bind_cols(assessment(object) %>% dplyr::select(y))\n rmse(holdout_pred, truth = y, estimate = .pred)$.estimate\n}\n\n# In some case, we want to parameterize the function over the tuning parameter:\nrmse_wrapper <- function(cost, object) svm_rmse(object, cost)\n\nFor the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. To do this, create a wrapper:\n\n# `object` will be an `rsplit` object for the bootstrap samples\ntune_over_cost <- function(object) {\n tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% \n mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object))\n}\n\nSince this will be called across the set of outer cross-validation splits, another wrapper is required:\n\n# `object` is an `rsplit` object in `results$inner_resamples` \nsummarize_tune_results <- function(object) {\n # Return row-bound tibble that has the 25 bootstrap results\n map_df(object$splits, tune_over_cost) %>%\n # For each value of the tuning parameter, compute the \n # average RMSE which is the inner bootstrap estimate. 
\n group_by(cost) %>%\n summarize(mean_RMSE = mean(RMSE, na.rm = TRUE),\n n = length(RMSE),\n .groups = \"drop\")\n}\n\nNow that those functions are defined, we can execute all the inner resampling loops:\n\ntuning_results <- map(results$inner_resamples, summarize_tune_results) \n\nAlternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using map(), the function future_map() parallelizes the iterations using the future package. The multisession plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations.\n\nlibrary(furrr)\nplan(multisession)\n\ntuning_results <- future_map(results$inner_resamples, summarize_tune_results) \n\nThe object tuning_results is a list of data frames for each of the 50 outer resamples.\nLet’s make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations:\n\nlibrary(scales)\n\npooled_inner <- tuning_results %>% bind_rows\n\nbest_cost <- function(dat) dat[which.min(dat$mean_RMSE),]\n\np <- \n ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"Inner RMSE\")\n\nfor (i in 1:length(tuning_results))\n p <- p +\n geom_line(data = tuning_results[[i]], alpha = .2) +\n geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4)\n\np <- p + geom_smooth(data = pooled_inner, se = FALSE)\np\n\n\n\n\n\n\n\n\nEach gray line is a separate bootstrap resampling curve created from a different 90% of the data. 
The blue line is a LOESS smooth of all the results pooled together.\nTo determine the best parameter estimate for each of the outer resampling iterations:\n\ncost_vals <- \n tuning_results %>% \n map_df(best_cost) %>% \n select(cost)\n\nresults <- \n bind_cols(results, cost_vals) %>% \n mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1))))\n\nggplot(results, aes(x = cost)) + \n geom_bar() + \n xlab(\"SVM Cost\") + \n scale_x_discrete(drop = FALSE)\n\n\n\n\n\n\n\n\nMost of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger.\nNow that we have these estimates, we can compute the outer resampling results for each of the 50 splits using the corresponding tuning parameter value:\n\nresults <- \n results %>% \n mutate(RMSE = map2_dbl(splits, cost, svm_rmse))\n\nsummary(results$RMSE)\n#> Min. 1st Qu. Median Mean 3rd Qu. Max. \n#> 1.672 2.095 2.685 2.690 3.252 4.254\n\nThe estimated RMSE for the model tuning process is 2.69.\nWhat is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, 50 SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. 
The associated RMSE is the biased estimate.\n\nnot_nested <- \n map(results$splits, tune_over_cost) %>%\n bind_rows\n\nouter_summary <- not_nested %>% \n group_by(cost) %>% \n summarize(outer_RMSE = mean(RMSE), n = length(RMSE))\n\nouter_summary\n#> # A tibble: 11 × 3\n#> cost outer_RMSE n\n#> <dbl> <dbl> <int>\n#> 1 0.25 3.54 50\n#> 2 0.5 3.11 50\n#> 3 1 2.77 50\n#> 4 2 2.62 50\n#> 5 4 2.65 50\n#> 6 8 2.75 50\n#> 7 16 2.82 50\n#> 8 32 2.82 50\n#> 9 64 2.83 50\n#> 10 128 2.83 50\n#> 11 256 2.82 50\n\nggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + \n geom_point() + \n geom_line() + \n scale_x_continuous(trans = 'log2') +\n xlab(\"SVM Cost\") + ylab(\"RMSE\")\n\n\n\n\n\n\n\n\nThe non-nested procedure estimates the RMSE to be 2.62. Both estimates are fairly close.\nThe approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning.\n\nfinalModel <- ksvm(y ~ ., data = train_dat, C = 2)\nlarge_pred <- predict(finalModel, large_dat[, -ncol(large_dat)])\nsqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE))\n#> [1] 2.712059\n\nThe nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar." 
+ }, + { + "objectID": "learn/work/nested-resampling/index.html#session-info", + "href": "learn/work/nested-resampling/index.html#session-info", + "title": "Nested resampling", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> kernlab * 0.9-32 2023-01-31 [1] CRAN (R 4.3.0)\n#> mlbench * 2.1-3.1 2023-05-05 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> scales * 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/work/tune-svm/index.html", + 
"href": "learn/work/tune-svm/index.html", + "title": "Model tuning via grid search", + "section": "", + "text": "To use code in this article, you will need to install the following packages: kernlab, mlbench, and tidymodels.\nThis article demonstrates how to tune a model using grid search. Many models have hyperparameters that can’t be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best." + }, + { + "objectID": "learn/work/tune-svm/index.html#introduction", + "href": "learn/work/tune-svm/index.html#introduction", + "title": "Model tuning via grid search", + "section": "", + "text": "To use code in this article, you will need to install the following packages: kernlab, mlbench, and tidymodels.\nThis article demonstrates how to tune a model using grid search. Many models have hyperparameters that can’t be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best." + }, + { + "objectID": "learn/work/tune-svm/index.html#example-data", + "href": "learn/work/tune-svm/index.html#example-data", + "title": "Model tuning via grid search", + "section": "Example data", + "text": "Example data\nTo demonstrate model tuning, we’ll use the Ionosphere data in the mlbench package:\n\nlibrary(tidymodels)\nlibrary(mlbench)\ndata(Ionosphere)\n\nFrom ?Ionosphere:\n\nThis radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. “good” radar returns are those showing evidence of some type of structure in the ionosphere. 
“bad” returns are those that do not; their signals pass through the ionosphere.\n\n\nReceived signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this database are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details.\n\nThere are 43 predictors and a factor outcome. Two of the predictors are factors (V1 and V2) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions:\n\ntable(Ionosphere$V1)\n#> \n#> 0 1 \n#> 38 313\ntable(Ionosphere$V2)\n#> \n#> 0 \n#> 351\n\nThere’s no point of putting V2 into any model since it is a zero-variance predictor. V1 is not but it could be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value:\n\nglm(Class ~ ., data = Ionosphere, family = binomial)\n\n# Surprisingly, this doesn't help: \n\nglm(Class ~ . 
- V2, data = Ionosphere, family = binomial)\n\nLet’s remove these two problematic variables:\n\nIonosphere <- Ionosphere %>% select(-V1, -V2)" + }, + { + "objectID": "learn/work/tune-svm/index.html#inputs-for-the-search", + "href": "learn/work/tune-svm/index.html#inputs-for-the-search", + "title": "Model tuning via grid search", + "section": "Inputs for the search", + "text": "Inputs for the search\nTo demonstrate, we’ll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the \\(\\sigma\\) parameter in the kernel function:\n\nsvm_mod <-\n svm_rbf(cost = tune(), rbf_sigma = tune()) %>%\n set_mode(\"classification\") %>%\n set_engine(\"kernlab\")\n\nIn this article, tuning will be demonstrated in two ways, using:\n\na standard R formula, and\na recipe.\n\nLet’s create a simple recipe here:\n\niono_rec <-\n recipe(Class ~ ., data = Ionosphere) %>%\n # remove any zero variance predictors\n step_zv(all_predictors()) %>% \n # remove any linear combinations\n step_lincomb(all_numeric())\n\nThe only other required item for tuning is a resampling strategy as defined by an rsample object. Let’s demonstrate using basic bootstrapping:\n\nset.seed(4943)\niono_rs <- bootstraps(Ionosphere, times = 30)" + }, + { + "objectID": "learn/work/tune-svm/index.html#optional-inputs", + "href": "learn/work/tune-svm/index.html#optional-inputs", + "title": "Model tuning via grid search", + "section": "Optional inputs", + "text": "Optional inputs\nAn optional step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. 
To do this, a yardstick package function can be used to create a metric set:\n\nroc_vals <- metric_set(roc_auc)\n\nIf no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used.\nAlso, a control object can be passed that specifies different aspects of the search. Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on.\n\nctrl <- control_grid(verbose = FALSE, save_pred = TRUE)" + }, + { + "objectID": "learn/work/tune-svm/index.html#executing-with-a-formula", + "href": "learn/work/tune-svm/index.html#executing-with-a-formula", + "title": "Model tuning via grid search", + "section": "Executing with a formula", + "text": "Executing with a formula\nFirst, we can use the formula interface:\n\nset.seed(35)\nformula_res <-\n svm_mod %>% \n tune_grid(\n Class ~ .,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nformula_res\n#> # Tuning results\n#> # Bootstrap sampling \n#> # A tibble: 30 × 5\n#> splits id .metrics .notes .predictions\n#> <list> <chr> <list> <list> <list> \n#> 1 <split [351/120]> Bootstrap01 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 2 <split [351/130]> Bootstrap02 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 3 <split [351/137]> Bootstrap03 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 4 <split [351/141]> Bootstrap04 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 5 <split [351/131]> Bootstrap05 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 6 <split [351/131]> Bootstrap06 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 7 <split [351/127]> Bootstrap07 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 8 <split [351/123]> Bootstrap08 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 9 <split [351/131]> Bootstrap09 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 10 
<split [351/117]> Bootstrap10 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> # ℹ 20 more rows\n\nThe .metrics column contains tibbles of the performance metrics for each tuning parameter combination:\n\nformula_res %>% \n select(.metrics) %>% \n slice(1) %>% \n pull(1)\n#> [[1]]\n#> # A tibble: 10 × 6\n#> cost rbf_sigma .metric .estimator .estimate .config \n#> <dbl> <dbl> <chr> <chr> <dbl> <chr> \n#> 1 0.00849 1.11e-10 roc_auc binary 0.815 Preprocessor1_Model01\n#> 2 0.176 7.28e- 8 roc_auc binary 0.839 Preprocessor1_Model02\n#> 3 14.9 3.93e- 4 roc_auc binary 0.870 Preprocessor1_Model03\n#> 4 5.51 2.10e- 3 roc_auc binary 0.919 Preprocessor1_Model04\n#> 5 1.87 3.53e- 7 roc_auc binary 0.838 Preprocessor1_Model05\n#> 6 0.00719 1.45e- 5 roc_auc binary 0.832 Preprocessor1_Model06\n#> 7 0.00114 8.41e- 2 roc_auc binary 0.969 Preprocessor1_Model07\n#> 8 0.950 1.74e- 1 roc_auc binary 0.984 Preprocessor1_Model08\n#> 9 0.189 3.13e- 6 roc_auc binary 0.832 Preprocessor1_Model09\n#> 10 0.0364 4.96e- 9 roc_auc binary 0.839 Preprocessor1_Model10\n\nTo get the final resampling estimates, the collect_metrics() function can be used on the grid object:\n\nestimates <- collect_metrics(formula_res)\nestimates\n#> # A tibble: 10 × 8\n#> cost rbf_sigma .metric .estimator mean n std_err .config \n#> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.00849 1.11e-10 roc_auc binary 0.822 30 0.00718 Preprocessor1_Mode…\n#> 2 0.176 7.28e- 8 roc_auc binary 0.871 30 0.00525 Preprocessor1_Mode…\n#> 3 14.9 3.93e- 4 roc_auc binary 0.916 30 0.00497 Preprocessor1_Mode…\n#> 4 5.51 2.10e- 3 roc_auc binary 0.960 30 0.00378 Preprocessor1_Mode…\n#> 5 1.87 3.53e- 7 roc_auc binary 0.871 30 0.00524 Preprocessor1_Mode…\n#> 6 0.00719 1.45e- 5 roc_auc binary 0.871 30 0.00534 Preprocessor1_Mode…\n#> 7 0.00114 8.41e- 2 roc_auc binary 0.966 30 0.00301 Preprocessor1_Mode…\n#> 8 0.950 1.74e- 1 roc_auc binary 0.979 30 0.00204 Preprocessor1_Mode…\n#> 9 0.189 3.13e- 6 roc_auc binary 0.871 30 0.00536 
Preprocessor1_Mode…\n#> 10 0.0364 4.96e- 9 roc_auc binary 0.871 30 0.00537 Preprocessor1_Mode…\n\nThe top combinations are:\n\nshow_best(formula_res, metric = \"roc_auc\")\n#> # A tibble: 5 × 8\n#> cost rbf_sigma .metric .estimator mean n std_err .config \n#> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.950 0.174 roc_auc binary 0.979 30 0.00204 Preprocessor1_Model…\n#> 2 0.00114 0.0841 roc_auc binary 0.966 30 0.00301 Preprocessor1_Model…\n#> 3 5.51 0.00210 roc_auc binary 0.960 30 0.00378 Preprocessor1_Model…\n#> 4 14.9 0.000393 roc_auc binary 0.916 30 0.00497 Preprocessor1_Model…\n#> 5 0.00719 0.0000145 roc_auc binary 0.871 30 0.00534 Preprocessor1_Model…" + }, + { + "objectID": "learn/work/tune-svm/index.html#executing-with-a-recipe", + "href": "learn/work/tune-svm/index.html#executing-with-a-recipe", + "title": "Model tuning via grid search", + "section": "Executing with a recipe", + "text": "Executing with a recipe\nNext, we can use the same syntax but pass a recipe in as the pre-processor argument:\n\nset.seed(325)\nrecipe_res <-\n svm_mod %>% \n tune_grid(\n iono_rec,\n resamples = iono_rs,\n metrics = roc_vals,\n control = ctrl\n )\nrecipe_res\n#> # Tuning results\n#> # Bootstrap sampling \n#> # A tibble: 30 × 5\n#> splits id .metrics .notes .predictions\n#> <list> <chr> <list> <list> <list> \n#> 1 <split [351/120]> Bootstrap01 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 2 <split [351/130]> Bootstrap02 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 3 <split [351/137]> Bootstrap03 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 4 <split [351/141]> Bootstrap04 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 5 <split [351/131]> Bootstrap05 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 6 <split [351/131]> Bootstrap06 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 7 <split [351/127]> Bootstrap07 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 8 <split [351/123]> Bootstrap08 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 9 <split 
[351/131]> Bootstrap09 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> 10 <split [351/117]> Bootstrap10 <tibble [10 × 6]> <tibble [0 × 3]> <tibble> \n#> # ℹ 20 more rows\n\nThe best setting here is:\n\nshow_best(recipe_res, metric = \"roc_auc\")\n#> # A tibble: 5 × 8\n#> cost rbf_sigma .metric .estimator mean n std_err .config \n#> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 15.6 0.182 roc_auc binary 0.981 30 0.00213 Preprocessor1_Model04\n#> 2 0.385 0.0276 roc_auc binary 0.978 30 0.00222 Preprocessor1_Model03\n#> 3 0.143 0.00243 roc_auc binary 0.930 30 0.00443 Preprocessor1_Model06\n#> 4 0.841 0.000691 roc_auc binary 0.892 30 0.00504 Preprocessor1_Model07\n#> 5 0.0499 0.0000335 roc_auc binary 0.872 30 0.00521 Preprocessor1_Model08" + }, + { + "objectID": "learn/work/tune-svm/index.html#out-of-sample-predictions", + "href": "learn/work/tune-svm/index.html#out-of-sample-predictions", + "title": "Model tuning via grid search", + "section": "Out-of-sample predictions", + "text": "Out-of-sample predictions\nIf we used save_pred = TRUE to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using collect_predictions():\n\ncollect_predictions(recipe_res)\n#> # A tibble: 38,740 × 8\n#> id .pred_bad .pred_good .row cost rbf_sigma Class .config \n#> <chr> <dbl> <dbl> <int> <dbl> <dbl> <fct> <chr> \n#> 1 Bootstrap01 0.333 0.667 1 0.00296 0.00000383 good Preprocessor…\n#> 2 Bootstrap01 0.333 0.667 9 0.00296 0.00000383 good Preprocessor…\n#> 3 Bootstrap01 0.333 0.667 10 0.00296 0.00000383 bad Preprocessor…\n#> 4 Bootstrap01 0.333 0.667 12 0.00296 0.00000383 bad Preprocessor…\n#> 5 Bootstrap01 0.333 0.667 14 0.00296 0.00000383 bad Preprocessor…\n#> 6 Bootstrap01 0.333 0.667 15 0.00296 0.00000383 good Preprocessor…\n#> 7 Bootstrap01 0.333 0.667 16 0.00296 0.00000383 bad Preprocessor…\n#> 8 Bootstrap01 0.334 0.666 22 0.00296 0.00000383 bad Preprocessor…\n#> 9 
Bootstrap01 0.333 0.667 23 0.00296 0.00000383 good Preprocessor…\n#> 10 Bootstrap01 0.334 0.666 24 0.00296 0.00000383 bad Preprocessor…\n#> # ℹ 38,730 more rows\n\nWe can obtain the hold-out sets for all the resamples augmented with the predictions using augment(), which provides opportunities for flexible visualization of model results:\n\naugment(recipe_res) %>%\n ggplot(aes(V3, .pred_good, color = Class)) +\n geom_point(show.legend = FALSE) +\n facet_wrap(~Class)" + }, + { + "objectID": "learn/work/tune-svm/index.html#session-info", + "href": "learn/work/tune-svm/index.html#session-info", + "title": "Model tuning via grid search", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> kernlab * 0.9-32 2023-01-31 [1] CRAN (R 4.3.0)\n#> mlbench * 2.1-3.1 2023-05-05 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 
1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "learn/work/tune-text/index.html", + "href": "learn/work/tune-text/index.html", + "title": "Tuning text models", + "section": "", + "text": "To use code in this article, you will need to install the following packages: stopwords, textfeatures, textrecipes, and tidymodels.\nThis article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on." + }, + { + "objectID": "learn/work/tune-text/index.html#introduction", + "href": "learn/work/tune-text/index.html#introduction", + "title": "Tuning text models", + "section": "", + "text": "To use code in this article, you will need to install the following packages: stopwords, textfeatures, textrecipes, and tidymodels.\nThis article demonstrates an advanced example for training and tuning models for text data. Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on." + }, + { + "objectID": "learn/work/tune-text/index.html#text-as-data", + "href": "learn/work/tune-text/index.html#text-as-data", + "title": "Tuning text models", + "section": "Text as data", + "text": "Text as data\nThe text data we’ll use in this article are from Amazon:\n\nThis dataset consists of reviews of fine foods from amazon. 
The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review.\n\nThis article uses a small subset of the total reviews available at the original source. We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set.\nThere is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not.\n\nlibrary(tidymodels)\n\ndata(\"small_fine_foods\")\ntraining_data\n#> # A tibble: 4,000 × 3\n#> product review score\n#> <chr> <chr> <fct>\n#> 1 B000J0LSBG \"this stuff is not stuffing its not good at all save yo… other\n#> 2 B000EYLDYE \"I absolutely LOVE this dried fruit. LOVE IT. Whenever I … great\n#> 3 B0026LIO9A \"GREAT DEAL, CONVENIENT TOO. Much cheaper than WalMart and… great\n#> 4 B00473P8SK \"Great flavor, we go through a ton of this sauce! I discove… great\n#> 5 B001SAWTNM \"This is excellent salsa/hot sauce, but you can get it for … great\n#> 6 B000FAG90U \"Again, this is the best dogfood out there. One suggestion… great\n#> 7 B006BXTCEK \"The box I received was filled with teas, hot chocolates, a… other\n#> 8 B002GWH5OY \"This is delicious coffee which compares favorably with muc… great\n#> 9 B003R0MFYY \"Don't let these little tiny cans fool you. They pack a lo… great\n#> 10 B001EO5ZXI \"One of the nicest, smoothest cup of chai I've made. Nice m… great\n#> # ℹ 3,990 more rows\n\nOur modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not." 
+ }, + { + "objectID": "learn/work/tune-text/index.html#inputs-for-the-search", + "href": "learn/work/tune-text/index.html#inputs-for-the-search", + "title": "Tuning text models", + "section": "Inputs for the search", + "text": "Inputs for the search\nText, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. There are multiple ways to process and prepare text for modeling; let’s add several steps together to create different kinds of features:\n\nCreate an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the textfeatures package for this.\nTokenize the text (i.e. break the text into smaller components such as words).\nRemove stop words such as “the”, “an”, “of”, etc.\nStem tokens to a common root where possible.\nConvert tokens to dummy variables via a signed, binary hash function.\nOptionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a Yeo-Johnson transformation.\nRemove predictors with a single distinct value.\nCenter and scale all predictors.\n\n\n\n\n\n\n\nNote\n\n\n\nWe will end up with two kinds of features:\n\ndummy/indicator variables for the count-based features like number of digits or punctuation characters\nhash features for the tokens like “salsa” or “delicious”.\n\n\n\nSome of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. In this preprocessing approach, the main tuning parameter is the number of hashing features to use.\nBefore we start building our preprocessing recipe, we need some helper objects. 
For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features:\n\nlibrary(textfeatures)\n\nbasics <- names(textfeatures:::count_functions)\nhead(basics)\n#> [1] \"n_words\" \"n_uq_words\" \"n_charS\" \"n_uq_charS\" \"n_digits\" \n#> [6] \"n_hashtags\"\n\nAlso, the implementation of feature hashes does not produce the binary values we need. This small function will help convert the scores to values of -1, 0, or 1:\n\nbinary_hash <- function(x) {\n x <- ifelse(x < 0, -1, x)\n x <- ifelse(x > 0, 1, x)\n x\n}\n\nNow, let’s put this all together in one recipe:\n\nlibrary(textrecipes)\n\npre_proc <-\n recipe(score ~ product + review, data = training_data) %>%\n # Do not use the product ID as a predictor\n update_role(product, new_role = \"id\") %>%\n # Make a copy of the raw text\n step_mutate(review_raw = review) %>%\n # Compute the initial features. This removes the `review_raw` column\n step_textfeature(review_raw) %>%\n # Make the feature names shorter\n step_rename_at(\n starts_with(\"textfeature_\"),\n fn = ~ gsub(\"textfeature_review_raw_\", \"\", .)\n ) %>%\n step_tokenize(review) %>%\n step_stopwords(review) %>%\n step_stem(review) %>%\n # Here is where the tuning parameter is declared\n step_texthash(review, signed = TRUE, num_terms = tune()) %>%\n # Simplify these names\n step_rename_at(starts_with(\"review_hash\"), fn = ~ gsub(\"review_\", \"\", .)) %>%\n # Convert the features from counts to values of -1, 0, or 1\n step_mutate_at(starts_with(\"hash\"), fn = binary_hash) %>%\n # Transform the initial feature set\n step_YeoJohnson(one_of(!!basics)) %>%\n step_zv(all_predictors()) %>%\n step_normalize(all_predictors())\n\n\n\n\n\n\n\nWarning\n\n\n\nNote that, when objects from the global environment are used, they are injected into the step objects via !!. 
For some parallel processing technologies, these objects may not be found by the worker processes.\n\n\nThe preprocessing recipe is long and complex (often typical for working with text data) but the model we’ll use is more straightforward. Let’s stick with a regularized logistic regression model:\n\nlr_mod <-\n logistic_reg(penalty = tune(), mixture = tune()) %>%\n set_engine(\"glmnet\")\n\nThere are three tuning parameters for this data analysis:\n\nnum_terms, the number of feature hash variables to create\npenalty, the amount of regularization for the model\nmixture, the proportion of L1 regularization" + }, + { + "objectID": "learn/work/tune-text/index.html#resampling", + "href": "learn/work/tune-text/index.html#resampling", + "title": "Tuning text models", + "section": "Resampling", + "text": "Resampling\nThere are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models.\n\nset.seed(8935)\nfolds <- vfold_cv(training_data)\nfolds\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 2\n#> splits id \n#> <list> <chr> \n#> 1 <split [3600/400]> Fold01\n#> 2 <split [3600/400]> Fold02\n#> 3 <split [3600/400]> Fold03\n#> 4 <split [3600/400]> Fold04\n#> 5 <split [3600/400]> Fold05\n#> 6 <split [3600/400]> Fold06\n#> 7 <split [3600/400]> Fold07\n#> 8 <split [3600/400]> Fold08\n#> 9 <split [3600/400]> Fold09\n#> 10 <split [3600/400]> Fold10" + }, + { + "objectID": "learn/work/tune-text/index.html#grid-search", + "href": "learn/work/tune-text/index.html#grid-search", + "title": "Tuning text models", + "section": "Grid search", + "text": "Grid search\nLet’s begin our tuning with grid search and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the “submodel-trick”. 
The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features.\n\nfive_star_grid <- \n crossing(\n penalty = 10^seq(-3, 0, length = 20),\n mixture = c(0.01, 0.25, 0.50, 0.75, 1),\n num_terms = 2^c(8, 10, 12)\n )\nfive_star_grid\n#> # A tibble: 300 × 3\n#> penalty mixture num_terms\n#> <dbl> <dbl> <dbl>\n#> 1 0.001 0.01 256\n#> 2 0.001 0.01 1024\n#> 3 0.001 0.01 4096\n#> 4 0.001 0.25 256\n#> 5 0.001 0.25 1024\n#> 6 0.001 0.25 4096\n#> 7 0.001 0.5 256\n#> 8 0.001 0.5 1024\n#> 9 0.001 0.5 4096\n#> 10 0.001 0.75 256\n#> # ℹ 290 more rows\n\nNote that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work.\nLet’s save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. Use an extraction function to do this:\n\nglmnet_vars <- function(x) {\n # `x` will be a workflow object\n mod <- extract_model(x)\n # `df` is the number of model terms for each penalty value\n tibble(penalty = mod$lambda, num_vars = mod$df)\n}\n\nctrl <- control_grid(extract = glmnet_vars, verbose = TRUE)\n\nFinally, let’s run the grid search:\n\nroc_scores <- metric_set(roc_auc)\n\nset.seed(1559)\nfive_star_glmnet <- \n tune_grid(\n lr_mod, \n pre_proc, \n resamples = folds, \n grid = five_star_grid, \n metrics = roc_scores, \n control = ctrl\n )\n\nfive_star_glmnet\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 5\n#> splits id .metrics .notes .extracts\n#> <list> <chr> <list> <list> <list> \n#> 1 <split [3600/400]> Fold01 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 2 <split [3600/400]> Fold02 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 3 <split [3600/400]> Fold03 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 4 <split [3600/400]> Fold04 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 5 <split 
[3600/400]> Fold05 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 6 <split [3600/400]> Fold06 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 7 <split [3600/400]> Fold07 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 8 <split [3600/400]> Fold08 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 9 <split [3600/400]> Fold09 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> 10 <split [3600/400]> Fold10 <tibble [300 × 7]> <tibble [1 × 3]> <tibble> \n#> \n#> There were issues with some computations:\n#> \n#> - Warning(s) x10: `extract_model()` was deprecated in tune 0.1.6. ℹ Please use `ext...\n#> \n#> Run `show_notes(.Last.tune.result)` for more information.\n\nThis took a while to complete! What do the results look like? Let’s get the resampling estimates of the area under the ROC curve for each tuning parameter:\n\ngrid_roc <- \n collect_metrics(five_star_glmnet) %>% \n arrange(desc(mean))\ngrid_roc\n#> # A tibble: 300 × 9\n#> penalty mixture num_terms .metric .estimator mean n std_err .config \n#> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.695 0.01 4096 roc_auc binary 0.811 10 0.00799 Preprocesso…\n#> 2 0.483 0.01 4096 roc_auc binary 0.811 10 0.00797 Preprocesso…\n#> 3 0.0379 0.25 4096 roc_auc binary 0.809 10 0.00755 Preprocesso…\n#> 4 0.0183 0.5 4096 roc_auc binary 0.807 10 0.00776 Preprocesso…\n#> 5 0.0264 0.25 4096 roc_auc binary 0.807 10 0.00792 Preprocesso…\n#> 6 0.0127 0.75 4096 roc_auc binary 0.807 10 0.00773 Preprocesso…\n#> 7 0.336 0.01 4096 roc_auc binary 0.806 10 0.00781 Preprocesso…\n#> 8 0.00886 1 4096 roc_auc binary 0.806 10 0.00783 Preprocesso…\n#> 9 1 0.01 4096 roc_auc binary 0.806 10 0.00801 Preprocesso…\n#> 10 0.0546 0.25 4096 roc_auc binary 0.805 10 0.00783 Preprocesso…\n#> # ℹ 290 more rows\n\nThe best results have a fairly high penalty value and focus on the ridge penalty (i.e. no feature selection via the lasso’s L1 penalty). 
The best solutions also use the largest number of hashing features.\nWhat is the relationship between performance and the tuning parameters?\n\nautoplot(five_star_glmnet, metric = \"roc_auc\")\n\n\n\n\n\n\n\n\n\nWe can definitely see that performance improves with the number of features included. In this article, we’ve used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal.\nThe profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. What’s that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model.\n\nThe panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn’t much difference between the best performance for the different mixture values. A case could be made that we should choose a larger mixture value and a smaller penalty to select a simpler model that contains fewer predictors.\nIf more experimentation were conducted, a larger set of features (more than 4096) should also be considered.\n\nWe’ll come back to the extracted glmnet components at the end of this article." + }, + { + "objectID": "learn/work/tune-text/index.html#directed-search", + "href": "learn/work/tune-text/index.html#directed-search", + "title": "Tuning text models", + "section": "Directed search", + "text": "Directed search\nWhat if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently?\nLet’s pretend that we haven’t seen the grid search results. We’ll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design.\nIt might be good to use a custom dials object for the number of hash terms. The default object, num_terms(), uses a linear range and tries to set the upper bound of the parameter using the data. 
Instead, let’s create a parameter set, change the scale to be log2, and define the same range as was used in grid search.\n\nhash_range <- num_terms(c(8, 12), trans = log2_trans())\nhash_range\n#> # Model Terms (quantitative)\n#> Transformer: log-2 [1e-100, Inf]\n#> Range (transformed scale): [8, 12]\n\nTo use this, we have to merge the recipe and parsnip model object into a workflow:\n\nfive_star_wflow <-\n workflow() %>%\n add_recipe(pre_proc) %>%\n add_model(lr_mod)\n\nThen we can extract and manipulate the corresponding parameter set:\n\nfive_star_set <-\n five_star_wflow %>%\n parameters() %>%\n update(\n num_terms = hash_range, \n penalty = penalty(c(-3, 0)),\n mixture = mixture(c(0.05, 1.00))\n )\n#> Warning: `parameters.workflow()` was deprecated in tune 0.1.6.9003.\n#> ℹ Please use `hardhat::extract_parameter_set_dials()` instead.\n\nThis is passed to the search function via the param_info argument.\nThe initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on acquisition functions for more details). The tune package has a built-in function called expo_decay() that can help accomplish this:\n\ntrade_off_decay <- function(iter) {\n expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4)\n}\n\nUsing these values, let’s run the search:\n\nset.seed(12)\nfive_star_search <-\n tune_bayes(\n five_star_wflow, \n resamples = folds,\n param_info = five_star_set,\n initial = 5,\n iter = 30,\n metrics = roc_scores,\n objective = exp_improve(trade_off_decay),\n control = control_bayes(verbose_iter = TRUE)\n )\n#> Optimizing roc_auc using the expected improvement with variable trade-off\n#> values.\n#> ! 
No improvement for 10 iterations; returning current results.\n\nfive_star_search\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 290 × 5\n#> splits id .metrics .notes .iter\n#> <list> <chr> <list> <list> <int>\n#> 1 <split [3600/400]> Fold01 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 2 <split [3600/400]> Fold02 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 3 <split [3600/400]> Fold03 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 4 <split [3600/400]> Fold04 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 5 <split [3600/400]> Fold05 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 6 <split [3600/400]> Fold06 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 7 <split [3600/400]> Fold07 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 8 <split [3600/400]> Fold08 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 9 <split [3600/400]> Fold09 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> 10 <split [3600/400]> Fold10 <tibble [5 × 7]> <tibble [0 × 3]> 0\n#> # ℹ 280 more rows\n\nThese results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. Let’s look at a plot of model performance versus the search iterations:\n\nautoplot(five_star_search, type = \"performance\")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nNote\n\n\n\nWhat would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). We might also restrict the penalty and mixture parameters to have a lower upper bound." 
+ }, + { + "objectID": "learn/work/tune-text/index.html#extracted-results", + "href": "learn/work/tune-text/index.html#extracted-results", + "title": "Tuning text models", + "section": "Extracted results", + "text": "Extracted results\nLet’s return to the grid search results and examine the results of our extract function. For each fitted model, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let’s look at these results for the best model:\n\nparams <- select_best(five_star_glmnet, metric = \"roc_auc\")\nparams\n#> # A tibble: 1 × 4\n#> penalty mixture num_terms .config \n#> <dbl> <dbl> <dbl> <chr> \n#> 1 0.695 0.01 4096 Preprocessor3_Model019\n\nRecall that we saved the glmnet results in a tibble. The column five_star_glmnet$.extracts is a list of tibbles. As an example, the first element of the list is:\n\nfive_star_glmnet$.extracts[[1]]\n#> # A tibble: 300 × 5\n#> num_terms penalty mixture .extracts .config \n#> <dbl> <dbl> <dbl> <list> <chr> \n#> 1 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model001\n#> 2 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model002\n#> 3 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model003\n#> 4 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model004\n#> 5 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model005\n#> 6 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model006\n#> 7 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model007\n#> 8 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model008\n#> 9 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model009\n#> 10 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model010\n#> # ℹ 290 more rows\n\nMore nested tibbles! 
Let’s unnest() the five_star_glmnet$.extracts column:\n\nlibrary(tidyr)\nextracted <- \n five_star_glmnet %>% \n dplyr::select(id, .extracts) %>% \n unnest(cols = .extracts)\nextracted\n#> # A tibble: 3,000 × 6\n#> id num_terms penalty mixture .extracts .config \n#> <chr> <dbl> <dbl> <dbl> <list> <chr> \n#> 1 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model001\n#> 2 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model002\n#> 3 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model003\n#> 4 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model004\n#> 5 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model005\n#> 6 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model006\n#> 7 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model007\n#> 8 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model008\n#> 9 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model009\n#> 10 Fold01 256 1 0.01 <tibble [100 × 2]> Preprocessor1_Model010\n#> # ℹ 2,990 more rows\n\nOne thing to realize here is that tune_grid() may not fit all of the models that are evaluated. In this case, for each value of mixture and num_terms, the model is fit over all penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the penalty column in extracted:\n\nextracted <- \n extracted %>% \n dplyr::select(-penalty) %>% \n inner_join(params, by = c(\"num_terms\", \"mixture\")) %>% \n # Now remove it from the final results\n dplyr::select(-penalty)\nextracted\n#> # A tibble: 200 × 6\n#> id num_terms mixture .extracts .config.x .config.y \n#> <chr> <dbl> <dbl> <list> <chr> <chr> \n#> 1 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model001 Preproces…\n#> 2 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model002 Preproces…\n#> 3 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model003 Preproces…\n#> 4 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model004 Preproces…\n#> 5 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model005 Preproces…\n#> 6 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model006 Preproces…\n#> 7 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model007 Preproces…\n#> 8 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model008 Preproces…\n#> 9 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model009 Preproces…\n#> 10 Fold01 4096 0.01 <tibble [100 × 2]> Preprocessor3_Model010 Preproces…\n#> # ℹ 190 more rows\n\nNow we can get at the results that we want using another unnest():\n\nextracted <- \n extracted %>% \n unnest(col = .extracts) # <- these contain a `penalty` column\nextracted\n#> # A tibble: 20,000 × 7\n#> id num_terms mixture penalty num_vars .config.x .config.y \n#> <chr> <dbl> <dbl> <dbl> <int> <chr> <chr> \n#> 1 Fold01 4096 0.01 8.60 0 Preprocessor3_Model001 Preprocesso…\n#> 2 Fold01 4096 0.01 8.21 2 Preprocessor3_Model001 Preprocesso…\n#> 3 Fold01 4096 0.01 7.84 2 Preprocessor3_Model001 Preprocesso…\n#> 4 Fold01 4096 0.01 7.48 3 Preprocessor3_Model001 Preprocesso…\n#> 5 Fold01 4096 0.01 7.14 3 Preprocessor3_Model001 Preprocesso…\n#> 6 Fold01 4096 0.01 6.82 3 Preprocessor3_Model001 Preprocesso…\n#> 7 Fold01 4096 0.01 6.51 4 Preprocessor3_Model001 Preprocesso…\n#> 8 Fold01 4096 0.01 6.21 6 
Preprocessor3_Model001 Preprocesso…\n#> 9 Fold01 4096 0.01 5.93 7 Preprocessor3_Model001 Preprocesso…\n#> 10 Fold01 4096 0.01 5.66 7 Preprocessor3_Model001 Preprocesso…\n#> # ℹ 19,990 more rows\n\nLet’s look at a plot of these results (per resample):\n\nggplot(extracted, aes(x = penalty, y = num_vars)) + \n geom_line(aes(group = id, col = id), alpha = .5) + \n ylab(\"Number of retained predictors\") + \n scale_x_log10() + \n ggtitle(paste(\"mixture = \", params$mixture, \"and\", params$num_terms, \"features\")) + \n theme(legend.position = \"none\")\n\n\n\n\n\n\n\n\nThese results might help guide the choice of the penalty range if more optimization was conducted." + }, + { + "objectID": "learn/work/tune-text/index.html#session-info", + "href": "learn/work/tune-text/index.html#session-info", + "title": "Tuning text models", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> stopwords * 2.3 2021-10-28 [1] CRAN (R 4.3.0)\n#> textfeatures * 0.3.3 
2019-09-03 [1] CRAN (R 4.3.0)\n#> textrecipes * 1.0.3 2023-04-14 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "packages/index.html", + "href": "packages/index.html", + "title": "Tidymodels packages", + "section": "", + "text": "Install many of the packages in the tidymodels ecosystem by running install.packages(\"tidymodels\").\nRun library(tidymodels) to load the core packages and make them available in your current R session.\nLearn more about the tidymodels metapackage itself at https://tidymodels.tidymodels.org/." + }, + { + "objectID": "packages/index.html#installation-and-use", + "href": "packages/index.html#installation-and-use", + "title": "Tidymodels packages", + "section": "", + "text": "Install many of the packages in the tidymodels ecosystem by running install.packages(\"tidymodels\").\nRun library(tidymodels) to load the core packages and make them available in your current R session." 
+ }, + { + "objectID": "packages/index.html#core-tidymodels", + "href": "packages/index.html#core-tidymodels", + "title": "Tidymodels packages", + "section": "Core tidymodels", + "text": "Core tidymodels\n\nThe core tidymodels packages work together to enable a wide variety of modeling approaches:" + }, + { + "objectID": "packages/index.html#specialized-packages", + "href": "packages/index.html#specialized-packages", + "title": "Tidymodels packages", + "section": "Specialized packages", + "text": "Specialized packages\nThe tidymodels framework also includes many other packages designed for specialized data analysis and modeling tasks. They are not loaded automatically with library(tidymodels), so you’ll need to load each one with its own call to library(). These packages include:\n\nPerform statistical analysis\n\ninfer is a high-level API for tidyverse-friendly statistical inference.\nThe corrr package has tidy interfaces for working with correlation matrices.\n\n\n\nCreate robust models\n\nThe spatialsample package provides resampling functions and classes like rsample, but specialized for spatial data.\nparsnip also has additional packages that contain more model definitions. discrim contains definitions for discriminant analysis models, poissonreg provides definitions for Poisson regression models, plsmod enables linear projection models, and rules does the same for rule-based classification and regression models. baguette creates ensemble models via bagging, and multilevelmod provides support for multilevel models (otherwise known as mixed models or hierarchical models).\nThere are several add-on packages for creating recipes. embed contains steps to create embeddings or projections of predictors. textrecipes has extra steps for text processing, and themis can help alleviate class imbalance using sampling methods.\ntidypredict and modeldb can convert prediction equations to different languages (e.g. 
SQL) and fit some models in-database.\n\n\n\nTune, compare, and work with your models\n\nTo try out multiple different workflows (i.e. bundles of pre-processor and model) at once, workflowsets lets you create sets of workflow objects for tuning and resampling.\nTo integrate predictions from many models, the stacks package provides tools for stacked ensemble modeling.\nThe finetune package extends the tune package with more approaches such as racing and simulated annealing.\nThe usemodels package creates templates and automatically generates code to fit and tune models.\nprobably has tools for post-processing class probability estimates.\nThe tidyposterior package enables users to make formal statistical comparisons between models using resampling and Bayesian methods.\nSome R objects become inconveniently large when saved to disk. The butcher package can reduce the size of those objects by removing the sub-components.\nTo know whether the data that you are predicting are extrapolations from the training set, applicable can produce metrics that measure extrapolation.\nshinymodels lets you explore tuning or resampling results via a Shiny app.\n\n\n\nDevelop custom modeling tools\n\nhardhat is a developer-focused package that helps beginners create high-quality R packages for modeling." + }, + { + "objectID": "start/case-study/index.html", + "href": "start/case-study/index.html", + "title": "A predictive modeling case study", + "section": "", + "text": "Each of the four previous Get Started articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you’ll need to start working with models. 
In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays.\n\n\n\n\n\n\n\n\n\nTo use code in this article, you will need to install the following packages: glmnet, ranger, readr, tidymodels, and vip.\n\nlibrary(tidymodels) \n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(vip) # for variable importance plots" + }, + { + "objectID": "start/case-study/index.html#intro", + "href": "start/case-study/index.html#intro", + "title": "A predictive modeling case study", + "section": "", + "text": "Each of the four previous Get Started articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you’ll need to start working with models. In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays.\n\n\n\n\n\n\n\n\n\nTo use code in this article, you will need to install the following packages: glmnet, ranger, readr, tidymodels, and vip.\n\nlibrary(tidymodels) \n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(vip) # for variable importance plots" + }, + { + "objectID": "start/case-study/index.html#data", + "href": "start/case-study/index.html#data", + "title": "A predictive modeling case study", + "section": "The Hotel Bookings Data", + "text": "The Hotel Bookings Data\nLet’s use hotel bookings data from Antonio, Almeida, and Nunes (2019) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a #TidyTuesday dataset with a data dictionary you may want to look over to learn more about the variables. 
We’ll use a slightly edited version of the dataset for this case study.\nTo start, let’s read our hotel data into R, which we’ll do by providing readr::read_csv() with a url where our CSV data is located (“https://tidymodels.org/start/case-study/hotels.csv”):\n\nlibrary(tidymodels)\nlibrary(readr)\n\nhotels <- \n read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>%\n mutate(across(where(is.character), as.factor))\n\ndim(hotels)\n#> [1] 50000 23\n\nIn the original paper, the authors caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. 
To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we’ll be analyzing hotel stays only.\n\nglimpse(hotels)\n#> Rows: 50,000\n#> Columns: 23\n#> $ hotel <fct> City_Hotel, City_Hotel, Resort_Hotel, R…\n#> $ lead_time <dbl> 217, 2, 95, 143, 136, 67, 47, 56, 80, 6…\n#> $ stays_in_weekend_nights <dbl> 1, 0, 2, 2, 1, 2, 0, 0, 0, 2, 1, 0, 1, …\n#> $ stays_in_week_nights <dbl> 3, 1, 5, 6, 4, 2, 2, 3, 4, 2, 2, 1, 2, …\n#> $ adults <dbl> 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 1, 2, …\n#> $ children <fct> none, none, none, none, none, none, chi…\n#> $ meal <fct> BB, BB, BB, HB, HB, SC, BB, BB, BB, BB,…\n#> $ country <fct> DEU, PRT, GBR, ROU, PRT, GBR, ESP, ESP,…\n#> $ market_segment <fct> Offline_TA/TO, Direct, Online_TA, Onlin…\n#> $ distribution_channel <fct> TA/TO, Direct, TA/TO, TA/TO, Direct, TA…\n#> $ is_repeated_guest <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ previous_cancellations <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ previous_bookings_not_canceled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ reserved_room_type <fct> A, D, A, A, F, A, C, B, D, A, A, D, A, …\n#> $ assigned_room_type <fct> A, K, A, A, F, A, C, A, D, A, D, D, A, …\n#> $ booking_changes <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ deposit_type <fct> No_Deposit, No_Deposit, No_Deposit, No_…\n#> $ days_in_waiting_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …\n#> $ customer_type <fct> Transient-Party, Transient, Transient, …\n#> $ average_daily_rate <dbl> 80.75, 170.00, 8.00, 81.00, 157.60, 49.…\n#> $ required_car_parking_spaces <fct> none, none, none, none, none, none, non…\n#> $ total_of_special_requests <dbl> 1, 3, 2, 1, 4, 1, 1, 1, 1, 1, 0, 1, 0, …\n#> $ arrival_date <date> 2016-09-01, 2017-08-25, 2016-11-19, 20…\n\nWe will build a model to predict which actual hotel stays included children and/or babies, and which did not. 
Our outcome variable children is a factor variable with two levels:\n\nhotels %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> children n prop\n#> <fct> <int> <dbl>\n#> 1 children 4038 0.0808\n#> 2 none 45962 0.919\n\nWe can see that children were only in 8.1% of the reservations. This type of class imbalance can often wreak havoc on an analysis. While there are several methods for combating this issue using recipes (search for steps to upsample or downsample) or other more specialized packages like themis, the analyses shown below analyze the data as-is." + }, + { + "objectID": "start/case-study/index.html#data-split", + "href": "start/case-study/index.html#data-split", + "title": "A predictive modeling case study", + "section": "Data Splitting & Resampling", + "text": "Data Splitting & Resampling\nFor a data splitting strategy, let’s reserve 25% of the stays to the test set. As in our Evaluate your model with resampling article, we know our outcome variable children is pretty imbalanced so we’ll use a stratified random sample:\n\nset.seed(123)\nsplits <- initial_split(hotels, strata = children)\n\nhotel_other <- training(splits)\nhotel_test <- testing(splits)\n\n# training set proportions by children\nhotel_other %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> children n prop\n#> <fct> <int> <dbl>\n#> 1 children 3027 0.0807\n#> 2 none 34473 0.919\n\n# test set proportions by children\nhotel_test %>% \n count(children) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> children n prop\n#> <fct> <int> <dbl>\n#> 1 children 1011 0.0809\n#> 2 none 11489 0.919\n\nIn our articles so far, we’ve relied on 10-fold cross-validation as the primary resampling method using rsample::vfold_cv(). 
This has created 10 different resamples of the training set (which we further split into analysis and assessment sets), producing 10 different performance metrics that we then aggregated.\nFor this case study, rather than using multiple iterations of resampling, let’s create a single resample called a validation set. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the 37,500 stays that were not used for testing, which we called hotel_other. This split creates two new datasets:\n\nthe set held out for the purpose of measuring performance, called the validation set, and\nthe remaining data used to fit the model, called the training set.\n\n\n\n\n\n\n\n\n\n\nWe’ll use the validation_split() function to allocate 20% of the hotel_other stays to the validation set and 30,000 stays to the training set. This means that our model performance metrics will be computed on a single set of 7,500 hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling.\n\nset.seed(234)\nval_set <- validation_split(hotel_other, \n strata = children, \n prop = 0.80)\nval_set\n#> # Validation Set Split (0.8/0.2) using stratification \n#> # A tibble: 1 × 2\n#> splits id \n#> <list> <chr> \n#> 1 <split [30000/7500]> validation\n\nThis function, like initial_split(), has the same strata argument, which uses stratified sampling to create the resample. This means that we’ll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original hotel_other proportions." 
+ }, + { + "objectID": "start/case-study/index.html#first-model", + "href": "start/case-study/index.html#first-model", + "title": "A predictive modeling case study", + "section": "A first model: penalized logistic regression", + "text": "A first model: penalized logistic regression\nSince our outcome variable children is categorical, logistic regression would be a good first model to start. Let’s use a model that can perform feature selection during training. The glmnet R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a penalty on the process so that less relevant predictors are driven towards a value of zero. One of the glmnet penalization methods, called the lasso method, can actually set the predictor slopes to zero if a large enough penalty is used.\n\nBuild the model\nTo specify a penalized logistic regression model that uses a feature selection penalty, let’s use the parsnip package with the glmnet engine:\n\nlr_mod <- \n logistic_reg(penalty = tune(), mixture = 1) %>% \n set_engine(\"glmnet\")\n\nWe’ll set the penalty argument to tune() as a placeholder for now. This is a model hyperparameter that we will tune to find the best value for making predictions with our data. Setting mixture to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model.\n\n\nCreate the recipe\nLet’s create a recipe to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a number of useful recipe steps for creating features from dates:\n\nstep_date() creates predictors for the year, month, and day of the week.\nstep_holiday() generates a set of indicator variables for specific holidays. 
Although we don’t know where these two hotels are located, we do know that the countries of origin for most stays are based in Europe.\nstep_rm() removes variables; here we’ll use it to remove the original date variable since we no longer want it in the model.\n\nAdditionally, all categorical predictors (e.g., distribution_channel, hotel, …) should be converted to dummy variables, and all numeric predictors need to be centered and scaled.\n\nstep_dummy() converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data.\nstep_zv() removes indicator variables that only contain a single unique value (e.g. all zeros). This is important because, for penalized models, the predictors should be centered and scaled.\nstep_normalize() centers and scales numeric variables.\n\nPutting all these steps together into a recipe for a penalized logistic regression model, we have:\n\nholidays <- c(\"AllSouls\", \"AshWednesday\", \"ChristmasEve\", \"Easter\", \n \"ChristmasDay\", \"GoodFriday\", \"NewYearsDay\", \"PalmSunday\")\n\nlr_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date, holidays = holidays) %>% \n step_rm(arrival_date) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors()) %>% \n step_normalize(all_predictors())\n\n\n\nCreate the workflow\nAs we introduced in Preprocess your data with recipes, let’s bundle the model and recipe into a single workflow() object to make management of the R objects easier:\n\nlr_workflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(lr_recipe)\n\n\n\nCreate the grid for tuning\nBefore we fit this model, we need to set up a grid of penalty values to tune. In our Tune model parameters article, we used dials::grid_regular() to create an expanded grid based on a combination of two hyperparameters. 
Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values:\n\nlr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))\n\nlr_reg_grid %>% top_n(-5) # lowest penalty values\n#> Selecting by penalty\n#> # A tibble: 5 × 1\n#> penalty\n#> <dbl>\n#> 1 0.0001 \n#> 2 0.000127\n#> 3 0.000161\n#> 4 0.000204\n#> 5 0.000259\nlr_reg_grid %>% top_n(5) # highest penalty values\n#> Selecting by penalty\n#> # A tibble: 5 × 1\n#> penalty\n#> <dbl>\n#> 1 0.0386\n#> 2 0.0489\n#> 3 0.0621\n#> 4 0.0788\n#> 5 0.1\n\n\n\nTrain and tune the model\nLet’s use tune::tune_grid() to train these 30 penalized logistic regression models. We’ll also save the validation set predictions (via the call to control_grid()) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate—the proportion of stays including children— is very low for these data).\n\nlr_res <- \n lr_workflow %>% \n tune_grid(val_set,\n grid = lr_reg_grid,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n\nIt might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values:\n\nlr_plot <- \n lr_res %>% \n collect_metrics() %>% \n ggplot(aes(x = penalty, y = mean)) + \n geom_point() + \n geom_line() + \n ylab(\"Area under the ROC Curve\") +\n scale_x_log10(labels = scales::label_number())\n\nlr_plot \n\n\n\n\n\n\n\n\nThis plot shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. 
This happens because a large enough penalty will remove all predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class).\nOur model performance seems to plateau at the smaller penalty values, so going by the roc_auc metric alone could lead us to multiple options for the “best” value for this hyperparameter:\n\ntop_models <-\n lr_res %>% \n show_best(\"roc_auc\", n = 15) %>% \n arrange(penalty) \ntop_models\n#> # A tibble: 15 × 7\n#> penalty .metric .estimator mean n std_err .config \n#> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.000127 roc_auc binary 0.872 1 NA Preprocessor1_Model02\n#> 2 0.000161 roc_auc binary 0.872 1 NA Preprocessor1_Model03\n#> 3 0.000204 roc_auc binary 0.873 1 NA Preprocessor1_Model04\n#> 4 0.000259 roc_auc binary 0.873 1 NA Preprocessor1_Model05\n#> 5 0.000329 roc_auc binary 0.874 1 NA Preprocessor1_Model06\n#> 6 0.000418 roc_auc binary 0.874 1 NA Preprocessor1_Model07\n#> 7 0.000530 roc_auc binary 0.875 1 NA Preprocessor1_Model08\n#> 8 0.000672 roc_auc binary 0.875 1 NA Preprocessor1_Model09\n#> 9 0.000853 roc_auc binary 0.876 1 NA Preprocessor1_Model10\n#> 10 0.00108 roc_auc binary 0.876 1 NA Preprocessor1_Model11\n#> 11 0.00137 roc_auc binary 0.876 1 NA Preprocessor1_Model12\n#> 12 0.00174 roc_auc binary 0.876 1 NA Preprocessor1_Model13\n#> 13 0.00221 roc_auc binary 0.876 1 NA Preprocessor1_Model14\n#> 14 0.00281 roc_auc binary 0.875 1 NA Preprocessor1_Model15\n#> 15 0.00356 roc_auc binary 0.873 1 NA Preprocessor1_Model16\n\nEvery candidate model in this tibble likely includes more predictor variables than the model in the row below it. 
If we used select_best(), it would return candidate model 11 with a penalty value of 0.00137, shown with the dotted line below.\n\n\n\n\n\n\n\n\n\nHowever, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. For example, candidate model 12 with a penalty value of 0.00174 has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we’d prefer to choose a higher penalty value.\nLet’s select this value and visualize the validation set ROC curve:\n\nlr_best <- \n lr_res %>% \n collect_metrics() %>% \n arrange(penalty) %>% \n slice(12)\nlr_best\n#> # A tibble: 1 × 7\n#> penalty .metric .estimator mean n std_err .config \n#> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.00137 roc_auc binary 0.876 1 NA Preprocessor1_Model12\n\n\nlr_auc <- \n lr_res %>% \n collect_predictions(parameters = lr_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Logistic Regression\")\n\nautoplot(lr_auc)\n\n\n\n\n\n\n\n\nThe level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method." + }, + { + "objectID": "start/case-study/index.html#second-model", + "href": "start/case-study/index.html#second-model", + "title": "A predictive modeling case study", + "section": "A second model: tree-based ensemble", + "text": "A second model: tree-based ensemble\nAn effective and low-maintenance modeling technique is a random forest. This model was also used in our Evaluate your model with resampling article. Compared to logistic regression, a random forest model is more flexible. 
A random forest is an ensemble model typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.).\n\nBuild the model and improve training time\nAlthough the default hyperparameters for random forests tend to give reasonable results, we’ll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do parallel processing for you, and allows users to use multiple cores or separate machines to fit models.\nBut, here we are using a single validation set, so parallelization isn’t an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do:\n\ncores <- parallel::detectCores()\ncores\n#> [1] 10\n\nWe have 10 cores to work with. We can pass this information to the ranger engine when we set up our parsnip rand_forest() model. 
To enable parallel processing, we can pass engine-specific arguments like num.threads to ranger when we set the engine:\n\nrf_mod <- \n rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores) %>% \n set_mode(\"classification\")\n\nThis works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you — we typically do not recommend relying on the modeling engine (like we did here) to do this.\nIn this model, we used tune() as a placeholder for the mtry and min_n argument values, because these are our two hyperparameters that we will tune.\n\n\nCreate the recipe and workflow\nUnlike penalized logistic regression models, random forest models do not require dummy or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our arrival_date variable. As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data.\n\nrf_recipe <- \n recipe(children ~ ., data = hotel_other) %>% \n step_date(arrival_date) %>% \n step_holiday(arrival_date) %>% \n step_rm(arrival_date) \n\nAdding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest:\n\nrf_workflow <- \n workflow() %>% \n add_model(rf_mod) %>% \n add_recipe(rf_recipe)\n\n\n\nTrain and tune the model\nWhen we set up our parsnip model, we chose two hyperparameters for tuning:\n\nrf_mod\n#> Random Forest Model Specification (classification)\n#> \n#> Main Arguments:\n#> mtry = tune()\n#> trees = 1000\n#> min_n = tune()\n#> \n#> Engine-Specific Arguments:\n#> num.threads = cores\n#> \n#> Computational engine: ranger\n\n# show what will be tuned\nextract_parameter_set_dials(rf_mod)\n#> Collection of 2 parameters for tuning\n#> \n#> identifier type object\n#> mtry mtry 
nparam[?]\n#> min_n min_n nparam[+]\n#> \n#> Model parameters needing finalization:\n#> # Randomly Selected Predictors ('mtry')\n#> \n#> See `?dials::finalize` or `?dials::update.parameters` for more information.\n\nThe mtry hyperparameter sets the number of predictor variables that each node in the decision tree “sees” and can learn about, so it can range from 1 to the total number of features present; when mtry = all possible features, the model is the same as bagging decision trees. The min_n hyperparameter sets the minimum n to split at any node.\nWe will use a space-filling design to tune, with 25 candidate models:\n\nset.seed(345)\nrf_res <- \n rf_workflow %>% \n tune_grid(val_set,\n grid = 25,\n control = control_grid(save_pred = TRUE),\n metrics = metric_set(roc_auc))\n#> i Creating pre-processing data to finalize unknown parameter: mtry\n\nThe message printed above “Creating pre-processing data to finalize unknown parameter: mtry” is related to the size of the data set. Since mtry depends on the number of predictors in the data set, tune_grid() determines the upper bound for mtry once it receives the data.\nHere are our top 5 random forest models, out of the 25 candidates:\n\nrf_res %>% \n show_best(metric = \"roc_auc\")\n#> # A tibble: 5 × 8\n#> mtry min_n .metric .estimator mean n std_err .config \n#> <int> <int> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 8 7 roc_auc binary 0.926 1 NA Preprocessor1_Model13\n#> 2 12 7 roc_auc binary 0.926 1 NA Preprocessor1_Model01\n#> 3 13 4 roc_auc binary 0.925 1 NA Preprocessor1_Model05\n#> 4 9 12 roc_auc binary 0.924 1 NA Preprocessor1_Model19\n#> 5 6 18 roc_auc binary 0.924 1 NA Preprocessor1_Model24\n\nRight away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of 0.876.\nPlotting the results of the tuning process highlights that both mtry (number of predictors at each node) and min_n (minimum number of data points 
required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values — all but one of the ROC AUC values are greater than 0.90.\n\nautoplot(rf_res)\n\n\n\n\n\n\n\n\nLet’s select the best model according to the ROC AUC metric. Our final tuning parameter values are:\n\nrf_best <- \n rf_res %>% \n select_best(metric = \"roc_auc\")\nrf_best\n#> # A tibble: 1 × 3\n#> mtry min_n .config \n#> <int> <int> <chr> \n#> 1 8 7 Preprocessor1_Model13\n\nTo calculate the data needed to plot the ROC curve, we use collect_predictions(). This is only possible after tuning with control_grid(save_pred = TRUE). In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children.\n\nrf_res %>% \n collect_predictions()\n#> # A tibble: 187,500 × 8\n#> id .pred_children .pred_none .row mtry min_n children .config \n#> <chr> <dbl> <dbl> <int> <int> <int> <fct> <chr> \n#> 1 validation 0.152 0.848 13 12 7 none Preprocessor…\n#> 2 validation 0.0302 0.970 20 12 7 none Preprocessor…\n#> 3 validation 0.513 0.487 22 12 7 children Preprocessor…\n#> 4 validation 0.0103 0.990 23 12 7 none Preprocessor…\n#> 5 validation 0.0111 0.989 31 12 7 none Preprocessor…\n#> 6 validation 0 1 38 12 7 none Preprocessor…\n#> 7 validation 0 1 39 12 7 none Preprocessor…\n#> 8 validation 0.00325 0.997 50 12 7 none Preprocessor…\n#> 9 validation 0.0241 0.976 54 12 7 none Preprocessor…\n#> 10 validation 0.0441 0.956 57 12 7 children Preprocessor…\n#> # ℹ 187,490 more rows\n\nTo filter the predictions for only our best random forest model, we can use the parameters argument and pass it our tibble with the best hyperparameter values from tuning, which we called rf_best:\n\nrf_auc <- \n rf_res %>% \n collect_predictions(parameters = rf_best) %>% \n roc_curve(children, .pred_children) %>% \n mutate(model = \"Random Forest\")\n\nNow, we 
can compare the validation set ROC curves for our top penalized logistic regression model and random forest model:\n\nbind_rows(rf_auc, lr_auc) %>% \n ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + \n geom_path(lwd = 1.5, alpha = 0.8) +\n geom_abline(lty = 3) + \n coord_equal() + \n scale_color_viridis_d(option = \"plasma\", end = .6)\n\n\n\n\n\n\n\n\nThe random forest is uniformly better across event probability thresholds." + }, + { + "objectID": "start/case-study/index.html#last-fit", + "href": "start/case-study/index.html#last-fit", + "title": "A predictive modeling case study", + "section": "The last fit", + "text": "The last fit\nOur goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set.\nWe’ll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: importance = \"impurity\". 
This will provide variable importance scores for this last model, which gives some insight into which predictors drive model performance.\n\n# the last model\nlast_rf_mod <- \n rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% \n set_engine(\"ranger\", num.threads = cores, importance = \"impurity\") %>% \n set_mode(\"classification\")\n\n# the last workflow\nlast_rf_workflow <- \n rf_workflow %>% \n update_model(last_rf_mod)\n\n# the last fit\nset.seed(345)\nlast_rf_fit <- \n last_rf_workflow %>% \n last_fit(splits)\n\nlast_rf_fit\n#> # Resampling results\n#> # Manual resampling \n#> # A tibble: 1 × 6\n#> splits id .metrics .notes .predictions .workflow \n#> <list> <chr> <list> <list> <list> <list> \n#> 1 <split [37500/12500]> train/test sp… <tibble> <tibble> <tibble> <workflow>\n\nThis fitted workflow contains everything, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance?\n\nlast_rf_fit %>% \n collect_metrics()\n#> # A tibble: 2 × 4\n#> .metric .estimator .estimate .config \n#> <chr> <chr> <dbl> <chr> \n#> 1 accuracy binary 0.946 Preprocessor1_Model1\n#> 2 roc_auc binary 0.923 Preprocessor1_Model1\n\nThis ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data.\nWe can access those variable importance scores via the .workflow column. 
We can extract out the fit from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features:\n\nlast_rf_fit %>% \n extract_fit_parsnip() %>% \n vip(num_features = 20)\n\n\n\n\n\n\n\n\nThe most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned.\nLet’s generate our last ROC curve to visualize. Since the event we are predicting is the first level in the children factor (“children”), we provide roc_curve() with the relevant class probability .pred_children:\n\nlast_rf_fit %>% \n collect_predictions() %>% \n roc_curve(children, .pred_children) %>% \n autoplot()\n\n\n\n\n\n\n\n\nBased on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data." + }, + { + "objectID": "start/case-study/index.html#next", + "href": "start/case-study/index.html#next", + "title": "A predictive modeling case study", + "section": "Where to next?", + "text": "Where to next?\nIf you’ve made it to the end of this series of Get Started articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can learn how to go farther with tidymodels in your modeling and machine learning projects.\nHere are some more ideas for where to go next:\n\nStudy up on statistics and modeling with our comprehensive books.\nDig deeper into the package documentation sites to find functions that meet your modeling needs. 
Use the searchable tables to explore what is possible.\nKeep up with the latest about tidymodels packages at the tidyverse blog.\nFind ways to ask for help and contribute to tidymodels to help others.\n\n###\n\nHappy modeling!" + }, + { + "objectID": "start/case-study/index.html#session-info", + "href": "start/case-study/index.html#session-info", + "title": "A predictive modeling case study", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> glmnet * 4.1-7 2023-03-23 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> ranger * 0.15.1 2023-04-03 [1] CRAN (R 4.3.0)\n#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> vip * 0.3.2 2020-12-17 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> 
[2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "start/index.html", + "href": "start/index.html", + "title": "Welcome!", + "section": "", + "text": "Here, learn what you need to get started with tidymodels in five articles, starting with how to create a model and ending with a beginning-to-end modeling case study. After you are comfortable with these basics, you can learn how to go farther with tidymodels." + }, + { + "objectID": "start/index.html#if-you-are-new-to-r-or-the-tidyverse", + "href": "start/index.html#if-you-are-new-to-r-or-the-tidyverse", + "title": "Welcome!", + "section": "If you are new to R or the tidyverse", + "text": "If you are new to R or the tidyverse\nTo get the most out of tidymodels, we recommend that you start by learning some basics about R and the tidyverse first, then return here when you feel ready. Here are some resources to start learning:\n\nFinding Your Way To R, from the RStudio Education team.\nLearn the tidyverse, from the tidyverse team.\nStatistical Inference via Data Science: A ModernDive into R and the Tidyverse." + }, + { + "objectID": "start/models/index.html", + "href": "start/models/index.html", + "title": "Build a model", + "section": "", + "text": "How do you create a statistical model using tidymodels? In this article, we will walk you through the steps. 
We start with data for modeling, learn how to specify and train models with different engines using the parsnip package, and understand why these functions are designed this way.\nTo use code in this article, you will need to install the following packages: broom.mixed, dotwhisker, readr, rstanarm, and tidymodels.\n\nlibrary(tidymodels) # for the parsnip package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(broom.mixed) # for converting bayesian models to tidy tibbles\nlibrary(dotwhisker) # for visualizing regression results" + }, + { + "objectID": "start/models/index.html#intro", + "href": "start/models/index.html#intro", + "title": "Build a model", + "section": "", + "text": "How do you create a statistical model using tidymodels? In this article, we will walk you through the steps. We start with data for modeling, learn how to specify and train models with different engines using the parsnip package, and understand why these functions are designed this way.\nTo use code in this article, you will need to install the following packages: broom.mixed, dotwhisker, readr, rstanarm, and tidymodels.\n\nlibrary(tidymodels) # for the parsnip package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(readr) # for importing data\nlibrary(broom.mixed) # for converting bayesian models to tidy tibbles\nlibrary(dotwhisker) # for visualizing regression results" + }, + { + "objectID": "start/models/index.html#data", + "href": "start/models/index.html#data", + "title": "Build a model", + "section": "The Sea Urchins Data", + "text": "The Sea Urchins Data\nLet’s use the data from Constable (1993) to explore how three different feeding regimes affect the size of sea urchins over time. 
The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed.\nTo start, let’s read our urchins data into R, which we’ll do by providing readr::read_csv() with a url where our CSV data is located (“https://tidymodels.org/start/models/urchins.csv”):\n\nurchins <-\n # Data were assembled for a tutorial \n # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html\n read_csv(\"https://tidymodels.org/start/models/urchins.csv\") %>% \n # Change the names to be a little more verbose\n setNames(c(\"food_regime\", \"initial_volume\", \"width\")) %>% \n # Factors are very helpful for modeling, so we convert one column\n mutate(food_regime = factor(food_regime, levels = c(\"Initial\", \"Low\", \"High\")))\n#> Rows: 72 Columns: 3\n#> ── Column specification ──────────────────────────────────────────────\n#> Delimiter: \",\"\n#> chr (1): TREAT\n#> dbl (2): IV, SUTW\n#> \n#> ℹ Use `spec()` to retrieve the full column specification for this data.\n#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.\n\nLet’s take a quick look at the data:\n\nurchins\n#> # A tibble: 72 × 3\n#> food_regime initial_volume width\n#> <fct> <dbl> <dbl>\n#> 1 Initial 3.5 0.01 \n#> 2 Initial 5 0.02 \n#> 3 Initial 8 0.061\n#> 4 Initial 10 0.051\n#> 5 Initial 13 0.041\n#> 6 Initial 13 0.061\n#> 7 Initial 15 0.041\n#> 8 Initial 15 0.071\n#> 9 Initial 16 0.092\n#> 10 Initial 17 0.051\n#> # ℹ 62 more rows\n\nThe urchins data is a tibble. If you are new to tibbles, the best place to start is the tibbles chapter in R for Data Science. 
For each of the 72 urchins, we know their:\n\nexperimental feeding regime group (food_regime: either Initial, Low, or High),\nsize in milliliters at the start of the experiment (initial_volume), and\nsuture width at the end of the experiment (width).\n\nAs a first step in modeling, it’s always a good idea to plot the data:\n\nggplot(urchins,\n aes(x = initial_volume, \n y = width, \n group = food_regime, \n col = food_regime)) + \n geom_point() + \n geom_smooth(method = lm, se = FALSE) +\n scale_color_viridis_d(option = \"plasma\", end = .7)\n#> `geom_smooth()` using formula = 'y ~ x'\n\n\n\n\n\n\n\n\nWe can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition." + }, + { + "objectID": "start/models/index.html#build-model", + "href": "start/models/index.html#build-model", + "title": "Build a model", + "section": "Build and fit a model", + "text": "Build and fit a model\nA standard two-way analysis of variance (ANOVA) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let’s build a model that allows for two-way interactions. Specifying an R formula with our variables in this way:\n\nwidth ~ initial_volume * food_regime\n\nallows our regression model depending on initial volume to have separate slopes and intercepts for each food regime.\nFor this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the functional form of the model that we want using the parsnip package. Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is “linear regression”. 
We can declare this with:\n\nlinear_reg()\n#> Linear Regression Model Specification (regression)\n#> \n#> Computational engine: lm\n\nThat is pretty underwhelming since, on its own, it doesn’t really do much. However, now that the type of model has been specified, we can think about a method for fitting or training the model, the model engine. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for linear_reg() is \"lm\" for ordinary least squares, as you can see above. We could set a non-default option instead:\n\nlinear_reg() %>% \n set_engine(\"keras\")\n#> Linear Regression Model Specification (regression)\n#> \n#> Computational engine: keras\n\nThe documentation page for linear_reg() lists all the possible engines. We’ll save our model object using the default engine as lm_mod.\n\nlm_mod <- linear_reg()\n\nFrom here, the model can be estimated or trained using the fit() function:\n\nlm_fit <- \n lm_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\nlm_fit\n#> parsnip model object\n#> \n#> \n#> Call:\n#> stats::lm(formula = width ~ initial_volume * food_regime, data = data)\n#> \n#> Coefficients:\n#> (Intercept) initial_volume \n#> 0.0331216 0.0015546 \n#> food_regimeLow food_regimeHigh \n#> 0.0197824 0.0214111 \n#> initial_volume:food_regimeLow initial_volume:food_regimeHigh \n#> -0.0012594 0.0005254\n\nPerhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the summary() function for lm objects can provide that, it gives the results back in an unwieldy format. Many models have a tidy() method that provides the summary results in a more predictable and useful format (e.g. 
a data frame with standard column names):\n\ntidy(lm_fit)\n#> # A tibble: 6 × 5\n#> term estimate std.error statistic p.value\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 0.0331 0.00962 3.44 0.00100 \n#> 2 initial_volume 0.00155 0.000398 3.91 0.000222\n#> 3 food_regimeLow 0.0198 0.0130 1.52 0.133 \n#> 4 food_regimeHigh 0.0214 0.0145 1.47 0.145 \n#> 5 initial_volume:food_regimeLow -0.00126 0.000510 -2.47 0.0162 \n#> 6 initial_volume:food_regimeHigh 0.000525 0.000702 0.748 0.457\n\nThis kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package:\n\ntidy(lm_fit) %>% \n dwplot(dot_args = list(size = 2, color = \"black\"),\n whisker_args = list(color = \"black\"),\n vline = geom_vline(xintercept = 0, colour = \"grey50\", linetype = 2))" + }, + { + "objectID": "start/models/index.html#predict-model", + "href": "start/models/index.html#predict-model", + "title": "Build a model", + "section": "Use a model to predict", + "text": "Use a model to predict\nThis fitted object lm_fit has the lm model output built-in, which you can access with lm_fit$fit, but there are some benefits to using the fitted parsnip model object when it comes to predicting.\nSuppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph:\n\nnew_points <- expand.grid(initial_volume = 20, \n food_regime = c(\"Initial\", \"Low\", \"High\"))\nnew_points\n#> initial_volume food_regime\n#> 1 20 Initial\n#> 2 20 Low\n#> 3 20 High\n\nTo get our predicted results, we can use the predict() function to find the mean values at 20ml.\nIt is also important to communicate the variability, so we also need to find the predicted confidence intervals. 
If we had used lm() to fit the model directly, a few minutes of reading the documentation page for predict.lm() would explain how to do this. However, if we decide to use a different model to estimate urchin size (spoiler: we will!), it is likely that a completely different syntax would be required.\nInstead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values.\nFirst, let’s generate the mean body width values:\n\nmean_pred <- predict(lm_fit, new_data = new_points)\nmean_pred\n#> # A tibble: 3 × 1\n#> .pred\n#> <dbl>\n#> 1 0.0642\n#> 2 0.0588\n#> 3 0.0961\n\nWhen making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. This makes it easy to combine the original data and the predictions in a usable format:\n\nconf_int_pred <- predict(lm_fit, \n new_data = new_points, \n type = \"conf_int\")\nconf_int_pred\n#> # A tibble: 3 × 2\n#> .pred_lower .pred_upper\n#> <dbl> <dbl>\n#> 1 0.0555 0.0729\n#> 2 0.0499 0.0678\n#> 3 0.0870 0.105\n\n# Now combine: \nplot_data <- \n new_points %>% \n bind_cols(mean_pred) %>% \n bind_cols(conf_int_pred)\n\n# and plot:\nggplot(plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, \n ymax = .pred_upper),\n width = .2) + \n labs(y = \"urchin size\")" + }, + { + "objectID": "start/models/index.html#new-engine", + "href": "start/models/index.html#new-engine", + "title": "Build a model", + "section": "Model with a different engine", + "text": "Model with a different engine\nEvery one on your team is happy with that plot except that one person who just read their first book on Bayesian analysis. They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. 
In such an analysis, a prior distribution needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors wide using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom).\nThe documentation on the rstanarm package shows us that the stan_glm() function can be used to estimate this model, and that the function arguments that need to be specified are called prior and prior_intercept. It turns out that linear_reg() has a stan engine. Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to parsnip::set_engine(). After that, the same exact fit() call is used:\n\n# set the prior distribution\nprior_dist <- rstanarm::student_t(df = 1)\n\nset.seed(123)\n\n# make the parsnip model\nbayes_mod <- \n linear_reg() %>% \n set_engine(\"stan\", \n prior_intercept = prior_dist, \n prior = prior_dist) \n\n# train the model\nbayes_fit <- \n bayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n\nprint(bayes_fit, digits = 5)\n#> parsnip model object\n#> \n#> stan_glm\n#> family: gaussian [identity]\n#> formula: width ~ initial_volume * food_regime\n#> observations: 72\n#> predictors: 6\n#> ------\n#> Median MAD_SD \n#> (Intercept) 0.03336 0.01003\n#> initial_volume 0.00156 0.00040\n#> food_regimeLow 0.01963 0.01308\n#> food_regimeHigh 0.02120 0.01421\n#> initial_volume:food_regimeLow -0.00126 0.00051\n#> initial_volume:food_regimeHigh 0.00054 0.00070\n#> \n#> Auxiliary parameter(s):\n#> Median MAD_SD \n#> sigma 0.02129 0.00188\n#> \n#> ------\n#> * For help interpreting the printed output see ?print.stanreg\n#> * For info on the priors used see ?prior_summary.stanreg\n\nThis kind of Bayesian analysis 
(like many models) involves randomly generated numbers in its fitting procedure. We can use set.seed() to ensure that the same (pseudo-)random numbers are generated each time we run this code. The number 123 isn’t special or related to our data; it is just a “seed” used to choose random numbers.\nTo update the parameter table, the tidy() method is once again used:\n\ntidy(bayes_fit, conf.int = TRUE)\n#> # A tibble: 6 × 5\n#> term estimate std.error conf.low conf.high\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 0.0334 0.0100 0.0179 0.0493 \n#> 2 initial_volume 0.00156 0.000404 0.000876 0.00219 \n#> 3 food_regimeLow 0.0196 0.0131 -0.00271 0.0414 \n#> 4 food_regimeHigh 0.0212 0.0142 -0.00289 0.0455 \n#> 5 initial_volume:food_regimeLow -0.00126 0.000515 -0.00213 -0.000364\n#> 6 initial_volume:food_regimeHigh 0.000541 0.000696 -0.000669 0.00174\n\nA goal of the tidymodels packages is that the interfaces to common tasks are standardized (as seen in the tidy() results above). The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax:\n\nbayes_plot_data <- \n new_points %>% \n bind_cols(predict(bayes_fit, new_data = new_points)) %>% \n bind_cols(predict(bayes_fit, new_data = new_points, type = \"conf_int\"))\n\nggplot(bayes_plot_data, aes(x = food_regime)) + \n geom_point(aes(y = .pred)) + \n geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + \n labs(y = \"urchin size\") + \n ggtitle(\"Bayesian model with t(1) prior distribution\")\n\n\n\n\n\n\n\n\nThis isn’t very different from the non-Bayesian results (except in interpretation).\n\n\n\n\n\n\nNote\n\n\n\nThe parsnip package can work with many model types, engines, and arguments. Check out tidymodels.org/find/parsnip to see what is available." 
+ }, + { + "objectID": "start/models/index.html#why", + "href": "start/models/index.html#why", + "title": "Build a model", + "section": "Why does it work that way?", + "text": "Why does it work that way?\nThe extra step of defining the model using a function like linear_reg() might seem superfluous since a call to lm() is much more succinct. However, the problem with standard modeling functions is that they don’t separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can’t recycle those computations.\nAlso, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using a single function call). Model tuning with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if linear_reg() immediately fit the model.\nIf you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (%>%). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the data as the first argument. 
For example:\n\nurchins %>% \n group_by(food_regime) %>% \n summarize(med_vol = median(initial_volume))\n#> # A tibble: 3 × 2\n#> food_regime med_vol\n#> <fct> <dbl>\n#> 1 Initial 20.5\n#> 2 Low 19.2\n#> 3 High 15\n\nwhereas the modeling code uses the pipe to pass around the model object:\n\nbayes_mod %>% \n fit(width ~ initial_volume * food_regime, data = urchins)\n\nThis may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates:\n\nggplot(urchins,\n aes(initial_volume, width)) + # returns a ggplot object \n geom_jitter() + # same\n geom_smooth(method = lm, se = FALSE) + # same \n labs(x = \"Volume\", y = \"Width\") # etc" + }, + { + "objectID": "start/models/index.html#session-info", + "href": "start/models/index.html#session-info", + "title": "Build a model", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> broom.mixed * 0.2.9.4 2022-04-17 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dotwhisker * 0.7.4 2021-09-02 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> readr * 2.1.4 2023-02-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN 
(R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> rstanarm * 2.21.4 2023-04-08 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "start/recipes/index.html", + "href": "start/recipes/index.html", + "title": "Preprocess your data with recipes", + "section": "", + "text": "In our Build a Model article, we learned how to specify and train models with different engines using the parsnip package. In this article, we’ll explore another tidymodels package, recipes, which is designed to help you preprocess your data before training your model. Recipes are built as a series of preprocessing steps, such as:\n\nconverting qualitative predictors to indicator variables (also known as dummy variables),\ntransforming data to be on a different scale (e.g., taking the logarithm of a variable),\ntransforming whole groups of predictors together,\nextracting key features from raw variables (e.g., getting the day of the week out of a date variable),\n\nand so on. If you are familiar with R’s formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. 
This article shows how to use recipes for modeling.\nTo use code in this article, you will need to install the following packages: nycflights13, skimr, and tidymodels.\n\nlibrary(tidymodels) # for the recipes package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(nycflights13) # for flight data\nlibrary(skimr) # for variable summaries" + }, + { + "objectID": "start/recipes/index.html#intro", + "href": "start/recipes/index.html#intro", + "title": "Preprocess your data with recipes", + "section": "", + "text": "In our Build a Model article, we learned how to specify and train models with different engines using the parsnip package. In this article, we’ll explore another tidymodels package, recipes, which is designed to help you preprocess your data before training your model. Recipes are built as a series of preprocessing steps, such as:\n\nconverting qualitative predictors to indicator variables (also known as dummy variables),\ntransforming data to be on a different scale (e.g., taking the logarithm of a variable),\ntransforming whole groups of predictors together,\nextracting key features from raw variables (e.g., getting the day of the week out of a date variable),\n\nand so on. If you are familiar with R’s formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. 
This article shows how to use recipes for modeling.\nTo use code in this article, you will need to install the following packages: nycflights13, skimr, and tidymodels.\n\nlibrary(tidymodels) # for the recipes package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(nycflights13) # for flight data\nlibrary(skimr) # for variable summaries" + }, + { + "objectID": "start/recipes/index.html#data", + "href": "start/recipes/index.html#data", + "title": "Preprocess your data with recipes", + "section": "The New York City flight data", + "text": "The New York City flight data\nLet’s use the nycflights13 data to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. Let’s start by loading the data and making a few changes to the variables:\n\nset.seed(123)\n\nflight_data <- \n flights %>% \n mutate(\n # Convert the arrival delay to a factor\n arr_delay = ifelse(arr_delay >= 30, \"late\", \"on_time\"),\n arr_delay = factor(arr_delay),\n # We will use the date (not date-time) in the recipe below\n date = lubridate::as_date(time_hour)\n ) %>% \n # Include the weather data\n inner_join(weather, by = c(\"origin\", \"time_hour\")) %>% \n # Only retain the specific columns we will use\n select(dep_time, flight, origin, dest, air_time, distance, \n carrier, date, arr_delay, time_hour) %>% \n # Exclude missing data\n na.omit() %>% \n # For creating models, it is better to have qualitative columns\n # encoded as factors (instead of character strings)\n mutate_if(is.character, as.factor)\n\nWe can see that about 16% of the flights in this data set arrived more than 30 minutes late.\n\nflight_data %>% \n count(arr_delay) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> arr_delay n prop\n#> <fct> <int> <dbl>\n#> 1 late 52540 0.161\n#> 2 on_time 273279 0.839\n\nBefore we start building up our recipe, let’s take a quick look at a few specific variables that will be 
important for both preprocessing and modeling.\nFirst, notice that the variable we created called arr_delay is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor.\n\nglimpse(flight_data)\n#> Rows: 325,819\n#> Columns: 10\n#> $ dep_time <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 558, …\n#> $ flight <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71…\n#> $ origin <fct> EWR, LGA, JFK, JFK, LGA, EWR, EWR, LGA, JFK, LGA, JFK, JFK, …\n#> $ dest <fct> IAH, IAH, MIA, BQN, ATL, ORD, FLL, IAD, MCO, ORD, PBI, TPA, …\n#> $ air_time <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 3…\n#> $ distance <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028,…\n#> $ carrier <fct> UA, UA, AA, B6, DL, UA, B6, EV, B6, AA, B6, B6, UA, UA, AA, …\n#> $ date <date> 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01,…\n#> $ arr_delay <fct> on_time, on_time, late, on_time, on_time, on_time, on_time, …\n#> $ time_hour <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:…\n\nSecond, there are two variables that we don’t want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. 
These are flight, a numeric value, and time_hour, a date-time value.\nThird, there are 104 flight destinations contained in dest and 16 distinct carriers.\n\nflight_data %>% \n skimr::skim(dest, carrier) \n\n\n\n\nData summary\n\n\n\n\nName\n\n\nPiped data\n\n\n\n\nNumber of rows\n\n\n325819\n\n\n\n\nNumber of columns\n\n\n10\n\n\n\n\n_______________________\n\n\n\n\n\n\nColumn type frequency:\n\n\n\n\n\n\nfactor\n\n\n2\n\n\n\n\n________________________\n\n\n\n\n\n\nGroup variables\n\n\nNone\n\n\n\n\n\nVariable type: factor\n\n\n\n\n\nskim_variable\n\n\nn_missing\n\n\ncomplete_rate\n\n\nordered\n\n\nn_unique\n\n\ntop_counts\n\n\n\n\n\n\ndest\n\n\n0\n\n\n1\n\n\nFALSE\n\n\n104\n\n\nATL: 16771, ORD: 16507, LAX: 15942, BOS: 14948\n\n\n\n\ncarrier\n\n\n0\n\n\n1\n\n\nFALSE\n\n\n16\n\n\nUA: 57489, B6: 53715, EV: 50868, DL: 47465\n\n\n\n\n\n\n\nBecause we’ll be using a simple logistic regression model, the variables dest and carrier will be converted to dummy variables. However, some of these values do not occur very frequently and this could complicate our analysis. We’ll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling." + }, + { + "objectID": "start/recipes/index.html#data-split", + "href": "start/recipes/index.html#data-split", + "title": "Preprocess your data with recipes", + "section": "Data splitting", + "text": "Data splitting\nTo get started, let’s split this single dataset into two: a training set and a testing set. We’ll keep most of the rows in the original dataset (subset chosen randomly) in the training set. 
The training data will be used to fit the model, and the testing set will be used to measure model performance.\nTo do this, we can use the rsample package to create an object that contains the information on how to split the data, and then two more rsample functions to create data frames for the training and testing sets:\n\n# Fix the random numbers by setting the seed \n# This enables the analysis to be reproducible when random numbers are used \nset.seed(222)\n# Put 3/4 of the data into the training set \ndata_split <- initial_split(flight_data, prop = 3/4)\n\n# Create data frames for the two sets:\ntrain_data <- training(data_split)\ntest_data <- testing(data_split)" + }, + { + "objectID": "start/recipes/index.html#recipe", + "href": "start/recipes/index.html#recipe", + "title": "Preprocess your data with recipes", + "section": "Create recipe and roles", + "text": "Create recipe and roles\nTo get started, let’s create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model.\nLet’s initiate a new recipe:\n\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) \n\nThe recipe() function as we used it here has two arguments:\n\nA formula. Any variable on the left-hand side of the tilde (~) is considered the model outcome (here, arr_delay). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (.) to indicate all other variables as predictors.\nThe data. A recipe is associated with the data set used to create the model. This will typically be the training set, so data = train_data here. Naming a data set doesn’t actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc.\n\nNow we can add roles to this recipe. 
We can use the update_role() function to let recipes know that flight and time_hour are variables with a custom role that we called \"ID\" (a role can have any character value). Whereas our formula included all variables in the training set other than arr_delay as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors.\n\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") \n\nThis step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong.\nTo get the current set of variables and roles, use the summary() function:\n\nsummary(flights_rec)\n#> # A tibble: 10 × 4\n#> variable type role source \n#> <chr> <list> <chr> <chr> \n#> 1 dep_time <chr [2]> predictor original\n#> 2 flight <chr [2]> ID original\n#> 3 origin <chr [3]> predictor original\n#> 4 dest <chr [3]> predictor original\n#> 5 air_time <chr [2]> predictor original\n#> 6 distance <chr [2]> predictor original\n#> 7 carrier <chr [3]> predictor original\n#> 8 date <chr [1]> predictor original\n#> 9 time_hour <chr [1]> ID original\n#> 10 arr_delay <chr [3]> outcome original" + }, + { + "objectID": "start/recipes/index.html#features", + "href": "start/recipes/index.html#features", + "title": "Preprocess your data with recipes", + "section": "Create features", + "text": "Create features\nNow we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of feature engineering might go a long way to improving our model. How should the date be encoded into the model? 
The date column has an R date object so including that column “as is” will mean that the model will convert it to a numeric format equal to the number of days after a reference date:\n\nflight_data %>% \n distinct(date) %>% \n mutate(numeric_date = as.numeric(date)) \n#> # A tibble: 364 × 2\n#> date numeric_date\n#> <date> <dbl>\n#> 1 2013-01-01 15706\n#> 2 2013-01-02 15707\n#> 3 2013-01-03 15708\n#> 4 2013-01-04 15709\n#> 5 2013-01-05 15710\n#> 6 2013-01-06 15711\n#> 7 2013-01-07 15712\n#> 8 2013-01-08 15713\n#> 9 2013-01-09 15714\n#> 10 2013-01-10 15715\n#> # ℹ 354 more rows\n\nIt’s possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. However, it might be better to add model terms derived from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single date variable:\n\nthe day of the week,\nthe month, and\nwhether or not the date corresponds to a holiday.\n\nLet’s do all three of these by adding steps to our recipe:\n\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE)\n\nWhat do each of these steps do?\n\nWith step_date(), we created two new factor columns with the appropriate day of the week and the month.\nWith step_holiday(), we created a binary variable indicating whether the current date is a holiday or not. The argument value of timeDate::listHolidays(\"US\") uses the timeDate package to list the 18 standard US holidays.\nWith keep_original_cols = FALSE, we remove the original date variable since we no longer want it in the model. 
Many recipe steps that create new variables have this argument.\n\nNext, we’ll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix).\nFor factors like dest and origin, standard practice is to convert them into dummy or indicator variables to make them numeric. These are binary values for each level of the factor. For example, our origin variable has values of \"EWR\", \"JFK\", and \"LGA\". The standard dummy variable encoding, shown below, will create two numeric columns of the data that are 1 when the originating airport is \"JFK\" or \"LGA\" and zero otherwise, respectively.\n\n\n\n\n\norigin\norigin_JFK\norigin_LGA\n\n\n\n\nJFK\n1\n0\n\n\nEWR\n0\n0\n\n\nLGA\n0\n1\n\n\n\n\n\n\n\nBut, unlike the standard model formula methods in R, a recipe does not automatically create these dummy variables for you; you’ll need to tell your recipe to add this step. This is for two reasons. First, many models do not require numeric predictors, so dummy variables may not always be preferred. Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. 
For those reasons, you need to explicitly tell recipes to create dummy variables using step_dummy():\n\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors())\n\nHere, we did something different than before: instead of applying a step to an individual variable, we used selectors to apply this recipe step to several variables at once, all_nominal_predictors(). The selector functions can be combined to select intersections of variables.\nAt this stage in the recipe, this step selects the origin, dest, and carrier variables. It also includes two new variables, date_dow and date_month, that were created by the earlier step_date().\nMore generally, the recipe selectors mean that you don’t always have to apply steps to individual variables one at a time. Since a recipe knows the variable type and role of each column, they can also be selected (or dropped) using this information.\nWe need one final step to add to our recipe. Since carrier and dest have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don’t exist in the training set. For example, there is one destination that is only in the test set:\n\ntest_data %>% \n distinct(dest) %>% \n anti_join(train_data)\n#> Joining with `by = join_by(dest)`\n#> # A tibble: 1 × 1\n#> dest \n#> <fct>\n#> 1 LEX\n\nWhen the recipe is applied to the training set, a column is made for LEX because the factor levels come from flight_data (not the training set), but this column will contain all zeros. This is a “zero-variance predictor” that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. 
step_zv() will remove columns from the data when the training set data have a single value, so it is added to the recipe after step_dummy():\n\nflights_rec <- \n recipe(arr_delay ~ ., data = train_data) %>% \n update_role(flight, time_hour, new_role = \"ID\") %>% \n step_date(date, features = c(\"dow\", \"month\")) %>% \n step_holiday(date, \n holidays = timeDate::listHolidays(\"US\"), \n keep_original_cols = FALSE) %>% \n step_dummy(all_nominal_predictors()) %>% \n step_zv(all_predictors())\n\nNow we’ve created a specification of what should be done with the data. How do we use the recipe we made?" + }, + { + "objectID": "start/recipes/index.html#fit-workflow", + "href": "start/recipes/index.html#fit-workflow", + "title": "Preprocess your data with recipes", + "section": "Fit a model with a recipe", + "text": "Fit a model with a recipe\nLet’s use logistic regression to model the flight data. As we saw in Build a Model, we start by building a model specification using the parsnip package:\n\nlr_mod <- \n logistic_reg() %>% \n set_engine(\"glm\")\n\nWe will want to use our recipe across several steps as we train and test our model. We will:\n\nProcess the recipe using the training set: This involves any estimation or calculations based on the training set. For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal.\nApply the recipe to the training set: We create the final predictor set on the training set.\nApply the recipe to the test set: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set.\n\nTo simplify this process, we can use a model workflow, which pairs a model and recipe together. 
This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test workflows. We’ll use the workflows package from tidymodels to bundle our parsnip model (lr_mod) with our recipe (flights_rec).\n\nflights_wflow <- \n workflow() %>% \n add_model(lr_mod) %>% \n add_recipe(flights_rec)\n\nflights_wflow\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Recipe\n#> Model: logistic_reg()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> 4 Recipe Steps\n#> \n#> • step_date()\n#> • step_holiday()\n#> • step_dummy()\n#> • step_zv()\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Logistic Regression Model Specification (classification)\n#> \n#> Computational engine: glm\n\nNow, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors:\n\nflights_fit <- \n flights_wflow %>% \n fit(data = train_data)\n\nThis object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions extract_fit_parsnip() and extract_recipe(). For example, here we pull the fitted model object then use the broom::tidy() function to get a tidy tibble of model coefficients:\n\nflights_fit %>% \n extract_fit_parsnip() %>% \n tidy()\n#> # A tibble: 158 × 5\n#> term estimate std.error statistic p.value\n#> <chr> <dbl> <dbl> <dbl> <dbl>\n#> 1 (Intercept) 7.26 2.73 2.66 7.75e- 3\n#> 2 dep_time -0.00166 0.0000141 -118. 
0 \n#> 3 air_time -0.0440 0.000563 -78.2 0 \n#> 4 distance 0.00508 0.00150 3.38 7.13e- 4\n#> 5 date_USChristmasDay 1.35 0.178 7.59 3.32e-14\n#> 6 date_USColumbusDay 0.721 0.170 4.23 2.33e- 5\n#> 7 date_USCPulaskisBirthday 0.804 0.139 5.78 7.38e- 9\n#> 8 date_USDecorationMemorialDay 0.582 0.117 4.96 7.22e- 7\n#> 9 date_USElectionDay 0.945 0.190 4.97 6.73e- 7\n#> 10 date_USGoodFriday 1.24 0.167 7.44 1.04e-13\n#> # ℹ 148 more rows" + }, + { + "objectID": "start/recipes/index.html#predict-workflow", + "href": "start/recipes/index.html#predict-workflow", + "title": "Preprocess your data with recipes", + "section": "Use a trained workflow to predict", + "text": "Use a trained workflow to predict\nOur goal was to predict whether a plane arrives more than 30 minutes late. We have just:\n\nBuilt the model (lr_mod),\nCreated a preprocessing recipe (flights_rec),\nBundled the model and recipe (flights_wflow), and\nTrained our workflow using a single call to fit().\n\nThe next step is to use the trained workflow (flights_fit) to predict with the unseen test data, which we will do with a single call to predict(). The predict() method applies the recipe to the new data, then passes them to the fitted model.\n\npredict(flights_fit, test_data)\n#> # A tibble: 81,455 × 1\n#> .pred_class\n#> <fct> \n#> 1 on_time \n#> 2 on_time \n#> 3 on_time \n#> 4 on_time \n#> 5 on_time \n#> 6 on_time \n#> 7 on_time \n#> 8 on_time \n#> 9 on_time \n#> 10 on_time \n#> # ℹ 81,445 more rows\n\nBecause our outcome variable here is a factor, the output from predict() returns the predicted class: late versus on_time. But, let’s say we want the predicted class probabilities for each flight instead. 
To return those, we can specify type = \"prob\" when we use predict() or use augment() with the model plus test data to save them together:\n\nflights_aug <- \n augment(flights_fit, test_data)\n\n# The data look like: \nflights_aug %>%\n select(arr_delay, time_hour, flight, .pred_class, .pred_on_time)\n#> # A tibble: 81,455 × 5\n#> arr_delay time_hour flight .pred_class .pred_on_time\n#> <fct> <dttm> <int> <fct> <dbl>\n#> 1 on_time 2013-01-01 05:00:00 1545 on_time 0.945\n#> 2 on_time 2013-01-01 05:00:00 1714 on_time 0.949\n#> 3 on_time 2013-01-01 06:00:00 507 on_time 0.964\n#> 4 on_time 2013-01-01 06:00:00 5708 on_time 0.961\n#> 5 on_time 2013-01-01 06:00:00 71 on_time 0.962\n#> 6 on_time 2013-01-01 06:00:00 194 on_time 0.975\n#> 7 on_time 2013-01-01 06:00:00 1124 on_time 0.963\n#> 8 on_time 2013-01-01 05:00:00 1806 on_time 0.981\n#> 9 on_time 2013-01-01 06:00:00 1187 on_time 0.935\n#> 10 on_time 2013-01-01 06:00:00 4650 on_time 0.931\n#> # ℹ 81,445 more rows\n\nNow that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of .pred_on_time are p > .50. But we also know that we have 81,455 rows total to predict. We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, arr_delay.\nLet’s use the area under the ROC curve as our metric, computed using roc_curve() and roc_auc() from the yardstick package.\nTo generate a ROC curve, we need the predicted class probabilities for late and on_time, which we just calculated in the code chunk above. 
We can create the ROC curve with these values, using roc_curve() and then piping to the autoplot() method:\n\nflights_aug %>% \n roc_curve(truth = arr_delay, .pred_late) %>% \n autoplot()\n\n\n\n\n\n\n\n\nSimilarly, roc_auc() estimates the area under the curve:\n\nflights_aug %>% \n roc_auc(truth = arr_delay, .pred_late)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 roc_auc binary 0.764\n\nNot too bad! We leave it to the reader to test out this workflow without this recipe. You can use workflows::add_formula(arr_delay ~ .) instead of add_recipe() (remember to remove the identification variables first!), and see whether our recipe improved our model’s ability to predict late arrivals." + }, + { + "objectID": "start/recipes/index.html#session-info", + "href": "start/recipes/index.html#session-info", + "title": "Preprocess your data with recipes", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> nycflights13 * 1.0.2 2021-04-12 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample 
* 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> skimr * 2.1.5 2022-12-23 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "start/resampling/index.html", + "href": "start/resampling/index.html", + "title": "Evaluate your model with resampling", + "section": "", + "text": "So far, we have built a model and preprocessed data with a recipe. We also introduced workflows as a way to bundle a parsnip model and recipe together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on resampling statistics.\nTo use code in this article, you will need to install the following packages: modeldata, ranger, and tidymodels.\n\nlibrary(tidymodels) # for the rsample package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(modeldata) # for the cells data" + }, + { + "objectID": "start/resampling/index.html#intro", + "href": "start/resampling/index.html#intro", + "title": "Evaluate your model with resampling", + "section": "", + "text": "So far, we have built a model and preprocessed data with a recipe. We also introduced workflows as a way to bundle a parsnip model and recipe together. Once we have a model trained, we need a way to measure how well that model predicts new data. 
This tutorial explains how to characterize model performance based on resampling statistics.\nTo use code in this article, you will need to install the following packages: modeldata, ranger, and tidymodels.\n\nlibrary(tidymodels) # for the rsample package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(modeldata) # for the cells data" + }, + { + "objectID": "start/resampling/index.html#data", + "href": "start/resampling/index.html#data", + "title": "Evaluate your model with resampling", + "section": "The cell image data", + "text": "The cell image data\nLet’s use data from Hill, LaPan, Li, and Haney (2007), available in the modeldata package, to predict cell image segmentation quality with resampling. To start, we load this data into R:\n\ndata(cells, package = \"modeldata\")\ncells\n#> # A tibble: 2,019 × 58\n#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3\n#> <fct> <fct> <dbl> <int> <dbl> <dbl> <dbl>\n#> 1 Test PS 143. 185 15.7 4.95 9.55\n#> 2 Train PS 134. 819 31.9 207. 69.9 \n#> 3 Train WS 107. 431 28.0 116. 63.9 \n#> 4 Train PS 69.2 298 19.5 102. 28.2 \n#> 5 Test PS 2.89 285 24.3 112. 20.5 \n#> 6 Test WS 40.7 172 326. 654. 129. \n#> 7 Test WS 174. 177 260. 596. 124. \n#> 8 Test PS 180. 251 18.3 5.73 17.2 \n#> 9 Test WS 18.9 495 16.1 89.5 13.7 \n#> 10 Test WS 153. 384 17.7 89.9 20.4 \n#> # ℹ 2,009 more rows\n#> # ℹ 51 more variables: avg_inten_ch_4 <dbl>, convex_hull_area_ratio_ch_1 <dbl>,\n#> # convex_hull_perim_ratio_ch_1 <dbl>, diff_inten_density_ch_1 <dbl>,\n#> # diff_inten_density_ch_3 <dbl>, diff_inten_density_ch_4 <dbl>,\n#> # entropy_inten_ch_1 <dbl>, entropy_inten_ch_3 <dbl>,\n#> # entropy_inten_ch_4 <dbl>, eq_circ_diam_ch_1 <dbl>,\n#> # eq_ellipse_lwr_ch_1 <dbl>, eq_ellipse_oblate_vol_ch_1 <dbl>, …\n\nWe have data for 2019 cells, with 58 variables. The main outcome variable of interest for us here is called class, which you can see is a factor. 
But before we jump into predicting the class variable, we need to understand it better. Below is a brief primer on cell image segmentation.\n\nPredicting image segmentation quality\nSome biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined.\nFor example, in top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell.\n\n\n\n\n\n\n\n\n\nUsing these colors, the cells in an image can be segmented so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or “oblongness” of the cell.\nThe bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion.\nA cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (PS) or well-segmented (WS). 
If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented.\n\n\nBack to the cells data\nThe cells data has class labels for 2019 cells — each cell is labeled as either poorly segmented (PS) or well-segmented (WS). Each also has a total of 56 predictors based on automated image analysis measurements. For example, avg_inten_ch_1 is the mean intensity of the data contained in the nucleus, area_ch_1 is the total size of the cell, and so on (some predictors are fairly arcane in nature).\n\ncells\n#> # A tibble: 2,019 × 58\n#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3\n#> <fct> <fct> <dbl> <int> <dbl> <dbl> <dbl>\n#> 1 Test PS 143. 185 15.7 4.95 9.55\n#> 2 Train PS 134. 819 31.9 207. 69.9 \n#> 3 Train WS 107. 431 28.0 116. 63.9 \n#> 4 Train PS 69.2 298 19.5 102. 28.2 \n#> 5 Test PS 2.89 285 24.3 112. 20.5 \n#> 6 Test WS 40.7 172 326. 654. 129. \n#> 7 Test WS 174. 177 260. 596. 124. \n#> 8 Test PS 180. 251 18.3 5.73 17.2 \n#> 9 Test WS 18.9 495 16.1 89.5 13.7 \n#> 10 Test WS 153. 
384 17.7 89.9 20.4 \n#> # ℹ 2,009 more rows\n#> # ℹ 51 more variables: avg_inten_ch_4 <dbl>, convex_hull_area_ratio_ch_1 <dbl>,\n#> # convex_hull_perim_ratio_ch_1 <dbl>, diff_inten_density_ch_1 <dbl>,\n#> # diff_inten_density_ch_3 <dbl>, diff_inten_density_ch_4 <dbl>,\n#> # entropy_inten_ch_1 <dbl>, entropy_inten_ch_3 <dbl>,\n#> # entropy_inten_ch_4 <dbl>, eq_circ_diam_ch_1 <dbl>,\n#> # eq_ellipse_lwr_ch_1 <dbl>, eq_ellipse_oblate_vol_ch_1 <dbl>, …\n\nThe rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells:\n\ncells %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> class n prop\n#> <fct> <int> <dbl>\n#> 1 PS 1300 0.644\n#> 2 WS 719 0.356" + }, + { + "objectID": "start/resampling/index.html#data-split", + "href": "start/resampling/index.html#data-split", + "title": "Evaluate your model with resampling", + "section": "Data splitting", + "text": "Data splitting\nIn our previous Preprocess your data with recipes article, we started by splitting our data. It is common when beginning a modeling project to separate the data set into two partitions:\n\nThe training set is used to estimate parameters, compare models and feature engineering techniques, tune models, etc.\nThe test set is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance.\n\nThere are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the rsample package for this purpose.\nSince random sampling uses random numbers, it is important to set the random number seed. 
This ensures that the random numbers can be reproduced at a later time (if needed).\nThe function rsample::initial_split() takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column case. To demonstrate how to make a split, we’ll remove this column before we make our own split:\n\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\n\nHere we used the strata argument, which conducts a stratified split. This ensures that, despite the imbalance we noticed in our class variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the initial_split, the training() and testing() functions return the actual data sets.\n\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n\nnrow(cell_train)\n#> [1] 1514\nnrow(cell_train)/nrow(cells)\n#> [1] 0.7498762\n\n# training set proportions by class\ncell_train %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> class n prop\n#> <fct> <int> <dbl>\n#> 1 PS 975 0.644\n#> 2 WS 539 0.356\n\n# test set proportions by class\ncell_test %>% \n count(class) %>% \n mutate(prop = n/sum(n))\n#> # A tibble: 2 × 3\n#> class n prop\n#> <fct> <int> <dbl>\n#> 1 PS 325 0.644\n#> 2 WS 180 0.356\n\nThe majority of the modeling work is then conducted on the training set data." + }, + { + "objectID": "start/resampling/index.html#modeling", + "href": "start/resampling/index.html#modeling", + "title": "Evaluate your model with resampling", + "section": "Modeling", + "text": "Modeling\nRandom forest models are ensembles of decision trees. A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. 
When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. For categorical outcome variables like class in our cells data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample.\nOne of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won’t create a recipe for the cells data.\nAt the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute.\nTo fit a random forest model on the training set, let’s use the parsnip package with the ranger engine. We first define the model that we want to create:\n\nrf_mod <- \n rand_forest(trees = 1000) %>% \n set_engine(\"ranger\") %>% \n set_mode(\"classification\")\n\nStarting with this parsnip model object, the fit() function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing:\n\nset.seed(234)\nrf_fit <- \n rf_mod %>% \n fit(class ~ ., data = cell_train)\nrf_fit\n#> parsnip model object\n#> \n#> Ranger result\n#> \n#> Call:\n#> ranger::ranger(x = maybe_data_frame(x), y = y, num.trees = ~1000, num.threads = 1, verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE) \n#> \n#> Type: Probability estimation \n#> Number of trees: 1000 \n#> Sample size: 1514 \n#> Number of independent variables: 56 \n#> Mtry: 7 \n#> Target node size: 10 \n#> Variable importance mode: none \n#> Splitrule: gini \n#> OOB prediction error (Brier s.): 0.1189338\n\nThis new rf_fit object is our fitted model, trained on our training data set." 
+ }, + { + "objectID": "start/resampling/index.html#performance", + "href": "start/resampling/index.html#performance", + "title": "Evaluate your model with resampling", + "section": "Estimating performance", + "text": "Estimating performance\nDuring a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are:\n\nthe area under the Receiver Operating Characteristic (ROC) curve, and\noverall classification accuracy.\n\nThe ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. The hard class predictions tell us whether our model predicted PS or WS for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented.\nThe yardstick package has functions for computing both of these measures called roc_auc() and accuracy().\nAt first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let’s see what happens if we try this. To evaluate performance based on the training set, we call the predict() method to get both types of predictions (i.e. 
probabilities and hard class predictions).\n\nrf_training_pred <- \n predict(rf_fit, cell_train) %>% \n bind_cols(predict(rf_fit, cell_train, type = \"prob\")) %>% \n # Add the true outcome data back in\n bind_cols(cell_train %>% \n select(class))\n\nUsing the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious:\n\nrf_training_pred %>% # training set predictions\n roc_auc(truth = class, .pred_PS)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 roc_auc binary 1.00\nrf_training_pred %>% # training set predictions\n accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 accuracy binary 0.991\n\nNow that we have this model with exceptional performance, we proceed to the test set. Unfortunately, we discover that, although our results aren’t bad, they are certainly worse than what we initially thought based on predicting the training set:\n\nrf_testing_pred <- \n predict(rf_fit, cell_test) %>% \n bind_cols(predict(rf_fit, cell_test, type = \"prob\")) %>% \n bind_cols(cell_test %>% select(class))\n\n\nrf_testing_pred %>% # test set predictions\n roc_auc(truth = class, .pred_PS)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 roc_auc binary 0.891\nrf_testing_pred %>% # test set predictions\n accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 accuracy binary 0.816\n\n\nWhat happened here?\nThere are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic:\n\nModels like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results.\nThe training set does not have the capacity to be a good arbiter of performance. 
It is not an independent piece of information; predicting the training set can only reflect what the model already knows.\n\nTo understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the second test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test." + }, + { + "objectID": "start/resampling/index.html#resampling", + "href": "start/resampling/index.html#resampling", + "title": "Evaluate your model with resampling", + "section": "Resampling to the rescue", + "text": "Resampling to the rescue\nResampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the training set. This schematic from Kuhn and Johnson (2019) illustrates data usage for resampling methods:\n\n\n\n\n\n\n\n\n\nIn the first level of this diagram, you see what happens when you use rsample::initial_split(), which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out.\nLet’s use 10-fold cross-validation (CV) in this example. This method randomly allocates the 1514 cells in the training set to 10 groups of roughly equal size, called “folds”. For the first iteration of resampling, the first fold of about 151 cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the assessment set in the tidymodels framework.\nThe other 90% of the data (about 1362 cells) are used to fit the model. 
Again, this sounds similar to a training set, so in tidymodels we call this data the analysis set. This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions.\nIn this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics.\nThe final resampling estimates for the model are the averages of the performance statistics replicates. For example, suppose for our data the results were:\n\n\n\n\n\nresample\naccuracy\nroc_auc\nassessment size\n\n\n\n\nFold01\n0.8289474\n0.8937128\n152\n\n\nFold02\n0.7697368\n0.8768989\n152\n\n\nFold03\n0.8552632\n0.9017666\n152\n\n\nFold04\n0.8552632\n0.8928076\n152\n\n\nFold05\n0.7947020\n0.8816342\n151\n\n\nFold06\n0.8476821\n0.9244306\n151\n\n\nFold07\n0.8145695\n0.8960339\n151\n\n\nFold08\n0.8543046\n0.9267677\n151\n\n\nFold09\n0.8543046\n0.9231392\n151\n\n\nFold10\n0.8476821\n0.9266917\n151\n\n\n\n\n\n\n\nFrom these resampling statistics, the final estimate of performance for this random forest model would be 0.904 for the area under the ROC curve and 0.832 for accuracy.\nThese resampling statistics are an effective method for measuring model performance without predicting the training set directly as a whole." 
+ }, + { + "objectID": "start/resampling/index.html#fit-resamples", + "href": "start/resampling/index.html#fit-resamples", + "title": "Evaluate your model with resampling", + "section": "Fit a model with resampling", + "text": "Fit a model with resampling\nTo generate these results, the first step is to create a resampling object using rsample. There are several resampling methods implemented in rsample; cross-validation folds can be created using vfold_cv():\n\nset.seed(345)\nfolds <- vfold_cv(cell_train, v = 10)\nfolds\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 2\n#> splits id \n#> <list> <chr> \n#> 1 <split [1362/152]> Fold01\n#> 2 <split [1362/152]> Fold02\n#> 3 <split [1362/152]> Fold03\n#> 4 <split [1362/152]> Fold04\n#> 5 <split [1363/151]> Fold05\n#> 6 <split [1363/151]> Fold06\n#> 7 <split [1363/151]> Fold07\n#> 8 <split [1363/151]> Fold08\n#> 9 <split [1363/151]> Fold09\n#> 10 <split [1363/151]> Fold10\n\nThe list column for splits contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called analysis() and assessment().\nHowever, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling:\n\nResample a model specification preprocessed with a formula or recipe, or\nResample a workflow() that bundles together a model specification and formula/recipe.\n\nFor this example, let’s use a workflow() that bundles together the random forest model and a formula, since we are not using a recipe. 
Whichever of these options you use, the syntax to fit_resamples() is very similar to fit():\n\nrf_wf <- \n workflow() %>%\n add_model(rf_mod) %>%\n add_formula(class ~ .)\n\nset.seed(456)\nrf_fit_rs <- \n rf_wf %>% \n fit_resamples(folds)\n\n\nrf_fit_rs\n#> # Resampling results\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 4\n#> splits id .metrics .notes \n#> <list> <chr> <list> <list> \n#> 1 <split [1362/152]> Fold01 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 2 <split [1362/152]> Fold02 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 3 <split [1362/152]> Fold03 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 4 <split [1362/152]> Fold04 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 5 <split [1363/151]> Fold05 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 6 <split [1363/151]> Fold06 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 7 <split [1363/151]> Fold07 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 8 <split [1363/151]> Fold08 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 9 <split [1363/151]> Fold09 <tibble [2 × 4]> <tibble [0 × 3]>\n#> 10 <split [1363/151]> Fold10 <tibble [2 × 4]> <tibble [0 × 3]>\n\nThe results are similar to the folds results with some extra columns. The column .metrics contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data:\n\ncollect_metrics(rf_fit_rs)\n#> # A tibble: 2 × 6\n#> .metric .estimator mean n std_err .config \n#> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 accuracy binary 0.832 10 0.00952 Preprocessor1_Model1\n#> 2 roc_auc binary 0.904 10 0.00610 Preprocessor1_Model1\n\nThink about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. 
Also, remember that at the end of our project, we return to our test set to estimate final model performance. We have looked at this once already before we started using resampling, but let’s remind ourselves of the results:\n\nrf_testing_pred %>% # test set predictions\n roc_auc(truth = class, .pred_PS)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 roc_auc binary 0.891\nrf_testing_pred %>% # test set predictions\n accuracy(truth = class, .pred_class)\n#> # A tibble: 1 × 3\n#> .metric .estimator .estimate\n#> <chr> <chr> <dbl>\n#> 1 accuracy binary 0.816\n\nThe performance metrics from the test set are much closer to the performance metrics computed using resampling than our first (“bad idea”) attempt. Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model’s performance." + }, + { + "objectID": "start/resampling/index.html#session-info", + "href": "start/resampling/index.html#session-info", + "title": "Evaluate your model with resampling", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 
2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> ranger * 0.15.1 2023-04-03 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + }, + { + "objectID": "start/tuning/index.html", + "href": "start/tuning/index.html", + "title": "Tune model parameters", + "section": "", + "text": "Some model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called hyperparameters. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this mtry in tidymodels) or the learning rate in a boosted tree model (we call this learn_rate). Instead of learning these kinds of hyperparameters during model training, we can estimate the best values for these values by training many models on resampled data sets and exploring how well all these models perform. 
This process is called tuning.\nTo use code in this article, you will need to install the following packages: rpart, rpart.plot, tidymodels, and vip.\n\nlibrary(tidymodels) # for the tune package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(rpart.plot) # for visualizing a decision tree\nlibrary(vip) # for variable importance plots" + }, + { + "objectID": "start/tuning/index.html#intro", + "href": "start/tuning/index.html#intro", + "title": "Tune model parameters", + "section": "", + "text": "Some model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called hyperparameters. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this mtry in tidymodels) or the learning rate in a boosted tree model (we call this learn_rate). Instead of learning these kinds of hyperparameters during model training, we can estimate the best values for these values by training many models on resampled data sets and exploring how well all these models perform. This process is called tuning.\nTo use code in this article, you will need to install the following packages: rpart, rpart.plot, tidymodels, and vip.\n\nlibrary(tidymodels) # for the tune package, along with the rest of tidymodels\n\n# Helper packages\nlibrary(rpart.plot) # for visualizing a decision tree\nlibrary(vip) # for variable importance plots" + }, + { + "objectID": "start/tuning/index.html#data", + "href": "start/tuning/index.html#data", + "title": "Tune model parameters", + "section": "The cell image data, revisited", + "text": "The cell image data, revisited\nIn our previous Evaluate your model with resampling article, we introduced a data set of images of cells that were labeled by experts as well-segmented (WS) or poorly segmented (PS). We trained a random forest model to predict which images are segmented well vs. 
poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used resampling to estimate the performance of our model on this data.\n\ndata(cells, package = \"modeldata\")\ncells\n#> # A tibble: 2,019 × 58\n#> case class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3\n#> <fct> <fct> <dbl> <int> <dbl> <dbl> <dbl>\n#> 1 Test PS 143. 185 15.7 4.95 9.55\n#> 2 Train PS 134. 819 31.9 207. 69.9 \n#> 3 Train WS 107. 431 28.0 116. 63.9 \n#> 4 Train PS 69.2 298 19.5 102. 28.2 \n#> 5 Test PS 2.89 285 24.3 112. 20.5 \n#> 6 Test WS 40.7 172 326. 654. 129. \n#> 7 Test WS 174. 177 260. 596. 124. \n#> 8 Test PS 180. 251 18.3 5.73 17.2 \n#> 9 Test WS 18.9 495 16.1 89.5 13.7 \n#> 10 Test WS 153. 384 17.7 89.9 20.4 \n#> # ℹ 2,009 more rows\n#> # ℹ 51 more variables: avg_inten_ch_4 <dbl>, convex_hull_area_ratio_ch_1 <dbl>,\n#> # convex_hull_perim_ratio_ch_1 <dbl>, diff_inten_density_ch_1 <dbl>,\n#> # diff_inten_density_ch_3 <dbl>, diff_inten_density_ch_4 <dbl>,\n#> # entropy_inten_ch_1 <dbl>, entropy_inten_ch_3 <dbl>,\n#> # entropy_inten_ch_4 <dbl>, eq_circ_diam_ch_1 <dbl>,\n#> # eq_ellipse_lwr_ch_1 <dbl>, eq_ellipse_oblate_vol_ch_1 <dbl>, …" + }, + { + "objectID": "start/tuning/index.html#why-tune", + "href": "start/tuning/index.html#why-tune", + "title": "Tune model parameters", + "section": "Predicting image segmentation, but better", + "text": "Predicting image segmentation, but better\nRandom forest models are a tree-based ensemble method, and typically perform well with default hyperparameters. However, the accuracy of some other tree-based models, such as boosted tree models or decision tree models, can be sensitive to the values of hyperparameters. In this article, we will train a decision tree model. There are several hyperparameters for decision tree models that can be tuned for better performance. 
Let’s explore:\n\nthe complexity parameter (which we call cost_complexity in tidymodels) for the tree, and\nthe maximum tree_depth.\n\nTuning these hyperparameters can improve model performance because decision tree models are prone to overfitting. This happens because single tree models tend to fit the training data too well — so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data.\nWe will tune the model hyperparameters to avoid overfitting. Tuning the value of cost_complexity helps by pruning back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem—an underfit tree. Tuning tree_depth, on the other hand, helps by stopping our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation.\nBefore we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As before, we can use strata = class if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation.\n\nset.seed(123)\ncell_split <- initial_split(cells %>% select(-case), \n strata = class)\ncell_train <- training(cell_split)\ncell_test <- testing(cell_split)\n\nWe use the training data for tuning the model." 
+ }, + { + "objectID": "start/tuning/index.html#tuning", + "href": "start/tuning/index.html#tuning", + "title": "Tune model parameters", + "section": "Tuning hyperparameters", + "text": "Tuning hyperparameters\nLet’s start with the parsnip package, using a decision_tree() model with the rpart engine. To tune the decision tree hyperparameters cost_complexity and tree_depth, we create a model specification that identifies which hyperparameters we plan to tune.\n\ntune_spec <- \n decision_tree(\n cost_complexity = tune(),\n tree_depth = tune()\n ) %>% \n set_engine(\"rpart\") %>% \n set_mode(\"classification\")\n\ntune_spec\n#> Decision Tree Model Specification (classification)\n#> \n#> Main Arguments:\n#> cost_complexity = tune()\n#> tree_depth = tune()\n#> \n#> Computational engine: rpart\n\nThink of tune() here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will tune().\nWe can’t train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we can train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter:\n\ntree_grid <- grid_regular(cost_complexity(),\n tree_depth(),\n levels = 5)\n\nThe function grid_regular() is from the dials package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. 
Since we have two to tune, grid_regular() returns 5 \\(\\times\\) 5 = 25 different possible tuning combinations to try in a tidy tibble format.\n\ntree_grid\n#> # A tibble: 25 × 2\n#> cost_complexity tree_depth\n#> <dbl> <int>\n#> 1 0.0000000001 1\n#> 2 0.0000000178 1\n#> 3 0.00000316 1\n#> 4 0.000562 1\n#> 5 0.1 1\n#> 6 0.0000000001 4\n#> 7 0.0000000178 4\n#> 8 0.00000316 4\n#> 9 0.000562 4\n#> 10 0.1 4\n#> # ℹ 15 more rows\n\nHere, you can see all 5 values of cost_complexity ranging up to 0.1. These values get repeated for each of the 5 values of tree_depth:\n\ntree_grid %>% \n count(tree_depth)\n#> # A tibble: 5 × 2\n#> tree_depth n\n#> <int> <int>\n#> 1 1 5\n#> 2 4 5\n#> 3 8 5\n#> 4 11 5\n#> 5 15 5\n\nArmed with our grid filled with 25 candidate decision tree models, let’s create cross-validation folds for tuning:\n\nset.seed(234)\ncell_folds <- vfold_cv(cell_train)\n\nTuning in tidymodels requires a resampled object created with the rsample package." + }, + { + "objectID": "start/tuning/index.html#tune-grid", + "href": "start/tuning/index.html#tune-grid", + "title": "Tune model parameters", + "section": "Model tuning with a grid", + "text": "Model tuning with a grid\nWe are ready to tune! Let’s use tune_grid() to fit models at all the different values we chose for each tuned hyperparameter. 
There are several options for building the object for tuning:\n\nTune a model specification along with a recipe or model, or\nTune a workflow() that bundles together a model specification and a recipe or model preprocessor.\n\nHere we use a workflow() with a straightforward formula; if this model required more involved data preprocessing, we could use add_recipe() instead of add_formula().\n\nset.seed(345)\n\ntree_wf <- workflow() %>%\n add_model(tune_spec) %>%\n add_formula(class ~ .)\n\ntree_res <- \n tree_wf %>% \n tune_grid(\n resamples = cell_folds,\n grid = tree_grid\n )\n\ntree_res\n#> # Tuning results\n#> # 10-fold cross-validation \n#> # A tibble: 10 × 4\n#> splits id .metrics .notes \n#> <list> <chr> <list> <list> \n#> 1 <split [1362/152]> Fold01 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 2 <split [1362/152]> Fold02 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 3 <split [1362/152]> Fold03 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 4 <split [1362/152]> Fold04 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 5 <split [1363/151]> Fold05 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 6 <split [1363/151]> Fold06 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 7 <split [1363/151]> Fold07 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 8 <split [1363/151]> Fold08 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 9 <split [1363/151]> Fold09 <tibble [50 × 6]> <tibble [0 × 3]>\n#> 10 <split [1363/151]> Fold10 <tibble [50 × 6]> <tibble [0 × 3]>\n\nOnce we have our tuning results, we can both explore them through visualization and then select the best result. The function collect_metrics() gives us a tidy tibble with all the results. 
We had 25 candidate models and two metrics, accuracy and roc_auc, and we get a row for each .metric and model.\n\ntree_res %>% \n collect_metrics()\n#> # A tibble: 50 × 8\n#> cost_complexity tree_depth .metric .estimator mean n std_err .config \n#> <dbl> <int> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.0000000001 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 2 0.0000000001 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 3 0.0000000178 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 4 0.0000000178 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 5 0.00000316 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 6 0.00000316 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 7 0.000562 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 8 0.000562 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> 9 0.1 1 accuracy binary 0.732 10 0.0148 Preproces…\n#> 10 0.1 1 roc_auc binary 0.777 10 0.0107 Preproces…\n#> # ℹ 40 more rows\n\nWe might get more out of plotting these results:\n\ntree_res %>%\n collect_metrics() %>%\n mutate(tree_depth = factor(tree_depth)) %>%\n ggplot(aes(cost_complexity, mean, color = tree_depth)) +\n geom_line(size = 1.5, alpha = 0.6) +\n geom_point(size = 2) +\n facet_wrap(~ .metric, scales = \"free\", nrow = 2) +\n scale_x_log10(labels = scales::label_number()) +\n scale_color_viridis_d(option = \"plasma\", begin = .9, end = 0)\n#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.\n#> ℹ Please use `linewidth` instead.\n\n\n\n\n\n\n\n\nWe can see that our “stubbiest” tree, with a depth of 1, is the worst model according to both metrics and across all candidate values of cost_complexity. Our deepest tree, with a depth of 15, did better. However, the best tree seems to be between these values with a tree depth of 4. 
The show_best() function shows us the top 5 candidate models by default:\n\ntree_res %>%\n show_best(\"accuracy\")\n#> # A tibble: 5 × 8\n#> cost_complexity tree_depth .metric .estimator mean n std_err .config \n#> <dbl> <int> <chr> <chr> <dbl> <int> <dbl> <chr> \n#> 1 0.0000000001 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 2 0.0000000178 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 3 0.00000316 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 4 0.000562 4 accuracy binary 0.807 10 0.0119 Preprocess…\n#> 5 0.1 4 accuracy binary 0.786 10 0.0124 Preprocess…\n\nWe can also use the select_best() function to pull out the single set of hyperparameter values for our best decision tree model:\n\nbest_tree <- tree_res %>%\n select_best(\"accuracy\")\n\nbest_tree\n#> # A tibble: 1 × 3\n#> cost_complexity tree_depth .config \n#> <dbl> <int> <chr> \n#> 1 0.0000000001 4 Preprocessor1_Model06\n\nThese are the values for tree_depth and cost_complexity that maximize accuracy in this data set of cell images." 
+ }, + { + "objectID": "start/tuning/index.html#final-model", + "href": "start/tuning/index.html#final-model", + "title": "Tune model parameters", + "section": "Finalizing our model", + "text": "Finalizing our model\nWe can update (or “finalize”) our workflow object tree_wf with the values from select_best().\n\nfinal_wf <- \n tree_wf %>% \n finalize_workflow(best_tree)\n\nfinal_wf\n#> ══ Workflow ══════════════════════════════════════════════════════════\n#> Preprocessor: Formula\n#> Model: decision_tree()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> class ~ .\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> Decision Tree Model Specification (classification)\n#> \n#> Main Arguments:\n#> cost_complexity = 1e-10\n#> tree_depth = 4\n#> \n#> Computational engine: rpart\n\nOur tuning is done!\n\nThe last fit\nFinally, let’s fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. We can use the function last_fit() with our finalized model; this function fits the finalized model on the full training data set and evaluates the finalized model on the testing data.\n\nfinal_fit <- \n final_wf %>%\n last_fit(cell_split) \n\nfinal_fit %>%\n collect_metrics()\n#> # A tibble: 2 × 4\n#> .metric .estimator .estimate .config \n#> <chr> <chr> <dbl> <chr> \n#> 1 accuracy binary 0.802 Preprocessor1_Model1\n#> 2 roc_auc binary 0.840 Preprocessor1_Model1\n\nfinal_fit %>%\n collect_predictions() %>% \n roc_curve(class, .pred_PS) %>% \n autoplot()\n\n\n\n\n\n\n\n\nThe performance metrics from the test set indicate that we did not overfit during our tuning procedure.\nThe final_fit object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. 
You may want to extract this object, using one of the extract_ helper functions.\n\nfinal_tree <- extract_workflow(final_fit)\nfinal_tree\n#> ══ Workflow [trained] ════════════════════════════════════════════════\n#> Preprocessor: Formula\n#> Model: decision_tree()\n#> \n#> ── Preprocessor ──────────────────────────────────────────────────────\n#> class ~ .\n#> \n#> ── Model ─────────────────────────────────────────────────────────────\n#> n= 1514 \n#> \n#> node), split, n, loss, yval, (yprob)\n#> * denotes terminal node\n#> \n#> 1) root 1514 539 PS (0.64398943 0.35601057) \n#> 2) total_inten_ch_2< 41732.5 642 33 PS (0.94859813 0.05140187) \n#> 4) shape_p_2_a_ch_1>=1.251801 631 27 PS (0.95721078 0.04278922) *\n#> 5) shape_p_2_a_ch_1< 1.251801 11 5 WS (0.45454545 0.54545455) *\n#> 3) total_inten_ch_2>=41732.5 872 366 WS (0.41972477 0.58027523) \n#> 6) fiber_width_ch_1< 11.37318 406 160 PS (0.60591133 0.39408867) \n#> 12) avg_inten_ch_1< 145.4883 293 85 PS (0.70989761 0.29010239) *\n#> 13) avg_inten_ch_1>=145.4883 113 38 WS (0.33628319 0.66371681) \n#> 26) total_inten_ch_3>=57919.5 33 10 PS (0.69696970 0.30303030) *\n#> 27) total_inten_ch_3< 57919.5 80 15 WS (0.18750000 0.81250000) *\n#> 7) fiber_width_ch_1>=11.37318 466 120 WS (0.25751073 0.74248927) \n#> 14) eq_ellipse_oblate_vol_ch_1>=1673.942 30 8 PS (0.73333333 0.26666667) \n#> 28) var_inten_ch_3>=41.10858 20 2 PS (0.90000000 0.10000000) *\n#> 29) var_inten_ch_3< 41.10858 10 4 WS (0.40000000 0.60000000) *\n#> 15) eq_ellipse_oblate_vol_ch_1< 1673.942 436 98 WS (0.22477064 0.77522936) *\n\nWe can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit.\n\nfinal_tree %>%\n extract_fit_engine() %>%\n rpart.plot(roundint = FALSE)\n\n\n\n\n\n\n\n\nPerhaps we would also like to understand what variables are important in this final model. 
We can use the vip package to estimate variable importance based on the model’s structure.\n\nlibrary(vip)\n\nfinal_tree %>% \n extract_fit_parsnip() %>% \n vip()\n\n\n\n\n\n\n\n\nThese are the automated image analysis measurements that are the most important in driving segmentation quality predictions.\nWe leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the reference docs, or use the args() function to see which parsnip object arguments are available:\n\nargs(decision_tree)\n#> function (mode = \"unknown\", engine = \"rpart\", cost_complexity = NULL, \n#> tree_depth = NULL, min_n = NULL) \n#> NULL\n\nYou could tune the other hyperparameter we didn’t use here, min_n, which sets the minimum n to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this searchable table to find the original argument for min_n in the rpart package (hint). See whether you can tune a different combination of hyperparameters and/or values to improve a tree’s ability to predict cell segmentation quality." 
+ }, + { + "objectID": "start/tuning/index.html#session-info", + "href": "start/tuning/index.html#session-info", + "title": "Tune model parameters", + "section": "Session information", + "text": "Session information\n\n#> ─ Session info ─────────────────────────────────────────────────────\n#> setting value\n#> version R version 4.3.0 (2023-04-21)\n#> os macOS Monterey 12.6\n#> system aarch64, darwin20\n#> ui X11\n#> language (EN)\n#> collate en_US.UTF-8\n#> ctype en_US.UTF-8\n#> tz America/Los_Angeles\n#> date 2023-05-25\n#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)\n#> \n#> ─ Packages ─────────────────────────────────────────────────────────\n#> package * version date (UTC) lib source\n#> broom * 1.0.4 2023-03-11 [1] CRAN (R 4.3.0)\n#> dials * 1.2.0 2023-04-03 [1] CRAN (R 4.3.0)\n#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.0)\n#> ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.0)\n#> infer * 1.0.4 2022-12-02 [1] CRAN (R 4.3.0)\n#> parsnip * 1.1.0 2023-04-12 [1] CRAN (R 4.3.0)\n#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.3.0)\n#> recipes * 1.0.6 2023-04-25 [1] CRAN (R 4.3.0)\n#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.0)\n#> rpart * 4.1.19 2022-10-21 [2] CRAN (R 4.3.0)\n#> rpart.plot * 3.1.1 2022-05-21 [1] CRAN (R 4.3.0)\n#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.3.0)\n#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)\n#> tidymodels * 1.1.0 2023-05-01 [1] CRAN (R 4.3.0)\n#> tune * 1.1.1 2023-04-11 [1] CRAN (R 4.3.0)\n#> vip * 0.3.2 2020-12-17 [1] CRAN (R 4.3.0)\n#> workflows * 1.1.3 2023-02-22 [1] CRAN (R 4.3.0)\n#> yardstick * 1.2.0 2023-04-21 [1] CRAN (R 4.3.0)\n#> \n#> [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library\n#> [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library\n#> \n#> ────────────────────────────────────────────────────────────────────" + } +] \ No newline at end of file diff --git a/docs/site_libs/bootstrap/bootstrap-icons.css b/docs/site_libs/bootstrap/bootstrap-icons.css 
new file mode 100644 index 00000000..94f19404 --- /dev/null +++ b/docs/site_libs/bootstrap/bootstrap-icons.css @@ -0,0 +1,2018 @@ +@font-face { + font-display: block; + font-family: "bootstrap-icons"; + src: +url("./bootstrap-icons.woff?2ab2cbbe07fcebb53bdaa7313bb290f2") format("woff"); +} + +.bi::before, +[class^="bi-"]::before, +[class*=" bi-"]::before { + display: inline-block; + font-family: bootstrap-icons !important; + font-style: normal; + font-weight: normal !important; + font-variant: normal; + text-transform: none; + line-height: 1; + vertical-align: -.125em; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +.bi-123::before { content: "\f67f"; } +.bi-alarm-fill::before { content: "\f101"; } +.bi-alarm::before { content: "\f102"; } +.bi-align-bottom::before { content: "\f103"; } +.bi-align-center::before { content: "\f104"; } +.bi-align-end::before { content: "\f105"; } +.bi-align-middle::before { content: "\f106"; } +.bi-align-start::before { content: "\f107"; } +.bi-align-top::before { content: "\f108"; } +.bi-alt::before { content: "\f109"; } +.bi-app-indicator::before { content: "\f10a"; } +.bi-app::before { content: "\f10b"; } +.bi-archive-fill::before { content: "\f10c"; } +.bi-archive::before { content: "\f10d"; } +.bi-arrow-90deg-down::before { content: "\f10e"; } +.bi-arrow-90deg-left::before { content: "\f10f"; } +.bi-arrow-90deg-right::before { content: "\f110"; } +.bi-arrow-90deg-up::before { content: "\f111"; } +.bi-arrow-bar-down::before { content: "\f112"; } +.bi-arrow-bar-left::before { content: "\f113"; } +.bi-arrow-bar-right::before { content: "\f114"; } +.bi-arrow-bar-up::before { content: "\f115"; } +.bi-arrow-clockwise::before { content: "\f116"; } +.bi-arrow-counterclockwise::before { content: "\f117"; } +.bi-arrow-down-circle-fill::before { content: "\f118"; } +.bi-arrow-down-circle::before { content: "\f119"; } +.bi-arrow-down-left-circle-fill::before { content: "\f11a"; } 
+.bi-arrow-down-left-circle::before { content: "\f11b"; } +.bi-arrow-down-left-square-fill::before { content: "\f11c"; } +.bi-arrow-down-left-square::before { content: "\f11d"; } +.bi-arrow-down-left::before { content: "\f11e"; } +.bi-arrow-down-right-circle-fill::before { content: "\f11f"; } +.bi-arrow-down-right-circle::before { content: "\f120"; } +.bi-arrow-down-right-square-fill::before { content: "\f121"; } +.bi-arrow-down-right-square::before { content: "\f122"; } +.bi-arrow-down-right::before { content: "\f123"; } +.bi-arrow-down-short::before { content: "\f124"; } +.bi-arrow-down-square-fill::before { content: "\f125"; } +.bi-arrow-down-square::before { content: "\f126"; } +.bi-arrow-down-up::before { content: "\f127"; } +.bi-arrow-down::before { content: "\f128"; } +.bi-arrow-left-circle-fill::before { content: "\f129"; } +.bi-arrow-left-circle::before { content: "\f12a"; } +.bi-arrow-left-right::before { content: "\f12b"; } +.bi-arrow-left-short::before { content: "\f12c"; } +.bi-arrow-left-square-fill::before { content: "\f12d"; } +.bi-arrow-left-square::before { content: "\f12e"; } +.bi-arrow-left::before { content: "\f12f"; } +.bi-arrow-repeat::before { content: "\f130"; } +.bi-arrow-return-left::before { content: "\f131"; } +.bi-arrow-return-right::before { content: "\f132"; } +.bi-arrow-right-circle-fill::before { content: "\f133"; } +.bi-arrow-right-circle::before { content: "\f134"; } +.bi-arrow-right-short::before { content: "\f135"; } +.bi-arrow-right-square-fill::before { content: "\f136"; } +.bi-arrow-right-square::before { content: "\f137"; } +.bi-arrow-right::before { content: "\f138"; } +.bi-arrow-up-circle-fill::before { content: "\f139"; } +.bi-arrow-up-circle::before { content: "\f13a"; } +.bi-arrow-up-left-circle-fill::before { content: "\f13b"; } +.bi-arrow-up-left-circle::before { content: "\f13c"; } +.bi-arrow-up-left-square-fill::before { content: "\f13d"; } +.bi-arrow-up-left-square::before { content: "\f13e"; } 
+.bi-arrow-up-left::before { content: "\f13f"; } +.bi-arrow-up-right-circle-fill::before { content: "\f140"; } +.bi-arrow-up-right-circle::before { content: "\f141"; } +.bi-arrow-up-right-square-fill::before { content: "\f142"; } +.bi-arrow-up-right-square::before { content: "\f143"; } +.bi-arrow-up-right::before { content: "\f144"; } +.bi-arrow-up-short::before { content: "\f145"; } +.bi-arrow-up-square-fill::before { content: "\f146"; } +.bi-arrow-up-square::before { content: "\f147"; } +.bi-arrow-up::before { content: "\f148"; } +.bi-arrows-angle-contract::before { content: "\f149"; } +.bi-arrows-angle-expand::before { content: "\f14a"; } +.bi-arrows-collapse::before { content: "\f14b"; } +.bi-arrows-expand::before { content: "\f14c"; } +.bi-arrows-fullscreen::before { content: "\f14d"; } +.bi-arrows-move::before { content: "\f14e"; } +.bi-aspect-ratio-fill::before { content: "\f14f"; } +.bi-aspect-ratio::before { content: "\f150"; } +.bi-asterisk::before { content: "\f151"; } +.bi-at::before { content: "\f152"; } +.bi-award-fill::before { content: "\f153"; } +.bi-award::before { content: "\f154"; } +.bi-back::before { content: "\f155"; } +.bi-backspace-fill::before { content: "\f156"; } +.bi-backspace-reverse-fill::before { content: "\f157"; } +.bi-backspace-reverse::before { content: "\f158"; } +.bi-backspace::before { content: "\f159"; } +.bi-badge-3d-fill::before { content: "\f15a"; } +.bi-badge-3d::before { content: "\f15b"; } +.bi-badge-4k-fill::before { content: "\f15c"; } +.bi-badge-4k::before { content: "\f15d"; } +.bi-badge-8k-fill::before { content: "\f15e"; } +.bi-badge-8k::before { content: "\f15f"; } +.bi-badge-ad-fill::before { content: "\f160"; } +.bi-badge-ad::before { content: "\f161"; } +.bi-badge-ar-fill::before { content: "\f162"; } +.bi-badge-ar::before { content: "\f163"; } +.bi-badge-cc-fill::before { content: "\f164"; } +.bi-badge-cc::before { content: "\f165"; } +.bi-badge-hd-fill::before { content: "\f166"; } +.bi-badge-hd::before { 
content: "\f167"; } +.bi-badge-tm-fill::before { content: "\f168"; } +.bi-badge-tm::before { content: "\f169"; } +.bi-badge-vo-fill::before { content: "\f16a"; } +.bi-badge-vo::before { content: "\f16b"; } +.bi-badge-vr-fill::before { content: "\f16c"; } +.bi-badge-vr::before { content: "\f16d"; } +.bi-badge-wc-fill::before { content: "\f16e"; } +.bi-badge-wc::before { content: "\f16f"; } +.bi-bag-check-fill::before { content: "\f170"; } +.bi-bag-check::before { content: "\f171"; } +.bi-bag-dash-fill::before { content: "\f172"; } +.bi-bag-dash::before { content: "\f173"; } +.bi-bag-fill::before { content: "\f174"; } +.bi-bag-plus-fill::before { content: "\f175"; } +.bi-bag-plus::before { content: "\f176"; } +.bi-bag-x-fill::before { content: "\f177"; } +.bi-bag-x::before { content: "\f178"; } +.bi-bag::before { content: "\f179"; } +.bi-bar-chart-fill::before { content: "\f17a"; } +.bi-bar-chart-line-fill::before { content: "\f17b"; } +.bi-bar-chart-line::before { content: "\f17c"; } +.bi-bar-chart-steps::before { content: "\f17d"; } +.bi-bar-chart::before { content: "\f17e"; } +.bi-basket-fill::before { content: "\f17f"; } +.bi-basket::before { content: "\f180"; } +.bi-basket2-fill::before { content: "\f181"; } +.bi-basket2::before { content: "\f182"; } +.bi-basket3-fill::before { content: "\f183"; } +.bi-basket3::before { content: "\f184"; } +.bi-battery-charging::before { content: "\f185"; } +.bi-battery-full::before { content: "\f186"; } +.bi-battery-half::before { content: "\f187"; } +.bi-battery::before { content: "\f188"; } +.bi-bell-fill::before { content: "\f189"; } +.bi-bell::before { content: "\f18a"; } +.bi-bezier::before { content: "\f18b"; } +.bi-bezier2::before { content: "\f18c"; } +.bi-bicycle::before { content: "\f18d"; } +.bi-binoculars-fill::before { content: "\f18e"; } +.bi-binoculars::before { content: "\f18f"; } +.bi-blockquote-left::before { content: "\f190"; } +.bi-blockquote-right::before { content: "\f191"; } +.bi-book-fill::before { 
content: "\f192"; } +.bi-book-half::before { content: "\f193"; } +.bi-book::before { content: "\f194"; } +.bi-bookmark-check-fill::before { content: "\f195"; } +.bi-bookmark-check::before { content: "\f196"; } +.bi-bookmark-dash-fill::before { content: "\f197"; } +.bi-bookmark-dash::before { content: "\f198"; } +.bi-bookmark-fill::before { content: "\f199"; } +.bi-bookmark-heart-fill::before { content: "\f19a"; } +.bi-bookmark-heart::before { content: "\f19b"; } +.bi-bookmark-plus-fill::before { content: "\f19c"; } +.bi-bookmark-plus::before { content: "\f19d"; } +.bi-bookmark-star-fill::before { content: "\f19e"; } +.bi-bookmark-star::before { content: "\f19f"; } +.bi-bookmark-x-fill::before { content: "\f1a0"; } +.bi-bookmark-x::before { content: "\f1a1"; } +.bi-bookmark::before { content: "\f1a2"; } +.bi-bookmarks-fill::before { content: "\f1a3"; } +.bi-bookmarks::before { content: "\f1a4"; } +.bi-bookshelf::before { content: "\f1a5"; } +.bi-bootstrap-fill::before { content: "\f1a6"; } +.bi-bootstrap-reboot::before { content: "\f1a7"; } +.bi-bootstrap::before { content: "\f1a8"; } +.bi-border-all::before { content: "\f1a9"; } +.bi-border-bottom::before { content: "\f1aa"; } +.bi-border-center::before { content: "\f1ab"; } +.bi-border-inner::before { content: "\f1ac"; } +.bi-border-left::before { content: "\f1ad"; } +.bi-border-middle::before { content: "\f1ae"; } +.bi-border-outer::before { content: "\f1af"; } +.bi-border-right::before { content: "\f1b0"; } +.bi-border-style::before { content: "\f1b1"; } +.bi-border-top::before { content: "\f1b2"; } +.bi-border-width::before { content: "\f1b3"; } +.bi-border::before { content: "\f1b4"; } +.bi-bounding-box-circles::before { content: "\f1b5"; } +.bi-bounding-box::before { content: "\f1b6"; } +.bi-box-arrow-down-left::before { content: "\f1b7"; } +.bi-box-arrow-down-right::before { content: "\f1b8"; } +.bi-box-arrow-down::before { content: "\f1b9"; } +.bi-box-arrow-in-down-left::before { content: "\f1ba"; } 
+.bi-box-arrow-in-down-right::before { content: "\f1bb"; } +.bi-box-arrow-in-down::before { content: "\f1bc"; } +.bi-box-arrow-in-left::before { content: "\f1bd"; } +.bi-box-arrow-in-right::before { content: "\f1be"; } +.bi-box-arrow-in-up-left::before { content: "\f1bf"; } +.bi-box-arrow-in-up-right::before { content: "\f1c0"; } +.bi-box-arrow-in-up::before { content: "\f1c1"; } +.bi-box-arrow-left::before { content: "\f1c2"; } +.bi-box-arrow-right::before { content: "\f1c3"; } +.bi-box-arrow-up-left::before { content: "\f1c4"; } +.bi-box-arrow-up-right::before { content: "\f1c5"; } +.bi-box-arrow-up::before { content: "\f1c6"; } +.bi-box-seam::before { content: "\f1c7"; } +.bi-box::before { content: "\f1c8"; } +.bi-braces::before { content: "\f1c9"; } +.bi-bricks::before { content: "\f1ca"; } +.bi-briefcase-fill::before { content: "\f1cb"; } +.bi-briefcase::before { content: "\f1cc"; } +.bi-brightness-alt-high-fill::before { content: "\f1cd"; } +.bi-brightness-alt-high::before { content: "\f1ce"; } +.bi-brightness-alt-low-fill::before { content: "\f1cf"; } +.bi-brightness-alt-low::before { content: "\f1d0"; } +.bi-brightness-high-fill::before { content: "\f1d1"; } +.bi-brightness-high::before { content: "\f1d2"; } +.bi-brightness-low-fill::before { content: "\f1d3"; } +.bi-brightness-low::before { content: "\f1d4"; } +.bi-broadcast-pin::before { content: "\f1d5"; } +.bi-broadcast::before { content: "\f1d6"; } +.bi-brush-fill::before { content: "\f1d7"; } +.bi-brush::before { content: "\f1d8"; } +.bi-bucket-fill::before { content: "\f1d9"; } +.bi-bucket::before { content: "\f1da"; } +.bi-bug-fill::before { content: "\f1db"; } +.bi-bug::before { content: "\f1dc"; } +.bi-building::before { content: "\f1dd"; } +.bi-bullseye::before { content: "\f1de"; } +.bi-calculator-fill::before { content: "\f1df"; } +.bi-calculator::before { content: "\f1e0"; } +.bi-calendar-check-fill::before { content: "\f1e1"; } +.bi-calendar-check::before { content: "\f1e2"; } 
+.bi-calendar-date-fill::before { content: "\f1e3"; } +.bi-calendar-date::before { content: "\f1e4"; } +.bi-calendar-day-fill::before { content: "\f1e5"; } +.bi-calendar-day::before { content: "\f1e6"; } +.bi-calendar-event-fill::before { content: "\f1e7"; } +.bi-calendar-event::before { content: "\f1e8"; } +.bi-calendar-fill::before { content: "\f1e9"; } +.bi-calendar-minus-fill::before { content: "\f1ea"; } +.bi-calendar-minus::before { content: "\f1eb"; } +.bi-calendar-month-fill::before { content: "\f1ec"; } +.bi-calendar-month::before { content: "\f1ed"; } +.bi-calendar-plus-fill::before { content: "\f1ee"; } +.bi-calendar-plus::before { content: "\f1ef"; } +.bi-calendar-range-fill::before { content: "\f1f0"; } +.bi-calendar-range::before { content: "\f1f1"; } +.bi-calendar-week-fill::before { content: "\f1f2"; } +.bi-calendar-week::before { content: "\f1f3"; } +.bi-calendar-x-fill::before { content: "\f1f4"; } +.bi-calendar-x::before { content: "\f1f5"; } +.bi-calendar::before { content: "\f1f6"; } +.bi-calendar2-check-fill::before { content: "\f1f7"; } +.bi-calendar2-check::before { content: "\f1f8"; } +.bi-calendar2-date-fill::before { content: "\f1f9"; } +.bi-calendar2-date::before { content: "\f1fa"; } +.bi-calendar2-day-fill::before { content: "\f1fb"; } +.bi-calendar2-day::before { content: "\f1fc"; } +.bi-calendar2-event-fill::before { content: "\f1fd"; } +.bi-calendar2-event::before { content: "\f1fe"; } +.bi-calendar2-fill::before { content: "\f1ff"; } +.bi-calendar2-minus-fill::before { content: "\f200"; } +.bi-calendar2-minus::before { content: "\f201"; } +.bi-calendar2-month-fill::before { content: "\f202"; } +.bi-calendar2-month::before { content: "\f203"; } +.bi-calendar2-plus-fill::before { content: "\f204"; } +.bi-calendar2-plus::before { content: "\f205"; } +.bi-calendar2-range-fill::before { content: "\f206"; } +.bi-calendar2-range::before { content: "\f207"; } +.bi-calendar2-week-fill::before { content: "\f208"; } 
+.bi-calendar2-week::before { content: "\f209"; } +.bi-calendar2-x-fill::before { content: "\f20a"; } +.bi-calendar2-x::before { content: "\f20b"; } +.bi-calendar2::before { content: "\f20c"; } +.bi-calendar3-event-fill::before { content: "\f20d"; } +.bi-calendar3-event::before { content: "\f20e"; } +.bi-calendar3-fill::before { content: "\f20f"; } +.bi-calendar3-range-fill::before { content: "\f210"; } +.bi-calendar3-range::before { content: "\f211"; } +.bi-calendar3-week-fill::before { content: "\f212"; } +.bi-calendar3-week::before { content: "\f213"; } +.bi-calendar3::before { content: "\f214"; } +.bi-calendar4-event::before { content: "\f215"; } +.bi-calendar4-range::before { content: "\f216"; } +.bi-calendar4-week::before { content: "\f217"; } +.bi-calendar4::before { content: "\f218"; } +.bi-camera-fill::before { content: "\f219"; } +.bi-camera-reels-fill::before { content: "\f21a"; } +.bi-camera-reels::before { content: "\f21b"; } +.bi-camera-video-fill::before { content: "\f21c"; } +.bi-camera-video-off-fill::before { content: "\f21d"; } +.bi-camera-video-off::before { content: "\f21e"; } +.bi-camera-video::before { content: "\f21f"; } +.bi-camera::before { content: "\f220"; } +.bi-camera2::before { content: "\f221"; } +.bi-capslock-fill::before { content: "\f222"; } +.bi-capslock::before { content: "\f223"; } +.bi-card-checklist::before { content: "\f224"; } +.bi-card-heading::before { content: "\f225"; } +.bi-card-image::before { content: "\f226"; } +.bi-card-list::before { content: "\f227"; } +.bi-card-text::before { content: "\f228"; } +.bi-caret-down-fill::before { content: "\f229"; } +.bi-caret-down-square-fill::before { content: "\f22a"; } +.bi-caret-down-square::before { content: "\f22b"; } +.bi-caret-down::before { content: "\f22c"; } +.bi-caret-left-fill::before { content: "\f22d"; } +.bi-caret-left-square-fill::before { content: "\f22e"; } +.bi-caret-left-square::before { content: "\f22f"; } +.bi-caret-left::before { content: "\f230"; } 
+.bi-caret-right-fill::before { content: "\f231"; } +.bi-caret-right-square-fill::before { content: "\f232"; } +.bi-caret-right-square::before { content: "\f233"; } +.bi-caret-right::before { content: "\f234"; } +.bi-caret-up-fill::before { content: "\f235"; } +.bi-caret-up-square-fill::before { content: "\f236"; } +.bi-caret-up-square::before { content: "\f237"; } +.bi-caret-up::before { content: "\f238"; } +.bi-cart-check-fill::before { content: "\f239"; } +.bi-cart-check::before { content: "\f23a"; } +.bi-cart-dash-fill::before { content: "\f23b"; } +.bi-cart-dash::before { content: "\f23c"; } +.bi-cart-fill::before { content: "\f23d"; } +.bi-cart-plus-fill::before { content: "\f23e"; } +.bi-cart-plus::before { content: "\f23f"; } +.bi-cart-x-fill::before { content: "\f240"; } +.bi-cart-x::before { content: "\f241"; } +.bi-cart::before { content: "\f242"; } +.bi-cart2::before { content: "\f243"; } +.bi-cart3::before { content: "\f244"; } +.bi-cart4::before { content: "\f245"; } +.bi-cash-stack::before { content: "\f246"; } +.bi-cash::before { content: "\f247"; } +.bi-cast::before { content: "\f248"; } +.bi-chat-dots-fill::before { content: "\f249"; } +.bi-chat-dots::before { content: "\f24a"; } +.bi-chat-fill::before { content: "\f24b"; } +.bi-chat-left-dots-fill::before { content: "\f24c"; } +.bi-chat-left-dots::before { content: "\f24d"; } +.bi-chat-left-fill::before { content: "\f24e"; } +.bi-chat-left-quote-fill::before { content: "\f24f"; } +.bi-chat-left-quote::before { content: "\f250"; } +.bi-chat-left-text-fill::before { content: "\f251"; } +.bi-chat-left-text::before { content: "\f252"; } +.bi-chat-left::before { content: "\f253"; } +.bi-chat-quote-fill::before { content: "\f254"; } +.bi-chat-quote::before { content: "\f255"; } +.bi-chat-right-dots-fill::before { content: "\f256"; } +.bi-chat-right-dots::before { content: "\f257"; } +.bi-chat-right-fill::before { content: "\f258"; } +.bi-chat-right-quote-fill::before { content: "\f259"; } 
+.bi-chat-right-quote::before { content: "\f25a"; } +.bi-chat-right-text-fill::before { content: "\f25b"; } +.bi-chat-right-text::before { content: "\f25c"; } +.bi-chat-right::before { content: "\f25d"; } +.bi-chat-square-dots-fill::before { content: "\f25e"; } +.bi-chat-square-dots::before { content: "\f25f"; } +.bi-chat-square-fill::before { content: "\f260"; } +.bi-chat-square-quote-fill::before { content: "\f261"; } +.bi-chat-square-quote::before { content: "\f262"; } +.bi-chat-square-text-fill::before { content: "\f263"; } +.bi-chat-square-text::before { content: "\f264"; } +.bi-chat-square::before { content: "\f265"; } +.bi-chat-text-fill::before { content: "\f266"; } +.bi-chat-text::before { content: "\f267"; } +.bi-chat::before { content: "\f268"; } +.bi-check-all::before { content: "\f269"; } +.bi-check-circle-fill::before { content: "\f26a"; } +.bi-check-circle::before { content: "\f26b"; } +.bi-check-square-fill::before { content: "\f26c"; } +.bi-check-square::before { content: "\f26d"; } +.bi-check::before { content: "\f26e"; } +.bi-check2-all::before { content: "\f26f"; } +.bi-check2-circle::before { content: "\f270"; } +.bi-check2-square::before { content: "\f271"; } +.bi-check2::before { content: "\f272"; } +.bi-chevron-bar-contract::before { content: "\f273"; } +.bi-chevron-bar-down::before { content: "\f274"; } +.bi-chevron-bar-expand::before { content: "\f275"; } +.bi-chevron-bar-left::before { content: "\f276"; } +.bi-chevron-bar-right::before { content: "\f277"; } +.bi-chevron-bar-up::before { content: "\f278"; } +.bi-chevron-compact-down::before { content: "\f279"; } +.bi-chevron-compact-left::before { content: "\f27a"; } +.bi-chevron-compact-right::before { content: "\f27b"; } +.bi-chevron-compact-up::before { content: "\f27c"; } +.bi-chevron-contract::before { content: "\f27d"; } +.bi-chevron-double-down::before { content: "\f27e"; } +.bi-chevron-double-left::before { content: "\f27f"; } +.bi-chevron-double-right::before { content: "\f280"; } 
+.bi-chevron-double-up::before { content: "\f281"; } +.bi-chevron-down::before { content: "\f282"; } +.bi-chevron-expand::before { content: "\f283"; } +.bi-chevron-left::before { content: "\f284"; } +.bi-chevron-right::before { content: "\f285"; } +.bi-chevron-up::before { content: "\f286"; } +.bi-circle-fill::before { content: "\f287"; } +.bi-circle-half::before { content: "\f288"; } +.bi-circle-square::before { content: "\f289"; } +.bi-circle::before { content: "\f28a"; } +.bi-clipboard-check::before { content: "\f28b"; } +.bi-clipboard-data::before { content: "\f28c"; } +.bi-clipboard-minus::before { content: "\f28d"; } +.bi-clipboard-plus::before { content: "\f28e"; } +.bi-clipboard-x::before { content: "\f28f"; } +.bi-clipboard::before { content: "\f290"; } +.bi-clock-fill::before { content: "\f291"; } +.bi-clock-history::before { content: "\f292"; } +.bi-clock::before { content: "\f293"; } +.bi-cloud-arrow-down-fill::before { content: "\f294"; } +.bi-cloud-arrow-down::before { content: "\f295"; } +.bi-cloud-arrow-up-fill::before { content: "\f296"; } +.bi-cloud-arrow-up::before { content: "\f297"; } +.bi-cloud-check-fill::before { content: "\f298"; } +.bi-cloud-check::before { content: "\f299"; } +.bi-cloud-download-fill::before { content: "\f29a"; } +.bi-cloud-download::before { content: "\f29b"; } +.bi-cloud-drizzle-fill::before { content: "\f29c"; } +.bi-cloud-drizzle::before { content: "\f29d"; } +.bi-cloud-fill::before { content: "\f29e"; } +.bi-cloud-fog-fill::before { content: "\f29f"; } +.bi-cloud-fog::before { content: "\f2a0"; } +.bi-cloud-fog2-fill::before { content: "\f2a1"; } +.bi-cloud-fog2::before { content: "\f2a2"; } +.bi-cloud-hail-fill::before { content: "\f2a3"; } +.bi-cloud-hail::before { content: "\f2a4"; } +.bi-cloud-haze-1::before { content: "\f2a5"; } +.bi-cloud-haze-fill::before { content: "\f2a6"; } +.bi-cloud-haze::before { content: "\f2a7"; } +.bi-cloud-haze2-fill::before { content: "\f2a8"; } +.bi-cloud-lightning-fill::before { 
content: "\f2a9"; } +.bi-cloud-lightning-rain-fill::before { content: "\f2aa"; } +.bi-cloud-lightning-rain::before { content: "\f2ab"; } +.bi-cloud-lightning::before { content: "\f2ac"; } +.bi-cloud-minus-fill::before { content: "\f2ad"; } +.bi-cloud-minus::before { content: "\f2ae"; } +.bi-cloud-moon-fill::before { content: "\f2af"; } +.bi-cloud-moon::before { content: "\f2b0"; } +.bi-cloud-plus-fill::before { content: "\f2b1"; } +.bi-cloud-plus::before { content: "\f2b2"; } +.bi-cloud-rain-fill::before { content: "\f2b3"; } +.bi-cloud-rain-heavy-fill::before { content: "\f2b4"; } +.bi-cloud-rain-heavy::before { content: "\f2b5"; } +.bi-cloud-rain::before { content: "\f2b6"; } +.bi-cloud-slash-fill::before { content: "\f2b7"; } +.bi-cloud-slash::before { content: "\f2b8"; } +.bi-cloud-sleet-fill::before { content: "\f2b9"; } +.bi-cloud-sleet::before { content: "\f2ba"; } +.bi-cloud-snow-fill::before { content: "\f2bb"; } +.bi-cloud-snow::before { content: "\f2bc"; } +.bi-cloud-sun-fill::before { content: "\f2bd"; } +.bi-cloud-sun::before { content: "\f2be"; } +.bi-cloud-upload-fill::before { content: "\f2bf"; } +.bi-cloud-upload::before { content: "\f2c0"; } +.bi-cloud::before { content: "\f2c1"; } +.bi-clouds-fill::before { content: "\f2c2"; } +.bi-clouds::before { content: "\f2c3"; } +.bi-cloudy-fill::before { content: "\f2c4"; } +.bi-cloudy::before { content: "\f2c5"; } +.bi-code-slash::before { content: "\f2c6"; } +.bi-code-square::before { content: "\f2c7"; } +.bi-code::before { content: "\f2c8"; } +.bi-collection-fill::before { content: "\f2c9"; } +.bi-collection-play-fill::before { content: "\f2ca"; } +.bi-collection-play::before { content: "\f2cb"; } +.bi-collection::before { content: "\f2cc"; } +.bi-columns-gap::before { content: "\f2cd"; } +.bi-columns::before { content: "\f2ce"; } +.bi-command::before { content: "\f2cf"; } +.bi-compass-fill::before { content: "\f2d0"; } +.bi-compass::before { content: "\f2d1"; } +.bi-cone-striped::before { content: 
"\f2d2"; } +.bi-cone::before { content: "\f2d3"; } +.bi-controller::before { content: "\f2d4"; } +.bi-cpu-fill::before { content: "\f2d5"; } +.bi-cpu::before { content: "\f2d6"; } +.bi-credit-card-2-back-fill::before { content: "\f2d7"; } +.bi-credit-card-2-back::before { content: "\f2d8"; } +.bi-credit-card-2-front-fill::before { content: "\f2d9"; } +.bi-credit-card-2-front::before { content: "\f2da"; } +.bi-credit-card-fill::before { content: "\f2db"; } +.bi-credit-card::before { content: "\f2dc"; } +.bi-crop::before { content: "\f2dd"; } +.bi-cup-fill::before { content: "\f2de"; } +.bi-cup-straw::before { content: "\f2df"; } +.bi-cup::before { content: "\f2e0"; } +.bi-cursor-fill::before { content: "\f2e1"; } +.bi-cursor-text::before { content: "\f2e2"; } +.bi-cursor::before { content: "\f2e3"; } +.bi-dash-circle-dotted::before { content: "\f2e4"; } +.bi-dash-circle-fill::before { content: "\f2e5"; } +.bi-dash-circle::before { content: "\f2e6"; } +.bi-dash-square-dotted::before { content: "\f2e7"; } +.bi-dash-square-fill::before { content: "\f2e8"; } +.bi-dash-square::before { content: "\f2e9"; } +.bi-dash::before { content: "\f2ea"; } +.bi-diagram-2-fill::before { content: "\f2eb"; } +.bi-diagram-2::before { content: "\f2ec"; } +.bi-diagram-3-fill::before { content: "\f2ed"; } +.bi-diagram-3::before { content: "\f2ee"; } +.bi-diamond-fill::before { content: "\f2ef"; } +.bi-diamond-half::before { content: "\f2f0"; } +.bi-diamond::before { content: "\f2f1"; } +.bi-dice-1-fill::before { content: "\f2f2"; } +.bi-dice-1::before { content: "\f2f3"; } +.bi-dice-2-fill::before { content: "\f2f4"; } +.bi-dice-2::before { content: "\f2f5"; } +.bi-dice-3-fill::before { content: "\f2f6"; } +.bi-dice-3::before { content: "\f2f7"; } +.bi-dice-4-fill::before { content: "\f2f8"; } +.bi-dice-4::before { content: "\f2f9"; } +.bi-dice-5-fill::before { content: "\f2fa"; } +.bi-dice-5::before { content: "\f2fb"; } +.bi-dice-6-fill::before { content: "\f2fc"; } +.bi-dice-6::before { 
content: "\f2fd"; } +.bi-disc-fill::before { content: "\f2fe"; } +.bi-disc::before { content: "\f2ff"; } +.bi-discord::before { content: "\f300"; } +.bi-display-fill::before { content: "\f301"; } +.bi-display::before { content: "\f302"; } +.bi-distribute-horizontal::before { content: "\f303"; } +.bi-distribute-vertical::before { content: "\f304"; } +.bi-door-closed-fill::before { content: "\f305"; } +.bi-door-closed::before { content: "\f306"; } +.bi-door-open-fill::before { content: "\f307"; } +.bi-door-open::before { content: "\f308"; } +.bi-dot::before { content: "\f309"; } +.bi-download::before { content: "\f30a"; } +.bi-droplet-fill::before { content: "\f30b"; } +.bi-droplet-half::before { content: "\f30c"; } +.bi-droplet::before { content: "\f30d"; } +.bi-earbuds::before { content: "\f30e"; } +.bi-easel-fill::before { content: "\f30f"; } +.bi-easel::before { content: "\f310"; } +.bi-egg-fill::before { content: "\f311"; } +.bi-egg-fried::before { content: "\f312"; } +.bi-egg::before { content: "\f313"; } +.bi-eject-fill::before { content: "\f314"; } +.bi-eject::before { content: "\f315"; } +.bi-emoji-angry-fill::before { content: "\f316"; } +.bi-emoji-angry::before { content: "\f317"; } +.bi-emoji-dizzy-fill::before { content: "\f318"; } +.bi-emoji-dizzy::before { content: "\f319"; } +.bi-emoji-expressionless-fill::before { content: "\f31a"; } +.bi-emoji-expressionless::before { content: "\f31b"; } +.bi-emoji-frown-fill::before { content: "\f31c"; } +.bi-emoji-frown::before { content: "\f31d"; } +.bi-emoji-heart-eyes-fill::before { content: "\f31e"; } +.bi-emoji-heart-eyes::before { content: "\f31f"; } +.bi-emoji-laughing-fill::before { content: "\f320"; } +.bi-emoji-laughing::before { content: "\f321"; } +.bi-emoji-neutral-fill::before { content: "\f322"; } +.bi-emoji-neutral::before { content: "\f323"; } +.bi-emoji-smile-fill::before { content: "\f324"; } +.bi-emoji-smile-upside-down-fill::before { content: "\f325"; } +.bi-emoji-smile-upside-down::before { 
content: "\f326"; } +.bi-emoji-smile::before { content: "\f327"; } +.bi-emoji-sunglasses-fill::before { content: "\f328"; } +.bi-emoji-sunglasses::before { content: "\f329"; } +.bi-emoji-wink-fill::before { content: "\f32a"; } +.bi-emoji-wink::before { content: "\f32b"; } +.bi-envelope-fill::before { content: "\f32c"; } +.bi-envelope-open-fill::before { content: "\f32d"; } +.bi-envelope-open::before { content: "\f32e"; } +.bi-envelope::before { content: "\f32f"; } +.bi-eraser-fill::before { content: "\f330"; } +.bi-eraser::before { content: "\f331"; } +.bi-exclamation-circle-fill::before { content: "\f332"; } +.bi-exclamation-circle::before { content: "\f333"; } +.bi-exclamation-diamond-fill::before { content: "\f334"; } +.bi-exclamation-diamond::before { content: "\f335"; } +.bi-exclamation-octagon-fill::before { content: "\f336"; } +.bi-exclamation-octagon::before { content: "\f337"; } +.bi-exclamation-square-fill::before { content: "\f338"; } +.bi-exclamation-square::before { content: "\f339"; } +.bi-exclamation-triangle-fill::before { content: "\f33a"; } +.bi-exclamation-triangle::before { content: "\f33b"; } +.bi-exclamation::before { content: "\f33c"; } +.bi-exclude::before { content: "\f33d"; } +.bi-eye-fill::before { content: "\f33e"; } +.bi-eye-slash-fill::before { content: "\f33f"; } +.bi-eye-slash::before { content: "\f340"; } +.bi-eye::before { content: "\f341"; } +.bi-eyedropper::before { content: "\f342"; } +.bi-eyeglasses::before { content: "\f343"; } +.bi-facebook::before { content: "\f344"; } +.bi-file-arrow-down-fill::before { content: "\f345"; } +.bi-file-arrow-down::before { content: "\f346"; } +.bi-file-arrow-up-fill::before { content: "\f347"; } +.bi-file-arrow-up::before { content: "\f348"; } +.bi-file-bar-graph-fill::before { content: "\f349"; } +.bi-file-bar-graph::before { content: "\f34a"; } +.bi-file-binary-fill::before { content: "\f34b"; } +.bi-file-binary::before { content: "\f34c"; } +.bi-file-break-fill::before { content: "\f34d"; } 
+.bi-file-break::before { content: "\f34e"; } +.bi-file-check-fill::before { content: "\f34f"; } +.bi-file-check::before { content: "\f350"; } +.bi-file-code-fill::before { content: "\f351"; } +.bi-file-code::before { content: "\f352"; } +.bi-file-diff-fill::before { content: "\f353"; } +.bi-file-diff::before { content: "\f354"; } +.bi-file-earmark-arrow-down-fill::before { content: "\f355"; } +.bi-file-earmark-arrow-down::before { content: "\f356"; } +.bi-file-earmark-arrow-up-fill::before { content: "\f357"; } +.bi-file-earmark-arrow-up::before { content: "\f358"; } +.bi-file-earmark-bar-graph-fill::before { content: "\f359"; } +.bi-file-earmark-bar-graph::before { content: "\f35a"; } +.bi-file-earmark-binary-fill::before { content: "\f35b"; } +.bi-file-earmark-binary::before { content: "\f35c"; } +.bi-file-earmark-break-fill::before { content: "\f35d"; } +.bi-file-earmark-break::before { content: "\f35e"; } +.bi-file-earmark-check-fill::before { content: "\f35f"; } +.bi-file-earmark-check::before { content: "\f360"; } +.bi-file-earmark-code-fill::before { content: "\f361"; } +.bi-file-earmark-code::before { content: "\f362"; } +.bi-file-earmark-diff-fill::before { content: "\f363"; } +.bi-file-earmark-diff::before { content: "\f364"; } +.bi-file-earmark-easel-fill::before { content: "\f365"; } +.bi-file-earmark-easel::before { content: "\f366"; } +.bi-file-earmark-excel-fill::before { content: "\f367"; } +.bi-file-earmark-excel::before { content: "\f368"; } +.bi-file-earmark-fill::before { content: "\f369"; } +.bi-file-earmark-font-fill::before { content: "\f36a"; } +.bi-file-earmark-font::before { content: "\f36b"; } +.bi-file-earmark-image-fill::before { content: "\f36c"; } +.bi-file-earmark-image::before { content: "\f36d"; } +.bi-file-earmark-lock-fill::before { content: "\f36e"; } +.bi-file-earmark-lock::before { content: "\f36f"; } +.bi-file-earmark-lock2-fill::before { content: "\f370"; } +.bi-file-earmark-lock2::before { content: "\f371"; } 
+.bi-file-earmark-medical-fill::before { content: "\f372"; } +.bi-file-earmark-medical::before { content: "\f373"; } +.bi-file-earmark-minus-fill::before { content: "\f374"; } +.bi-file-earmark-minus::before { content: "\f375"; } +.bi-file-earmark-music-fill::before { content: "\f376"; } +.bi-file-earmark-music::before { content: "\f377"; } +.bi-file-earmark-person-fill::before { content: "\f378"; } +.bi-file-earmark-person::before { content: "\f379"; } +.bi-file-earmark-play-fill::before { content: "\f37a"; } +.bi-file-earmark-play::before { content: "\f37b"; } +.bi-file-earmark-plus-fill::before { content: "\f37c"; } +.bi-file-earmark-plus::before { content: "\f37d"; } +.bi-file-earmark-post-fill::before { content: "\f37e"; } +.bi-file-earmark-post::before { content: "\f37f"; } +.bi-file-earmark-ppt-fill::before { content: "\f380"; } +.bi-file-earmark-ppt::before { content: "\f381"; } +.bi-file-earmark-richtext-fill::before { content: "\f382"; } +.bi-file-earmark-richtext::before { content: "\f383"; } +.bi-file-earmark-ruled-fill::before { content: "\f384"; } +.bi-file-earmark-ruled::before { content: "\f385"; } +.bi-file-earmark-slides-fill::before { content: "\f386"; } +.bi-file-earmark-slides::before { content: "\f387"; } +.bi-file-earmark-spreadsheet-fill::before { content: "\f388"; } +.bi-file-earmark-spreadsheet::before { content: "\f389"; } +.bi-file-earmark-text-fill::before { content: "\f38a"; } +.bi-file-earmark-text::before { content: "\f38b"; } +.bi-file-earmark-word-fill::before { content: "\f38c"; } +.bi-file-earmark-word::before { content: "\f38d"; } +.bi-file-earmark-x-fill::before { content: "\f38e"; } +.bi-file-earmark-x::before { content: "\f38f"; } +.bi-file-earmark-zip-fill::before { content: "\f390"; } +.bi-file-earmark-zip::before { content: "\f391"; } +.bi-file-earmark::before { content: "\f392"; } +.bi-file-easel-fill::before { content: "\f393"; } +.bi-file-easel::before { content: "\f394"; } +.bi-file-excel-fill::before { content: 
"\f395"; } +.bi-file-excel::before { content: "\f396"; } +.bi-file-fill::before { content: "\f397"; } +.bi-file-font-fill::before { content: "\f398"; } +.bi-file-font::before { content: "\f399"; } +.bi-file-image-fill::before { content: "\f39a"; } +.bi-file-image::before { content: "\f39b"; } +.bi-file-lock-fill::before { content: "\f39c"; } +.bi-file-lock::before { content: "\f39d"; } +.bi-file-lock2-fill::before { content: "\f39e"; } +.bi-file-lock2::before { content: "\f39f"; } +.bi-file-medical-fill::before { content: "\f3a0"; } +.bi-file-medical::before { content: "\f3a1"; } +.bi-file-minus-fill::before { content: "\f3a2"; } +.bi-file-minus::before { content: "\f3a3"; } +.bi-file-music-fill::before { content: "\f3a4"; } +.bi-file-music::before { content: "\f3a5"; } +.bi-file-person-fill::before { content: "\f3a6"; } +.bi-file-person::before { content: "\f3a7"; } +.bi-file-play-fill::before { content: "\f3a8"; } +.bi-file-play::before { content: "\f3a9"; } +.bi-file-plus-fill::before { content: "\f3aa"; } +.bi-file-plus::before { content: "\f3ab"; } +.bi-file-post-fill::before { content: "\f3ac"; } +.bi-file-post::before { content: "\f3ad"; } +.bi-file-ppt-fill::before { content: "\f3ae"; } +.bi-file-ppt::before { content: "\f3af"; } +.bi-file-richtext-fill::before { content: "\f3b0"; } +.bi-file-richtext::before { content: "\f3b1"; } +.bi-file-ruled-fill::before { content: "\f3b2"; } +.bi-file-ruled::before { content: "\f3b3"; } +.bi-file-slides-fill::before { content: "\f3b4"; } +.bi-file-slides::before { content: "\f3b5"; } +.bi-file-spreadsheet-fill::before { content: "\f3b6"; } +.bi-file-spreadsheet::before { content: "\f3b7"; } +.bi-file-text-fill::before { content: "\f3b8"; } +.bi-file-text::before { content: "\f3b9"; } +.bi-file-word-fill::before { content: "\f3ba"; } +.bi-file-word::before { content: "\f3bb"; } +.bi-file-x-fill::before { content: "\f3bc"; } +.bi-file-x::before { content: "\f3bd"; } +.bi-file-zip-fill::before { content: "\f3be"; } 
+.bi-file-zip::before { content: "\f3bf"; } +.bi-file::before { content: "\f3c0"; } +.bi-files-alt::before { content: "\f3c1"; } +.bi-files::before { content: "\f3c2"; } +.bi-film::before { content: "\f3c3"; } +.bi-filter-circle-fill::before { content: "\f3c4"; } +.bi-filter-circle::before { content: "\f3c5"; } +.bi-filter-left::before { content: "\f3c6"; } +.bi-filter-right::before { content: "\f3c7"; } +.bi-filter-square-fill::before { content: "\f3c8"; } +.bi-filter-square::before { content: "\f3c9"; } +.bi-filter::before { content: "\f3ca"; } +.bi-flag-fill::before { content: "\f3cb"; } +.bi-flag::before { content: "\f3cc"; } +.bi-flower1::before { content: "\f3cd"; } +.bi-flower2::before { content: "\f3ce"; } +.bi-flower3::before { content: "\f3cf"; } +.bi-folder-check::before { content: "\f3d0"; } +.bi-folder-fill::before { content: "\f3d1"; } +.bi-folder-minus::before { content: "\f3d2"; } +.bi-folder-plus::before { content: "\f3d3"; } +.bi-folder-symlink-fill::before { content: "\f3d4"; } +.bi-folder-symlink::before { content: "\f3d5"; } +.bi-folder-x::before { content: "\f3d6"; } +.bi-folder::before { content: "\f3d7"; } +.bi-folder2-open::before { content: "\f3d8"; } +.bi-folder2::before { content: "\f3d9"; } +.bi-fonts::before { content: "\f3da"; } +.bi-forward-fill::before { content: "\f3db"; } +.bi-forward::before { content: "\f3dc"; } +.bi-front::before { content: "\f3dd"; } +.bi-fullscreen-exit::before { content: "\f3de"; } +.bi-fullscreen::before { content: "\f3df"; } +.bi-funnel-fill::before { content: "\f3e0"; } +.bi-funnel::before { content: "\f3e1"; } +.bi-gear-fill::before { content: "\f3e2"; } +.bi-gear-wide-connected::before { content: "\f3e3"; } +.bi-gear-wide::before { content: "\f3e4"; } +.bi-gear::before { content: "\f3e5"; } +.bi-gem::before { content: "\f3e6"; } +.bi-geo-alt-fill::before { content: "\f3e7"; } +.bi-geo-alt::before { content: "\f3e8"; } +.bi-geo-fill::before { content: "\f3e9"; } +.bi-geo::before { content: "\f3ea"; } 
+.bi-gift-fill::before { content: "\f3eb"; } +.bi-gift::before { content: "\f3ec"; } +.bi-github::before { content: "\f3ed"; } +.bi-globe::before { content: "\f3ee"; } +.bi-globe2::before { content: "\f3ef"; } +.bi-google::before { content: "\f3f0"; } +.bi-graph-down::before { content: "\f3f1"; } +.bi-graph-up::before { content: "\f3f2"; } +.bi-grid-1x2-fill::before { content: "\f3f3"; } +.bi-grid-1x2::before { content: "\f3f4"; } +.bi-grid-3x2-gap-fill::before { content: "\f3f5"; } +.bi-grid-3x2-gap::before { content: "\f3f6"; } +.bi-grid-3x2::before { content: "\f3f7"; } +.bi-grid-3x3-gap-fill::before { content: "\f3f8"; } +.bi-grid-3x3-gap::before { content: "\f3f9"; } +.bi-grid-3x3::before { content: "\f3fa"; } +.bi-grid-fill::before { content: "\f3fb"; } +.bi-grid::before { content: "\f3fc"; } +.bi-grip-horizontal::before { content: "\f3fd"; } +.bi-grip-vertical::before { content: "\f3fe"; } +.bi-hammer::before { content: "\f3ff"; } +.bi-hand-index-fill::before { content: "\f400"; } +.bi-hand-index-thumb-fill::before { content: "\f401"; } +.bi-hand-index-thumb::before { content: "\f402"; } +.bi-hand-index::before { content: "\f403"; } +.bi-hand-thumbs-down-fill::before { content: "\f404"; } +.bi-hand-thumbs-down::before { content: "\f405"; } +.bi-hand-thumbs-up-fill::before { content: "\f406"; } +.bi-hand-thumbs-up::before { content: "\f407"; } +.bi-handbag-fill::before { content: "\f408"; } +.bi-handbag::before { content: "\f409"; } +.bi-hash::before { content: "\f40a"; } +.bi-hdd-fill::before { content: "\f40b"; } +.bi-hdd-network-fill::before { content: "\f40c"; } +.bi-hdd-network::before { content: "\f40d"; } +.bi-hdd-rack-fill::before { content: "\f40e"; } +.bi-hdd-rack::before { content: "\f40f"; } +.bi-hdd-stack-fill::before { content: "\f410"; } +.bi-hdd-stack::before { content: "\f411"; } +.bi-hdd::before { content: "\f412"; } +.bi-headphones::before { content: "\f413"; } +.bi-headset::before { content: "\f414"; } +.bi-heart-fill::before { content: 
"\f415"; } +.bi-heart-half::before { content: "\f416"; } +.bi-heart::before { content: "\f417"; } +.bi-heptagon-fill::before { content: "\f418"; } +.bi-heptagon-half::before { content: "\f419"; } +.bi-heptagon::before { content: "\f41a"; } +.bi-hexagon-fill::before { content: "\f41b"; } +.bi-hexagon-half::before { content: "\f41c"; } +.bi-hexagon::before { content: "\f41d"; } +.bi-hourglass-bottom::before { content: "\f41e"; } +.bi-hourglass-split::before { content: "\f41f"; } +.bi-hourglass-top::before { content: "\f420"; } +.bi-hourglass::before { content: "\f421"; } +.bi-house-door-fill::before { content: "\f422"; } +.bi-house-door::before { content: "\f423"; } +.bi-house-fill::before { content: "\f424"; } +.bi-house::before { content: "\f425"; } +.bi-hr::before { content: "\f426"; } +.bi-hurricane::before { content: "\f427"; } +.bi-image-alt::before { content: "\f428"; } +.bi-image-fill::before { content: "\f429"; } +.bi-image::before { content: "\f42a"; } +.bi-images::before { content: "\f42b"; } +.bi-inbox-fill::before { content: "\f42c"; } +.bi-inbox::before { content: "\f42d"; } +.bi-inboxes-fill::before { content: "\f42e"; } +.bi-inboxes::before { content: "\f42f"; } +.bi-info-circle-fill::before { content: "\f430"; } +.bi-info-circle::before { content: "\f431"; } +.bi-info-square-fill::before { content: "\f432"; } +.bi-info-square::before { content: "\f433"; } +.bi-info::before { content: "\f434"; } +.bi-input-cursor-text::before { content: "\f435"; } +.bi-input-cursor::before { content: "\f436"; } +.bi-instagram::before { content: "\f437"; } +.bi-intersect::before { content: "\f438"; } +.bi-journal-album::before { content: "\f439"; } +.bi-journal-arrow-down::before { content: "\f43a"; } +.bi-journal-arrow-up::before { content: "\f43b"; } +.bi-journal-bookmark-fill::before { content: "\f43c"; } +.bi-journal-bookmark::before { content: "\f43d"; } +.bi-journal-check::before { content: "\f43e"; } +.bi-journal-code::before { content: "\f43f"; } 
+.bi-journal-medical::before { content: "\f440"; } +.bi-journal-minus::before { content: "\f441"; } +.bi-journal-plus::before { content: "\f442"; } +.bi-journal-richtext::before { content: "\f443"; } +.bi-journal-text::before { content: "\f444"; } +.bi-journal-x::before { content: "\f445"; } +.bi-journal::before { content: "\f446"; } +.bi-journals::before { content: "\f447"; } +.bi-joystick::before { content: "\f448"; } +.bi-justify-left::before { content: "\f449"; } +.bi-justify-right::before { content: "\f44a"; } +.bi-justify::before { content: "\f44b"; } +.bi-kanban-fill::before { content: "\f44c"; } +.bi-kanban::before { content: "\f44d"; } +.bi-key-fill::before { content: "\f44e"; } +.bi-key::before { content: "\f44f"; } +.bi-keyboard-fill::before { content: "\f450"; } +.bi-keyboard::before { content: "\f451"; } +.bi-ladder::before { content: "\f452"; } +.bi-lamp-fill::before { content: "\f453"; } +.bi-lamp::before { content: "\f454"; } +.bi-laptop-fill::before { content: "\f455"; } +.bi-laptop::before { content: "\f456"; } +.bi-layer-backward::before { content: "\f457"; } +.bi-layer-forward::before { content: "\f458"; } +.bi-layers-fill::before { content: "\f459"; } +.bi-layers-half::before { content: "\f45a"; } +.bi-layers::before { content: "\f45b"; } +.bi-layout-sidebar-inset-reverse::before { content: "\f45c"; } +.bi-layout-sidebar-inset::before { content: "\f45d"; } +.bi-layout-sidebar-reverse::before { content: "\f45e"; } +.bi-layout-sidebar::before { content: "\f45f"; } +.bi-layout-split::before { content: "\f460"; } +.bi-layout-text-sidebar-reverse::before { content: "\f461"; } +.bi-layout-text-sidebar::before { content: "\f462"; } +.bi-layout-text-window-reverse::before { content: "\f463"; } +.bi-layout-text-window::before { content: "\f464"; } +.bi-layout-three-columns::before { content: "\f465"; } +.bi-layout-wtf::before { content: "\f466"; } +.bi-life-preserver::before { content: "\f467"; } +.bi-lightbulb-fill::before { content: "\f468"; } 
+.bi-lightbulb-off-fill::before { content: "\f469"; } +.bi-lightbulb-off::before { content: "\f46a"; } +.bi-lightbulb::before { content: "\f46b"; } +.bi-lightning-charge-fill::before { content: "\f46c"; } +.bi-lightning-charge::before { content: "\f46d"; } +.bi-lightning-fill::before { content: "\f46e"; } +.bi-lightning::before { content: "\f46f"; } +.bi-link-45deg::before { content: "\f470"; } +.bi-link::before { content: "\f471"; } +.bi-linkedin::before { content: "\f472"; } +.bi-list-check::before { content: "\f473"; } +.bi-list-nested::before { content: "\f474"; } +.bi-list-ol::before { content: "\f475"; } +.bi-list-stars::before { content: "\f476"; } +.bi-list-task::before { content: "\f477"; } +.bi-list-ul::before { content: "\f478"; } +.bi-list::before { content: "\f479"; } +.bi-lock-fill::before { content: "\f47a"; } +.bi-lock::before { content: "\f47b"; } +.bi-mailbox::before { content: "\f47c"; } +.bi-mailbox2::before { content: "\f47d"; } +.bi-map-fill::before { content: "\f47e"; } +.bi-map::before { content: "\f47f"; } +.bi-markdown-fill::before { content: "\f480"; } +.bi-markdown::before { content: "\f481"; } +.bi-mask::before { content: "\f482"; } +.bi-megaphone-fill::before { content: "\f483"; } +.bi-megaphone::before { content: "\f484"; } +.bi-menu-app-fill::before { content: "\f485"; } +.bi-menu-app::before { content: "\f486"; } +.bi-menu-button-fill::before { content: "\f487"; } +.bi-menu-button-wide-fill::before { content: "\f488"; } +.bi-menu-button-wide::before { content: "\f489"; } +.bi-menu-button::before { content: "\f48a"; } +.bi-menu-down::before { content: "\f48b"; } +.bi-menu-up::before { content: "\f48c"; } +.bi-mic-fill::before { content: "\f48d"; } +.bi-mic-mute-fill::before { content: "\f48e"; } +.bi-mic-mute::before { content: "\f48f"; } +.bi-mic::before { content: "\f490"; } +.bi-minecart-loaded::before { content: "\f491"; } +.bi-minecart::before { content: "\f492"; } +.bi-moisture::before { content: "\f493"; } 
+.bi-moon-fill::before { content: "\f494"; } +.bi-moon-stars-fill::before { content: "\f495"; } +.bi-moon-stars::before { content: "\f496"; } +.bi-moon::before { content: "\f497"; } +.bi-mouse-fill::before { content: "\f498"; } +.bi-mouse::before { content: "\f499"; } +.bi-mouse2-fill::before { content: "\f49a"; } +.bi-mouse2::before { content: "\f49b"; } +.bi-mouse3-fill::before { content: "\f49c"; } +.bi-mouse3::before { content: "\f49d"; } +.bi-music-note-beamed::before { content: "\f49e"; } +.bi-music-note-list::before { content: "\f49f"; } +.bi-music-note::before { content: "\f4a0"; } +.bi-music-player-fill::before { content: "\f4a1"; } +.bi-music-player::before { content: "\f4a2"; } +.bi-newspaper::before { content: "\f4a3"; } +.bi-node-minus-fill::before { content: "\f4a4"; } +.bi-node-minus::before { content: "\f4a5"; } +.bi-node-plus-fill::before { content: "\f4a6"; } +.bi-node-plus::before { content: "\f4a7"; } +.bi-nut-fill::before { content: "\f4a8"; } +.bi-nut::before { content: "\f4a9"; } +.bi-octagon-fill::before { content: "\f4aa"; } +.bi-octagon-half::before { content: "\f4ab"; } +.bi-octagon::before { content: "\f4ac"; } +.bi-option::before { content: "\f4ad"; } +.bi-outlet::before { content: "\f4ae"; } +.bi-paint-bucket::before { content: "\f4af"; } +.bi-palette-fill::before { content: "\f4b0"; } +.bi-palette::before { content: "\f4b1"; } +.bi-palette2::before { content: "\f4b2"; } +.bi-paperclip::before { content: "\f4b3"; } +.bi-paragraph::before { content: "\f4b4"; } +.bi-patch-check-fill::before { content: "\f4b5"; } +.bi-patch-check::before { content: "\f4b6"; } +.bi-patch-exclamation-fill::before { content: "\f4b7"; } +.bi-patch-exclamation::before { content: "\f4b8"; } +.bi-patch-minus-fill::before { content: "\f4b9"; } +.bi-patch-minus::before { content: "\f4ba"; } +.bi-patch-plus-fill::before { content: "\f4bb"; } +.bi-patch-plus::before { content: "\f4bc"; } +.bi-patch-question-fill::before { content: "\f4bd"; } 
+.bi-patch-question::before { content: "\f4be"; } +.bi-pause-btn-fill::before { content: "\f4bf"; } +.bi-pause-btn::before { content: "\f4c0"; } +.bi-pause-circle-fill::before { content: "\f4c1"; } +.bi-pause-circle::before { content: "\f4c2"; } +.bi-pause-fill::before { content: "\f4c3"; } +.bi-pause::before { content: "\f4c4"; } +.bi-peace-fill::before { content: "\f4c5"; } +.bi-peace::before { content: "\f4c6"; } +.bi-pen-fill::before { content: "\f4c7"; } +.bi-pen::before { content: "\f4c8"; } +.bi-pencil-fill::before { content: "\f4c9"; } +.bi-pencil-square::before { content: "\f4ca"; } +.bi-pencil::before { content: "\f4cb"; } +.bi-pentagon-fill::before { content: "\f4cc"; } +.bi-pentagon-half::before { content: "\f4cd"; } +.bi-pentagon::before { content: "\f4ce"; } +.bi-people-fill::before { content: "\f4cf"; } +.bi-people::before { content: "\f4d0"; } +.bi-percent::before { content: "\f4d1"; } +.bi-person-badge-fill::before { content: "\f4d2"; } +.bi-person-badge::before { content: "\f4d3"; } +.bi-person-bounding-box::before { content: "\f4d4"; } +.bi-person-check-fill::before { content: "\f4d5"; } +.bi-person-check::before { content: "\f4d6"; } +.bi-person-circle::before { content: "\f4d7"; } +.bi-person-dash-fill::before { content: "\f4d8"; } +.bi-person-dash::before { content: "\f4d9"; } +.bi-person-fill::before { content: "\f4da"; } +.bi-person-lines-fill::before { content: "\f4db"; } +.bi-person-plus-fill::before { content: "\f4dc"; } +.bi-person-plus::before { content: "\f4dd"; } +.bi-person-square::before { content: "\f4de"; } +.bi-person-x-fill::before { content: "\f4df"; } +.bi-person-x::before { content: "\f4e0"; } +.bi-person::before { content: "\f4e1"; } +.bi-phone-fill::before { content: "\f4e2"; } +.bi-phone-landscape-fill::before { content: "\f4e3"; } +.bi-phone-landscape::before { content: "\f4e4"; } +.bi-phone-vibrate-fill::before { content: "\f4e5"; } +.bi-phone-vibrate::before { content: "\f4e6"; } +.bi-phone::before { content: "\f4e7"; } 
+.bi-pie-chart-fill::before { content: "\f4e8"; } +.bi-pie-chart::before { content: "\f4e9"; } +.bi-pin-angle-fill::before { content: "\f4ea"; } +.bi-pin-angle::before { content: "\f4eb"; } +.bi-pin-fill::before { content: "\f4ec"; } +.bi-pin::before { content: "\f4ed"; } +.bi-pip-fill::before { content: "\f4ee"; } +.bi-pip::before { content: "\f4ef"; } +.bi-play-btn-fill::before { content: "\f4f0"; } +.bi-play-btn::before { content: "\f4f1"; } +.bi-play-circle-fill::before { content: "\f4f2"; } +.bi-play-circle::before { content: "\f4f3"; } +.bi-play-fill::before { content: "\f4f4"; } +.bi-play::before { content: "\f4f5"; } +.bi-plug-fill::before { content: "\f4f6"; } +.bi-plug::before { content: "\f4f7"; } +.bi-plus-circle-dotted::before { content: "\f4f8"; } +.bi-plus-circle-fill::before { content: "\f4f9"; } +.bi-plus-circle::before { content: "\f4fa"; } +.bi-plus-square-dotted::before { content: "\f4fb"; } +.bi-plus-square-fill::before { content: "\f4fc"; } +.bi-plus-square::before { content: "\f4fd"; } +.bi-plus::before { content: "\f4fe"; } +.bi-power::before { content: "\f4ff"; } +.bi-printer-fill::before { content: "\f500"; } +.bi-printer::before { content: "\f501"; } +.bi-puzzle-fill::before { content: "\f502"; } +.bi-puzzle::before { content: "\f503"; } +.bi-question-circle-fill::before { content: "\f504"; } +.bi-question-circle::before { content: "\f505"; } +.bi-question-diamond-fill::before { content: "\f506"; } +.bi-question-diamond::before { content: "\f507"; } +.bi-question-octagon-fill::before { content: "\f508"; } +.bi-question-octagon::before { content: "\f509"; } +.bi-question-square-fill::before { content: "\f50a"; } +.bi-question-square::before { content: "\f50b"; } +.bi-question::before { content: "\f50c"; } +.bi-rainbow::before { content: "\f50d"; } +.bi-receipt-cutoff::before { content: "\f50e"; } +.bi-receipt::before { content: "\f50f"; } +.bi-reception-0::before { content: "\f510"; } +.bi-reception-1::before { content: "\f511"; } 
+.bi-reception-2::before { content: "\f512"; } +.bi-reception-3::before { content: "\f513"; } +.bi-reception-4::before { content: "\f514"; } +.bi-record-btn-fill::before { content: "\f515"; } +.bi-record-btn::before { content: "\f516"; } +.bi-record-circle-fill::before { content: "\f517"; } +.bi-record-circle::before { content: "\f518"; } +.bi-record-fill::before { content: "\f519"; } +.bi-record::before { content: "\f51a"; } +.bi-record2-fill::before { content: "\f51b"; } +.bi-record2::before { content: "\f51c"; } +.bi-reply-all-fill::before { content: "\f51d"; } +.bi-reply-all::before { content: "\f51e"; } +.bi-reply-fill::before { content: "\f51f"; } +.bi-reply::before { content: "\f520"; } +.bi-rss-fill::before { content: "\f521"; } +.bi-rss::before { content: "\f522"; } +.bi-rulers::before { content: "\f523"; } +.bi-save-fill::before { content: "\f524"; } +.bi-save::before { content: "\f525"; } +.bi-save2-fill::before { content: "\f526"; } +.bi-save2::before { content: "\f527"; } +.bi-scissors::before { content: "\f528"; } +.bi-screwdriver::before { content: "\f529"; } +.bi-search::before { content: "\f52a"; } +.bi-segmented-nav::before { content: "\f52b"; } +.bi-server::before { content: "\f52c"; } +.bi-share-fill::before { content: "\f52d"; } +.bi-share::before { content: "\f52e"; } +.bi-shield-check::before { content: "\f52f"; } +.bi-shield-exclamation::before { content: "\f530"; } +.bi-shield-fill-check::before { content: "\f531"; } +.bi-shield-fill-exclamation::before { content: "\f532"; } +.bi-shield-fill-minus::before { content: "\f533"; } +.bi-shield-fill-plus::before { content: "\f534"; } +.bi-shield-fill-x::before { content: "\f535"; } +.bi-shield-fill::before { content: "\f536"; } +.bi-shield-lock-fill::before { content: "\f537"; } +.bi-shield-lock::before { content: "\f538"; } +.bi-shield-minus::before { content: "\f539"; } +.bi-shield-plus::before { content: "\f53a"; } +.bi-shield-shaded::before { content: "\f53b"; } +.bi-shield-slash-fill::before 
{ content: "\f53c"; } +.bi-shield-slash::before { content: "\f53d"; } +.bi-shield-x::before { content: "\f53e"; } +.bi-shield::before { content: "\f53f"; } +.bi-shift-fill::before { content: "\f540"; } +.bi-shift::before { content: "\f541"; } +.bi-shop-window::before { content: "\f542"; } +.bi-shop::before { content: "\f543"; } +.bi-shuffle::before { content: "\f544"; } +.bi-signpost-2-fill::before { content: "\f545"; } +.bi-signpost-2::before { content: "\f546"; } +.bi-signpost-fill::before { content: "\f547"; } +.bi-signpost-split-fill::before { content: "\f548"; } +.bi-signpost-split::before { content: "\f549"; } +.bi-signpost::before { content: "\f54a"; } +.bi-sim-fill::before { content: "\f54b"; } +.bi-sim::before { content: "\f54c"; } +.bi-skip-backward-btn-fill::before { content: "\f54d"; } +.bi-skip-backward-btn::before { content: "\f54e"; } +.bi-skip-backward-circle-fill::before { content: "\f54f"; } +.bi-skip-backward-circle::before { content: "\f550"; } +.bi-skip-backward-fill::before { content: "\f551"; } +.bi-skip-backward::before { content: "\f552"; } +.bi-skip-end-btn-fill::before { content: "\f553"; } +.bi-skip-end-btn::before { content: "\f554"; } +.bi-skip-end-circle-fill::before { content: "\f555"; } +.bi-skip-end-circle::before { content: "\f556"; } +.bi-skip-end-fill::before { content: "\f557"; } +.bi-skip-end::before { content: "\f558"; } +.bi-skip-forward-btn-fill::before { content: "\f559"; } +.bi-skip-forward-btn::before { content: "\f55a"; } +.bi-skip-forward-circle-fill::before { content: "\f55b"; } +.bi-skip-forward-circle::before { content: "\f55c"; } +.bi-skip-forward-fill::before { content: "\f55d"; } +.bi-skip-forward::before { content: "\f55e"; } +.bi-skip-start-btn-fill::before { content: "\f55f"; } +.bi-skip-start-btn::before { content: "\f560"; } +.bi-skip-start-circle-fill::before { content: "\f561"; } +.bi-skip-start-circle::before { content: "\f562"; } +.bi-skip-start-fill::before { content: "\f563"; } +.bi-skip-start::before 
{ content: "\f564"; } +.bi-slack::before { content: "\f565"; } +.bi-slash-circle-fill::before { content: "\f566"; } +.bi-slash-circle::before { content: "\f567"; } +.bi-slash-square-fill::before { content: "\f568"; } +.bi-slash-square::before { content: "\f569"; } +.bi-slash::before { content: "\f56a"; } +.bi-sliders::before { content: "\f56b"; } +.bi-smartwatch::before { content: "\f56c"; } +.bi-snow::before { content: "\f56d"; } +.bi-snow2::before { content: "\f56e"; } +.bi-snow3::before { content: "\f56f"; } +.bi-sort-alpha-down-alt::before { content: "\f570"; } +.bi-sort-alpha-down::before { content: "\f571"; } +.bi-sort-alpha-up-alt::before { content: "\f572"; } +.bi-sort-alpha-up::before { content: "\f573"; } +.bi-sort-down-alt::before { content: "\f574"; } +.bi-sort-down::before { content: "\f575"; } +.bi-sort-numeric-down-alt::before { content: "\f576"; } +.bi-sort-numeric-down::before { content: "\f577"; } +.bi-sort-numeric-up-alt::before { content: "\f578"; } +.bi-sort-numeric-up::before { content: "\f579"; } +.bi-sort-up-alt::before { content: "\f57a"; } +.bi-sort-up::before { content: "\f57b"; } +.bi-soundwave::before { content: "\f57c"; } +.bi-speaker-fill::before { content: "\f57d"; } +.bi-speaker::before { content: "\f57e"; } +.bi-speedometer::before { content: "\f57f"; } +.bi-speedometer2::before { content: "\f580"; } +.bi-spellcheck::before { content: "\f581"; } +.bi-square-fill::before { content: "\f582"; } +.bi-square-half::before { content: "\f583"; } +.bi-square::before { content: "\f584"; } +.bi-stack::before { content: "\f585"; } +.bi-star-fill::before { content: "\f586"; } +.bi-star-half::before { content: "\f587"; } +.bi-star::before { content: "\f588"; } +.bi-stars::before { content: "\f589"; } +.bi-stickies-fill::before { content: "\f58a"; } +.bi-stickies::before { content: "\f58b"; } +.bi-sticky-fill::before { content: "\f58c"; } +.bi-sticky::before { content: "\f58d"; } +.bi-stop-btn-fill::before { content: "\f58e"; } 
+.bi-stop-btn::before { content: "\f58f"; } +.bi-stop-circle-fill::before { content: "\f590"; } +.bi-stop-circle::before { content: "\f591"; } +.bi-stop-fill::before { content: "\f592"; } +.bi-stop::before { content: "\f593"; } +.bi-stoplights-fill::before { content: "\f594"; } +.bi-stoplights::before { content: "\f595"; } +.bi-stopwatch-fill::before { content: "\f596"; } +.bi-stopwatch::before { content: "\f597"; } +.bi-subtract::before { content: "\f598"; } +.bi-suit-club-fill::before { content: "\f599"; } +.bi-suit-club::before { content: "\f59a"; } +.bi-suit-diamond-fill::before { content: "\f59b"; } +.bi-suit-diamond::before { content: "\f59c"; } +.bi-suit-heart-fill::before { content: "\f59d"; } +.bi-suit-heart::before { content: "\f59e"; } +.bi-suit-spade-fill::before { content: "\f59f"; } +.bi-suit-spade::before { content: "\f5a0"; } +.bi-sun-fill::before { content: "\f5a1"; } +.bi-sun::before { content: "\f5a2"; } +.bi-sunglasses::before { content: "\f5a3"; } +.bi-sunrise-fill::before { content: "\f5a4"; } +.bi-sunrise::before { content: "\f5a5"; } +.bi-sunset-fill::before { content: "\f5a6"; } +.bi-sunset::before { content: "\f5a7"; } +.bi-symmetry-horizontal::before { content: "\f5a8"; } +.bi-symmetry-vertical::before { content: "\f5a9"; } +.bi-table::before { content: "\f5aa"; } +.bi-tablet-fill::before { content: "\f5ab"; } +.bi-tablet-landscape-fill::before { content: "\f5ac"; } +.bi-tablet-landscape::before { content: "\f5ad"; } +.bi-tablet::before { content: "\f5ae"; } +.bi-tag-fill::before { content: "\f5af"; } +.bi-tag::before { content: "\f5b0"; } +.bi-tags-fill::before { content: "\f5b1"; } +.bi-tags::before { content: "\f5b2"; } +.bi-telegram::before { content: "\f5b3"; } +.bi-telephone-fill::before { content: "\f5b4"; } +.bi-telephone-forward-fill::before { content: "\f5b5"; } +.bi-telephone-forward::before { content: "\f5b6"; } +.bi-telephone-inbound-fill::before { content: "\f5b7"; } +.bi-telephone-inbound::before { content: "\f5b8"; } 
+.bi-telephone-minus-fill::before { content: "\f5b9"; } +.bi-telephone-minus::before { content: "\f5ba"; } +.bi-telephone-outbound-fill::before { content: "\f5bb"; } +.bi-telephone-outbound::before { content: "\f5bc"; } +.bi-telephone-plus-fill::before { content: "\f5bd"; } +.bi-telephone-plus::before { content: "\f5be"; } +.bi-telephone-x-fill::before { content: "\f5bf"; } +.bi-telephone-x::before { content: "\f5c0"; } +.bi-telephone::before { content: "\f5c1"; } +.bi-terminal-fill::before { content: "\f5c2"; } +.bi-terminal::before { content: "\f5c3"; } +.bi-text-center::before { content: "\f5c4"; } +.bi-text-indent-left::before { content: "\f5c5"; } +.bi-text-indent-right::before { content: "\f5c6"; } +.bi-text-left::before { content: "\f5c7"; } +.bi-text-paragraph::before { content: "\f5c8"; } +.bi-text-right::before { content: "\f5c9"; } +.bi-textarea-resize::before { content: "\f5ca"; } +.bi-textarea-t::before { content: "\f5cb"; } +.bi-textarea::before { content: "\f5cc"; } +.bi-thermometer-half::before { content: "\f5cd"; } +.bi-thermometer-high::before { content: "\f5ce"; } +.bi-thermometer-low::before { content: "\f5cf"; } +.bi-thermometer-snow::before { content: "\f5d0"; } +.bi-thermometer-sun::before { content: "\f5d1"; } +.bi-thermometer::before { content: "\f5d2"; } +.bi-three-dots-vertical::before { content: "\f5d3"; } +.bi-three-dots::before { content: "\f5d4"; } +.bi-toggle-off::before { content: "\f5d5"; } +.bi-toggle-on::before { content: "\f5d6"; } +.bi-toggle2-off::before { content: "\f5d7"; } +.bi-toggle2-on::before { content: "\f5d8"; } +.bi-toggles::before { content: "\f5d9"; } +.bi-toggles2::before { content: "\f5da"; } +.bi-tools::before { content: "\f5db"; } +.bi-tornado::before { content: "\f5dc"; } +.bi-trash-fill::before { content: "\f5dd"; } +.bi-trash::before { content: "\f5de"; } +.bi-trash2-fill::before { content: "\f5df"; } +.bi-trash2::before { content: "\f5e0"; } +.bi-tree-fill::before { content: "\f5e1"; } +.bi-tree::before { 
content: "\f5e2"; } +.bi-triangle-fill::before { content: "\f5e3"; } +.bi-triangle-half::before { content: "\f5e4"; } +.bi-triangle::before { content: "\f5e5"; } +.bi-trophy-fill::before { content: "\f5e6"; } +.bi-trophy::before { content: "\f5e7"; } +.bi-tropical-storm::before { content: "\f5e8"; } +.bi-truck-flatbed::before { content: "\f5e9"; } +.bi-truck::before { content: "\f5ea"; } +.bi-tsunami::before { content: "\f5eb"; } +.bi-tv-fill::before { content: "\f5ec"; } +.bi-tv::before { content: "\f5ed"; } +.bi-twitch::before { content: "\f5ee"; } +.bi-twitter::before { content: "\f5ef"; } +.bi-type-bold::before { content: "\f5f0"; } +.bi-type-h1::before { content: "\f5f1"; } +.bi-type-h2::before { content: "\f5f2"; } +.bi-type-h3::before { content: "\f5f3"; } +.bi-type-italic::before { content: "\f5f4"; } +.bi-type-strikethrough::before { content: "\f5f5"; } +.bi-type-underline::before { content: "\f5f6"; } +.bi-type::before { content: "\f5f7"; } +.bi-ui-checks-grid::before { content: "\f5f8"; } +.bi-ui-checks::before { content: "\f5f9"; } +.bi-ui-radios-grid::before { content: "\f5fa"; } +.bi-ui-radios::before { content: "\f5fb"; } +.bi-umbrella-fill::before { content: "\f5fc"; } +.bi-umbrella::before { content: "\f5fd"; } +.bi-union::before { content: "\f5fe"; } +.bi-unlock-fill::before { content: "\f5ff"; } +.bi-unlock::before { content: "\f600"; } +.bi-upc-scan::before { content: "\f601"; } +.bi-upc::before { content: "\f602"; } +.bi-upload::before { content: "\f603"; } +.bi-vector-pen::before { content: "\f604"; } +.bi-view-list::before { content: "\f605"; } +.bi-view-stacked::before { content: "\f606"; } +.bi-vinyl-fill::before { content: "\f607"; } +.bi-vinyl::before { content: "\f608"; } +.bi-voicemail::before { content: "\f609"; } +.bi-volume-down-fill::before { content: "\f60a"; } +.bi-volume-down::before { content: "\f60b"; } +.bi-volume-mute-fill::before { content: "\f60c"; } +.bi-volume-mute::before { content: "\f60d"; } 
+.bi-volume-off-fill::before { content: "\f60e"; } +.bi-volume-off::before { content: "\f60f"; } +.bi-volume-up-fill::before { content: "\f610"; } +.bi-volume-up::before { content: "\f611"; } +.bi-vr::before { content: "\f612"; } +.bi-wallet-fill::before { content: "\f613"; } +.bi-wallet::before { content: "\f614"; } +.bi-wallet2::before { content: "\f615"; } +.bi-watch::before { content: "\f616"; } +.bi-water::before { content: "\f617"; } +.bi-whatsapp::before { content: "\f618"; } +.bi-wifi-1::before { content: "\f619"; } +.bi-wifi-2::before { content: "\f61a"; } +.bi-wifi-off::before { content: "\f61b"; } +.bi-wifi::before { content: "\f61c"; } +.bi-wind::before { content: "\f61d"; } +.bi-window-dock::before { content: "\f61e"; } +.bi-window-sidebar::before { content: "\f61f"; } +.bi-window::before { content: "\f620"; } +.bi-wrench::before { content: "\f621"; } +.bi-x-circle-fill::before { content: "\f622"; } +.bi-x-circle::before { content: "\f623"; } +.bi-x-diamond-fill::before { content: "\f624"; } +.bi-x-diamond::before { content: "\f625"; } +.bi-x-octagon-fill::before { content: "\f626"; } +.bi-x-octagon::before { content: "\f627"; } +.bi-x-square-fill::before { content: "\f628"; } +.bi-x-square::before { content: "\f629"; } +.bi-x::before { content: "\f62a"; } +.bi-youtube::before { content: "\f62b"; } +.bi-zoom-in::before { content: "\f62c"; } +.bi-zoom-out::before { content: "\f62d"; } +.bi-bank::before { content: "\f62e"; } +.bi-bank2::before { content: "\f62f"; } +.bi-bell-slash-fill::before { content: "\f630"; } +.bi-bell-slash::before { content: "\f631"; } +.bi-cash-coin::before { content: "\f632"; } +.bi-check-lg::before { content: "\f633"; } +.bi-coin::before { content: "\f634"; } +.bi-currency-bitcoin::before { content: "\f635"; } +.bi-currency-dollar::before { content: "\f636"; } +.bi-currency-euro::before { content: "\f637"; } +.bi-currency-exchange::before { content: "\f638"; } +.bi-currency-pound::before { content: "\f639"; } 
+.bi-currency-yen::before { content: "\f63a"; } +.bi-dash-lg::before { content: "\f63b"; } +.bi-exclamation-lg::before { content: "\f63c"; } +.bi-file-earmark-pdf-fill::before { content: "\f63d"; } +.bi-file-earmark-pdf::before { content: "\f63e"; } +.bi-file-pdf-fill::before { content: "\f63f"; } +.bi-file-pdf::before { content: "\f640"; } +.bi-gender-ambiguous::before { content: "\f641"; } +.bi-gender-female::before { content: "\f642"; } +.bi-gender-male::before { content: "\f643"; } +.bi-gender-trans::before { content: "\f644"; } +.bi-headset-vr::before { content: "\f645"; } +.bi-info-lg::before { content: "\f646"; } +.bi-mastodon::before { content: "\f647"; } +.bi-messenger::before { content: "\f648"; } +.bi-piggy-bank-fill::before { content: "\f649"; } +.bi-piggy-bank::before { content: "\f64a"; } +.bi-pin-map-fill::before { content: "\f64b"; } +.bi-pin-map::before { content: "\f64c"; } +.bi-plus-lg::before { content: "\f64d"; } +.bi-question-lg::before { content: "\f64e"; } +.bi-recycle::before { content: "\f64f"; } +.bi-reddit::before { content: "\f650"; } +.bi-safe-fill::before { content: "\f651"; } +.bi-safe2-fill::before { content: "\f652"; } +.bi-safe2::before { content: "\f653"; } +.bi-sd-card-fill::before { content: "\f654"; } +.bi-sd-card::before { content: "\f655"; } +.bi-skype::before { content: "\f656"; } +.bi-slash-lg::before { content: "\f657"; } +.bi-translate::before { content: "\f658"; } +.bi-x-lg::before { content: "\f659"; } +.bi-safe::before { content: "\f65a"; } +.bi-apple::before { content: "\f65b"; } +.bi-microsoft::before { content: "\f65d"; } +.bi-windows::before { content: "\f65e"; } +.bi-behance::before { content: "\f65c"; } +.bi-dribbble::before { content: "\f65f"; } +.bi-line::before { content: "\f660"; } +.bi-medium::before { content: "\f661"; } +.bi-paypal::before { content: "\f662"; } +.bi-pinterest::before { content: "\f663"; } +.bi-signal::before { content: "\f664"; } +.bi-snapchat::before { content: "\f665"; } 
+.bi-spotify::before { content: "\f666"; } +.bi-stack-overflow::before { content: "\f667"; } +.bi-strava::before { content: "\f668"; } +.bi-wordpress::before { content: "\f669"; } +.bi-vimeo::before { content: "\f66a"; } +.bi-activity::before { content: "\f66b"; } +.bi-easel2-fill::before { content: "\f66c"; } +.bi-easel2::before { content: "\f66d"; } +.bi-easel3-fill::before { content: "\f66e"; } +.bi-easel3::before { content: "\f66f"; } +.bi-fan::before { content: "\f670"; } +.bi-fingerprint::before { content: "\f671"; } +.bi-graph-down-arrow::before { content: "\f672"; } +.bi-graph-up-arrow::before { content: "\f673"; } +.bi-hypnotize::before { content: "\f674"; } +.bi-magic::before { content: "\f675"; } +.bi-person-rolodex::before { content: "\f676"; } +.bi-person-video::before { content: "\f677"; } +.bi-person-video2::before { content: "\f678"; } +.bi-person-video3::before { content: "\f679"; } +.bi-person-workspace::before { content: "\f67a"; } +.bi-radioactive::before { content: "\f67b"; } +.bi-webcam-fill::before { content: "\f67c"; } +.bi-webcam::before { content: "\f67d"; } +.bi-yin-yang::before { content: "\f67e"; } +.bi-bandaid-fill::before { content: "\f680"; } +.bi-bandaid::before { content: "\f681"; } +.bi-bluetooth::before { content: "\f682"; } +.bi-body-text::before { content: "\f683"; } +.bi-boombox::before { content: "\f684"; } +.bi-boxes::before { content: "\f685"; } +.bi-dpad-fill::before { content: "\f686"; } +.bi-dpad::before { content: "\f687"; } +.bi-ear-fill::before { content: "\f688"; } +.bi-ear::before { content: "\f689"; } +.bi-envelope-check-1::before { content: "\f68a"; } +.bi-envelope-check-fill::before { content: "\f68b"; } +.bi-envelope-check::before { content: "\f68c"; } +.bi-envelope-dash-1::before { content: "\f68d"; } +.bi-envelope-dash-fill::before { content: "\f68e"; } +.bi-envelope-dash::before { content: "\f68f"; } +.bi-envelope-exclamation-1::before { content: "\f690"; } +.bi-envelope-exclamation-fill::before { content: 
"\f691"; } +.bi-envelope-exclamation::before { content: "\f692"; } +.bi-envelope-plus-fill::before { content: "\f693"; } +.bi-envelope-plus::before { content: "\f694"; } +.bi-envelope-slash-1::before { content: "\f695"; } +.bi-envelope-slash-fill::before { content: "\f696"; } +.bi-envelope-slash::before { content: "\f697"; } +.bi-envelope-x-1::before { content: "\f698"; } +.bi-envelope-x-fill::before { content: "\f699"; } +.bi-envelope-x::before { content: "\f69a"; } +.bi-explicit-fill::before { content: "\f69b"; } +.bi-explicit::before { content: "\f69c"; } +.bi-git::before { content: "\f69d"; } +.bi-infinity::before { content: "\f69e"; } +.bi-list-columns-reverse::before { content: "\f69f"; } +.bi-list-columns::before { content: "\f6a0"; } +.bi-meta::before { content: "\f6a1"; } +.bi-mortorboard-fill::before { content: "\f6a2"; } +.bi-mortorboard::before { content: "\f6a3"; } +.bi-nintendo-switch::before { content: "\f6a4"; } +.bi-pc-display-horizontal::before { content: "\f6a5"; } +.bi-pc-display::before { content: "\f6a6"; } +.bi-pc-horizontal::before { content: "\f6a7"; } +.bi-pc::before { content: "\f6a8"; } +.bi-playstation::before { content: "\f6a9"; } +.bi-plus-slash-minus::before { content: "\f6aa"; } +.bi-projector-fill::before { content: "\f6ab"; } +.bi-projector::before { content: "\f6ac"; } +.bi-qr-code-scan::before { content: "\f6ad"; } +.bi-qr-code::before { content: "\f6ae"; } +.bi-quora::before { content: "\f6af"; } +.bi-quote::before { content: "\f6b0"; } +.bi-robot::before { content: "\f6b1"; } +.bi-send-check-fill::before { content: "\f6b2"; } +.bi-send-check::before { content: "\f6b3"; } +.bi-send-dash-fill::before { content: "\f6b4"; } +.bi-send-dash::before { content: "\f6b5"; } +.bi-send-exclamation-1::before { content: "\f6b6"; } +.bi-send-exclamation-fill::before { content: "\f6b7"; } +.bi-send-exclamation::before { content: "\f6b8"; } +.bi-send-fill::before { content: "\f6b9"; } +.bi-send-plus-fill::before { content: "\f6ba"; } 
+.bi-send-plus::before { content: "\f6bb"; } +.bi-send-slash-fill::before { content: "\f6bc"; } +.bi-send-slash::before { content: "\f6bd"; } +.bi-send-x-fill::before { content: "\f6be"; } +.bi-send-x::before { content: "\f6bf"; } +.bi-send::before { content: "\f6c0"; } +.bi-steam::before { content: "\f6c1"; } +.bi-terminal-dash-1::before { content: "\f6c2"; } +.bi-terminal-dash::before { content: "\f6c3"; } +.bi-terminal-plus::before { content: "\f6c4"; } +.bi-terminal-split::before { content: "\f6c5"; } +.bi-ticket-detailed-fill::before { content: "\f6c6"; } +.bi-ticket-detailed::before { content: "\f6c7"; } +.bi-ticket-fill::before { content: "\f6c8"; } +.bi-ticket-perforated-fill::before { content: "\f6c9"; } +.bi-ticket-perforated::before { content: "\f6ca"; } +.bi-ticket::before { content: "\f6cb"; } +.bi-tiktok::before { content: "\f6cc"; } +.bi-window-dash::before { content: "\f6cd"; } +.bi-window-desktop::before { content: "\f6ce"; } +.bi-window-fullscreen::before { content: "\f6cf"; } +.bi-window-plus::before { content: "\f6d0"; } +.bi-window-split::before { content: "\f6d1"; } +.bi-window-stack::before { content: "\f6d2"; } +.bi-window-x::before { content: "\f6d3"; } +.bi-xbox::before { content: "\f6d4"; } +.bi-ethernet::before { content: "\f6d5"; } +.bi-hdmi-fill::before { content: "\f6d6"; } +.bi-hdmi::before { content: "\f6d7"; } +.bi-usb-c-fill::before { content: "\f6d8"; } +.bi-usb-c::before { content: "\f6d9"; } +.bi-usb-fill::before { content: "\f6da"; } +.bi-usb-plug-fill::before { content: "\f6db"; } +.bi-usb-plug::before { content: "\f6dc"; } +.bi-usb-symbol::before { content: "\f6dd"; } +.bi-usb::before { content: "\f6de"; } +.bi-boombox-fill::before { content: "\f6df"; } +.bi-displayport-1::before { content: "\f6e0"; } +.bi-displayport::before { content: "\f6e1"; } +.bi-gpu-card::before { content: "\f6e2"; } +.bi-memory::before { content: "\f6e3"; } +.bi-modem-fill::before { content: "\f6e4"; } +.bi-modem::before { content: "\f6e5"; } 
+.bi-motherboard-fill::before { content: "\f6e6"; } +.bi-motherboard::before { content: "\f6e7"; } +.bi-optical-audio-fill::before { content: "\f6e8"; } +.bi-optical-audio::before { content: "\f6e9"; } +.bi-pci-card::before { content: "\f6ea"; } +.bi-router-fill::before { content: "\f6eb"; } +.bi-router::before { content: "\f6ec"; } +.bi-ssd-fill::before { content: "\f6ed"; } +.bi-ssd::before { content: "\f6ee"; } +.bi-thunderbolt-fill::before { content: "\f6ef"; } +.bi-thunderbolt::before { content: "\f6f0"; } +.bi-usb-drive-fill::before { content: "\f6f1"; } +.bi-usb-drive::before { content: "\f6f2"; } +.bi-usb-micro-fill::before { content: "\f6f3"; } +.bi-usb-micro::before { content: "\f6f4"; } +.bi-usb-mini-fill::before { content: "\f6f5"; } +.bi-usb-mini::before { content: "\f6f6"; } +.bi-cloud-haze2::before { content: "\f6f7"; } +.bi-device-hdd-fill::before { content: "\f6f8"; } +.bi-device-hdd::before { content: "\f6f9"; } +.bi-device-ssd-fill::before { content: "\f6fa"; } +.bi-device-ssd::before { content: "\f6fb"; } +.bi-displayport-fill::before { content: "\f6fc"; } +.bi-mortarboard-fill::before { content: "\f6fd"; } +.bi-mortarboard::before { content: "\f6fe"; } +.bi-terminal-x::before { content: "\f6ff"; } +.bi-arrow-through-heart-fill::before { content: "\f700"; } +.bi-arrow-through-heart::before { content: "\f701"; } +.bi-badge-sd-fill::before { content: "\f702"; } +.bi-badge-sd::before { content: "\f703"; } +.bi-bag-heart-fill::before { content: "\f704"; } +.bi-bag-heart::before { content: "\f705"; } +.bi-balloon-fill::before { content: "\f706"; } +.bi-balloon-heart-fill::before { content: "\f707"; } +.bi-balloon-heart::before { content: "\f708"; } +.bi-balloon::before { content: "\f709"; } +.bi-box2-fill::before { content: "\f70a"; } +.bi-box2-heart-fill::before { content: "\f70b"; } +.bi-box2-heart::before { content: "\f70c"; } +.bi-box2::before { content: "\f70d"; } +.bi-braces-asterisk::before { content: "\f70e"; } 
+.bi-calendar-heart-fill::before { content: "\f70f"; } +.bi-calendar-heart::before { content: "\f710"; } +.bi-calendar2-heart-fill::before { content: "\f711"; } +.bi-calendar2-heart::before { content: "\f712"; } +.bi-chat-heart-fill::before { content: "\f713"; } +.bi-chat-heart::before { content: "\f714"; } +.bi-chat-left-heart-fill::before { content: "\f715"; } +.bi-chat-left-heart::before { content: "\f716"; } +.bi-chat-right-heart-fill::before { content: "\f717"; } +.bi-chat-right-heart::before { content: "\f718"; } +.bi-chat-square-heart-fill::before { content: "\f719"; } +.bi-chat-square-heart::before { content: "\f71a"; } +.bi-clipboard-check-fill::before { content: "\f71b"; } +.bi-clipboard-data-fill::before { content: "\f71c"; } +.bi-clipboard-fill::before { content: "\f71d"; } +.bi-clipboard-heart-fill::before { content: "\f71e"; } +.bi-clipboard-heart::before { content: "\f71f"; } +.bi-clipboard-minus-fill::before { content: "\f720"; } +.bi-clipboard-plus-fill::before { content: "\f721"; } +.bi-clipboard-pulse::before { content: "\f722"; } +.bi-clipboard-x-fill::before { content: "\f723"; } +.bi-clipboard2-check-fill::before { content: "\f724"; } +.bi-clipboard2-check::before { content: "\f725"; } +.bi-clipboard2-data-fill::before { content: "\f726"; } +.bi-clipboard2-data::before { content: "\f727"; } +.bi-clipboard2-fill::before { content: "\f728"; } +.bi-clipboard2-heart-fill::before { content: "\f729"; } +.bi-clipboard2-heart::before { content: "\f72a"; } +.bi-clipboard2-minus-fill::before { content: "\f72b"; } +.bi-clipboard2-minus::before { content: "\f72c"; } +.bi-clipboard2-plus-fill::before { content: "\f72d"; } +.bi-clipboard2-plus::before { content: "\f72e"; } +.bi-clipboard2-pulse-fill::before { content: "\f72f"; } +.bi-clipboard2-pulse::before { content: "\f730"; } +.bi-clipboard2-x-fill::before { content: "\f731"; } +.bi-clipboard2-x::before { content: "\f732"; } +.bi-clipboard2::before { content: "\f733"; } +.bi-emoji-kiss-fill::before { 
content: "\f734"; } +.bi-emoji-kiss::before { content: "\f735"; } +.bi-envelope-heart-fill::before { content: "\f736"; } +.bi-envelope-heart::before { content: "\f737"; } +.bi-envelope-open-heart-fill::before { content: "\f738"; } +.bi-envelope-open-heart::before { content: "\f739"; } +.bi-envelope-paper-fill::before { content: "\f73a"; } +.bi-envelope-paper-heart-fill::before { content: "\f73b"; } +.bi-envelope-paper-heart::before { content: "\f73c"; } +.bi-envelope-paper::before { content: "\f73d"; } +.bi-filetype-aac::before { content: "\f73e"; } +.bi-filetype-ai::before { content: "\f73f"; } +.bi-filetype-bmp::before { content: "\f740"; } +.bi-filetype-cs::before { content: "\f741"; } +.bi-filetype-css::before { content: "\f742"; } +.bi-filetype-csv::before { content: "\f743"; } +.bi-filetype-doc::before { content: "\f744"; } +.bi-filetype-docx::before { content: "\f745"; } +.bi-filetype-exe::before { content: "\f746"; } +.bi-filetype-gif::before { content: "\f747"; } +.bi-filetype-heic::before { content: "\f748"; } +.bi-filetype-html::before { content: "\f749"; } +.bi-filetype-java::before { content: "\f74a"; } +.bi-filetype-jpg::before { content: "\f74b"; } +.bi-filetype-js::before { content: "\f74c"; } +.bi-filetype-jsx::before { content: "\f74d"; } +.bi-filetype-key::before { content: "\f74e"; } +.bi-filetype-m4p::before { content: "\f74f"; } +.bi-filetype-md::before { content: "\f750"; } +.bi-filetype-mdx::before { content: "\f751"; } +.bi-filetype-mov::before { content: "\f752"; } +.bi-filetype-mp3::before { content: "\f753"; } +.bi-filetype-mp4::before { content: "\f754"; } +.bi-filetype-otf::before { content: "\f755"; } +.bi-filetype-pdf::before { content: "\f756"; } +.bi-filetype-php::before { content: "\f757"; } +.bi-filetype-png::before { content: "\f758"; } +.bi-filetype-ppt-1::before { content: "\f759"; } +.bi-filetype-ppt::before { content: "\f75a"; } +.bi-filetype-psd::before { content: "\f75b"; } +.bi-filetype-py::before { content: "\f75c"; } 
+.bi-filetype-raw::before { content: "\f75d"; } +.bi-filetype-rb::before { content: "\f75e"; } +.bi-filetype-sass::before { content: "\f75f"; } +.bi-filetype-scss::before { content: "\f760"; } +.bi-filetype-sh::before { content: "\f761"; } +.bi-filetype-svg::before { content: "\f762"; } +.bi-filetype-tiff::before { content: "\f763"; } +.bi-filetype-tsx::before { content: "\f764"; } +.bi-filetype-ttf::before { content: "\f765"; } +.bi-filetype-txt::before { content: "\f766"; } +.bi-filetype-wav::before { content: "\f767"; } +.bi-filetype-woff::before { content: "\f768"; } +.bi-filetype-xls-1::before { content: "\f769"; } +.bi-filetype-xls::before { content: "\f76a"; } +.bi-filetype-xml::before { content: "\f76b"; } +.bi-filetype-yml::before { content: "\f76c"; } +.bi-heart-arrow::before { content: "\f76d"; } +.bi-heart-pulse-fill::before { content: "\f76e"; } +.bi-heart-pulse::before { content: "\f76f"; } +.bi-heartbreak-fill::before { content: "\f770"; } +.bi-heartbreak::before { content: "\f771"; } +.bi-hearts::before { content: "\f772"; } +.bi-hospital-fill::before { content: "\f773"; } +.bi-hospital::before { content: "\f774"; } +.bi-house-heart-fill::before { content: "\f775"; } +.bi-house-heart::before { content: "\f776"; } +.bi-incognito::before { content: "\f777"; } +.bi-magnet-fill::before { content: "\f778"; } +.bi-magnet::before { content: "\f779"; } +.bi-person-heart::before { content: "\f77a"; } +.bi-person-hearts::before { content: "\f77b"; } +.bi-phone-flip::before { content: "\f77c"; } +.bi-plugin::before { content: "\f77d"; } +.bi-postage-fill::before { content: "\f77e"; } +.bi-postage-heart-fill::before { content: "\f77f"; } +.bi-postage-heart::before { content: "\f780"; } +.bi-postage::before { content: "\f781"; } +.bi-postcard-fill::before { content: "\f782"; } +.bi-postcard-heart-fill::before { content: "\f783"; } +.bi-postcard-heart::before { content: "\f784"; } +.bi-postcard::before { content: "\f785"; } +.bi-search-heart-fill::before { 
content: "\f786"; } +.bi-search-heart::before { content: "\f787"; } +.bi-sliders2-vertical::before { content: "\f788"; } +.bi-sliders2::before { content: "\f789"; } +.bi-trash3-fill::before { content: "\f78a"; } +.bi-trash3::before { content: "\f78b"; } +.bi-valentine::before { content: "\f78c"; } +.bi-valentine2::before { content: "\f78d"; } +.bi-wrench-adjustable-circle-fill::before { content: "\f78e"; } +.bi-wrench-adjustable-circle::before { content: "\f78f"; } +.bi-wrench-adjustable::before { content: "\f790"; } +.bi-filetype-json::before { content: "\f791"; } +.bi-filetype-pptx::before { content: "\f792"; } +.bi-filetype-xlsx::before { content: "\f793"; } +.bi-1-circle-1::before { content: "\f794"; } +.bi-1-circle-fill-1::before { content: "\f795"; } +.bi-1-circle-fill::before { content: "\f796"; } +.bi-1-circle::before { content: "\f797"; } +.bi-1-square-fill::before { content: "\f798"; } +.bi-1-square::before { content: "\f799"; } +.bi-2-circle-1::before { content: "\f79a"; } +.bi-2-circle-fill-1::before { content: "\f79b"; } +.bi-2-circle-fill::before { content: "\f79c"; } +.bi-2-circle::before { content: "\f79d"; } +.bi-2-square-fill::before { content: "\f79e"; } +.bi-2-square::before { content: "\f79f"; } +.bi-3-circle-1::before { content: "\f7a0"; } +.bi-3-circle-fill-1::before { content: "\f7a1"; } +.bi-3-circle-fill::before { content: "\f7a2"; } +.bi-3-circle::before { content: "\f7a3"; } +.bi-3-square-fill::before { content: "\f7a4"; } +.bi-3-square::before { content: "\f7a5"; } +.bi-4-circle-1::before { content: "\f7a6"; } +.bi-4-circle-fill-1::before { content: "\f7a7"; } +.bi-4-circle-fill::before { content: "\f7a8"; } +.bi-4-circle::before { content: "\f7a9"; } +.bi-4-square-fill::before { content: "\f7aa"; } +.bi-4-square::before { content: "\f7ab"; } +.bi-5-circle-1::before { content: "\f7ac"; } +.bi-5-circle-fill-1::before { content: "\f7ad"; } +.bi-5-circle-fill::before { content: "\f7ae"; } +.bi-5-circle::before { content: "\f7af"; } 
+.bi-5-square-fill::before { content: "\f7b0"; } +.bi-5-square::before { content: "\f7b1"; } +.bi-6-circle-1::before { content: "\f7b2"; } +.bi-6-circle-fill-1::before { content: "\f7b3"; } +.bi-6-circle-fill::before { content: "\f7b4"; } +.bi-6-circle::before { content: "\f7b5"; } +.bi-6-square-fill::before { content: "\f7b6"; } +.bi-6-square::before { content: "\f7b7"; } +.bi-7-circle-1::before { content: "\f7b8"; } +.bi-7-circle-fill-1::before { content: "\f7b9"; } +.bi-7-circle-fill::before { content: "\f7ba"; } +.bi-7-circle::before { content: "\f7bb"; } +.bi-7-square-fill::before { content: "\f7bc"; } +.bi-7-square::before { content: "\f7bd"; } +.bi-8-circle-1::before { content: "\f7be"; } +.bi-8-circle-fill-1::before { content: "\f7bf"; } +.bi-8-circle-fill::before { content: "\f7c0"; } +.bi-8-circle::before { content: "\f7c1"; } +.bi-8-square-fill::before { content: "\f7c2"; } +.bi-8-square::before { content: "\f7c3"; } +.bi-9-circle-1::before { content: "\f7c4"; } +.bi-9-circle-fill-1::before { content: "\f7c5"; } +.bi-9-circle-fill::before { content: "\f7c6"; } +.bi-9-circle::before { content: "\f7c7"; } +.bi-9-square-fill::before { content: "\f7c8"; } +.bi-9-square::before { content: "\f7c9"; } +.bi-airplane-engines-fill::before { content: "\f7ca"; } +.bi-airplane-engines::before { content: "\f7cb"; } +.bi-airplane-fill::before { content: "\f7cc"; } +.bi-airplane::before { content: "\f7cd"; } +.bi-alexa::before { content: "\f7ce"; } +.bi-alipay::before { content: "\f7cf"; } +.bi-android::before { content: "\f7d0"; } +.bi-android2::before { content: "\f7d1"; } +.bi-box-fill::before { content: "\f7d2"; } +.bi-box-seam-fill::before { content: "\f7d3"; } +.bi-browser-chrome::before { content: "\f7d4"; } +.bi-browser-edge::before { content: "\f7d5"; } +.bi-browser-firefox::before { content: "\f7d6"; } +.bi-browser-safari::before { content: "\f7d7"; } +.bi-c-circle-1::before { content: "\f7d8"; } +.bi-c-circle-fill-1::before { content: "\f7d9"; } 
+.bi-c-circle-fill::before { content: "\f7da"; } +.bi-c-circle::before { content: "\f7db"; } +.bi-c-square-fill::before { content: "\f7dc"; } +.bi-c-square::before { content: "\f7dd"; } +.bi-capsule-pill::before { content: "\f7de"; } +.bi-capsule::before { content: "\f7df"; } +.bi-car-front-fill::before { content: "\f7e0"; } +.bi-car-front::before { content: "\f7e1"; } +.bi-cassette-fill::before { content: "\f7e2"; } +.bi-cassette::before { content: "\f7e3"; } +.bi-cc-circle-1::before { content: "\f7e4"; } +.bi-cc-circle-fill-1::before { content: "\f7e5"; } +.bi-cc-circle-fill::before { content: "\f7e6"; } +.bi-cc-circle::before { content: "\f7e7"; } +.bi-cc-square-fill::before { content: "\f7e8"; } +.bi-cc-square::before { content: "\f7e9"; } +.bi-cup-hot-fill::before { content: "\f7ea"; } +.bi-cup-hot::before { content: "\f7eb"; } +.bi-currency-rupee::before { content: "\f7ec"; } +.bi-dropbox::before { content: "\f7ed"; } +.bi-escape::before { content: "\f7ee"; } +.bi-fast-forward-btn-fill::before { content: "\f7ef"; } +.bi-fast-forward-btn::before { content: "\f7f0"; } +.bi-fast-forward-circle-fill::before { content: "\f7f1"; } +.bi-fast-forward-circle::before { content: "\f7f2"; } +.bi-fast-forward-fill::before { content: "\f7f3"; } +.bi-fast-forward::before { content: "\f7f4"; } +.bi-filetype-sql::before { content: "\f7f5"; } +.bi-fire::before { content: "\f7f6"; } +.bi-google-play::before { content: "\f7f7"; } +.bi-h-circle-1::before { content: "\f7f8"; } +.bi-h-circle-fill-1::before { content: "\f7f9"; } +.bi-h-circle-fill::before { content: "\f7fa"; } +.bi-h-circle::before { content: "\f7fb"; } +.bi-h-square-fill::before { content: "\f7fc"; } +.bi-h-square::before { content: "\f7fd"; } +.bi-indent::before { content: "\f7fe"; } +.bi-lungs-fill::before { content: "\f7ff"; } +.bi-lungs::before { content: "\f800"; } +.bi-microsoft-teams::before { content: "\f801"; } +.bi-p-circle-1::before { content: "\f802"; } +.bi-p-circle-fill-1::before { content: "\f803"; } 
+.bi-p-circle-fill::before { content: "\f804"; } +.bi-p-circle::before { content: "\f805"; } +.bi-p-square-fill::before { content: "\f806"; } +.bi-p-square::before { content: "\f807"; } +.bi-pass-fill::before { content: "\f808"; } +.bi-pass::before { content: "\f809"; } +.bi-prescription::before { content: "\f80a"; } +.bi-prescription2::before { content: "\f80b"; } +.bi-r-circle-1::before { content: "\f80c"; } +.bi-r-circle-fill-1::before { content: "\f80d"; } +.bi-r-circle-fill::before { content: "\f80e"; } +.bi-r-circle::before { content: "\f80f"; } +.bi-r-square-fill::before { content: "\f810"; } +.bi-r-square::before { content: "\f811"; } +.bi-repeat-1::before { content: "\f812"; } +.bi-repeat::before { content: "\f813"; } +.bi-rewind-btn-fill::before { content: "\f814"; } +.bi-rewind-btn::before { content: "\f815"; } +.bi-rewind-circle-fill::before { content: "\f816"; } +.bi-rewind-circle::before { content: "\f817"; } +.bi-rewind-fill::before { content: "\f818"; } +.bi-rewind::before { content: "\f819"; } +.bi-train-freight-front-fill::before { content: "\f81a"; } +.bi-train-freight-front::before { content: "\f81b"; } +.bi-train-front-fill::before { content: "\f81c"; } +.bi-train-front::before { content: "\f81d"; } +.bi-train-lightrail-front-fill::before { content: "\f81e"; } +.bi-train-lightrail-front::before { content: "\f81f"; } +.bi-truck-front-fill::before { content: "\f820"; } +.bi-truck-front::before { content: "\f821"; } +.bi-ubuntu::before { content: "\f822"; } +.bi-unindent::before { content: "\f823"; } +.bi-unity::before { content: "\f824"; } +.bi-universal-access-circle::before { content: "\f825"; } +.bi-universal-access::before { content: "\f826"; } +.bi-virus::before { content: "\f827"; } +.bi-virus2::before { content: "\f828"; } +.bi-wechat::before { content: "\f829"; } +.bi-yelp::before { content: "\f82a"; } +.bi-sign-stop-fill::before { content: "\f82b"; } +.bi-sign-stop-lights-fill::before { content: "\f82c"; } +.bi-sign-stop-lights::before { 
content: "\f82d"; } +.bi-sign-stop::before { content: "\f82e"; } +.bi-sign-turn-left-fill::before { content: "\f82f"; } +.bi-sign-turn-left::before { content: "\f830"; } +.bi-sign-turn-right-fill::before { content: "\f831"; } +.bi-sign-turn-right::before { content: "\f832"; } +.bi-sign-turn-slight-left-fill::before { content: "\f833"; } +.bi-sign-turn-slight-left::before { content: "\f834"; } +.bi-sign-turn-slight-right-fill::before { content: "\f835"; } +.bi-sign-turn-slight-right::before { content: "\f836"; } +.bi-sign-yield-fill::before { content: "\f837"; } +.bi-sign-yield::before { content: "\f838"; } +.bi-ev-station-fill::before { content: "\f839"; } +.bi-ev-station::before { content: "\f83a"; } +.bi-fuel-pump-diesel-fill::before { content: "\f83b"; } +.bi-fuel-pump-diesel::before { content: "\f83c"; } +.bi-fuel-pump-fill::before { content: "\f83d"; } +.bi-fuel-pump::before { content: "\f83e"; } +.bi-0-circle-fill::before { content: "\f83f"; } +.bi-0-circle::before { content: "\f840"; } +.bi-0-square-fill::before { content: "\f841"; } +.bi-0-square::before { content: "\f842"; } +.bi-rocket-fill::before { content: "\f843"; } +.bi-rocket-takeoff-fill::before { content: "\f844"; } +.bi-rocket-takeoff::before { content: "\f845"; } +.bi-rocket::before { content: "\f846"; } +.bi-stripe::before { content: "\f847"; } +.bi-subscript::before { content: "\f848"; } +.bi-superscript::before { content: "\f849"; } +.bi-trello::before { content: "\f84a"; } +.bi-envelope-at-fill::before { content: "\f84b"; } +.bi-envelope-at::before { content: "\f84c"; } +.bi-regex::before { content: "\f84d"; } +.bi-text-wrap::before { content: "\f84e"; } +.bi-sign-dead-end-fill::before { content: "\f84f"; } +.bi-sign-dead-end::before { content: "\f850"; } +.bi-sign-do-not-enter-fill::before { content: "\f851"; } +.bi-sign-do-not-enter::before { content: "\f852"; } +.bi-sign-intersection-fill::before { content: "\f853"; } +.bi-sign-intersection-side-fill::before { content: "\f854"; } 
+.bi-sign-intersection-side::before { content: "\f855"; } +.bi-sign-intersection-t-fill::before { content: "\f856"; } +.bi-sign-intersection-t::before { content: "\f857"; } +.bi-sign-intersection-y-fill::before { content: "\f858"; } +.bi-sign-intersection-y::before { content: "\f859"; } +.bi-sign-intersection::before { content: "\f85a"; } +.bi-sign-merge-left-fill::before { content: "\f85b"; } +.bi-sign-merge-left::before { content: "\f85c"; } +.bi-sign-merge-right-fill::before { content: "\f85d"; } +.bi-sign-merge-right::before { content: "\f85e"; } +.bi-sign-no-left-turn-fill::before { content: "\f85f"; } +.bi-sign-no-left-turn::before { content: "\f860"; } +.bi-sign-no-parking-fill::before { content: "\f861"; } +.bi-sign-no-parking::before { content: "\f862"; } +.bi-sign-no-right-turn-fill::before { content: "\f863"; } +.bi-sign-no-right-turn::before { content: "\f864"; } +.bi-sign-railroad-fill::before { content: "\f865"; } +.bi-sign-railroad::before { content: "\f866"; } +.bi-building-add::before { content: "\f867"; } +.bi-building-check::before { content: "\f868"; } +.bi-building-dash::before { content: "\f869"; } +.bi-building-down::before { content: "\f86a"; } +.bi-building-exclamation::before { content: "\f86b"; } +.bi-building-fill-add::before { content: "\f86c"; } +.bi-building-fill-check::before { content: "\f86d"; } +.bi-building-fill-dash::before { content: "\f86e"; } +.bi-building-fill-down::before { content: "\f86f"; } +.bi-building-fill-exclamation::before { content: "\f870"; } +.bi-building-fill-gear::before { content: "\f871"; } +.bi-building-fill-lock::before { content: "\f872"; } +.bi-building-fill-slash::before { content: "\f873"; } +.bi-building-fill-up::before { content: "\f874"; } +.bi-building-fill-x::before { content: "\f875"; } +.bi-building-fill::before { content: "\f876"; } +.bi-building-gear::before { content: "\f877"; } +.bi-building-lock::before { content: "\f878"; } +.bi-building-slash::before { content: "\f879"; } 
+.bi-building-up::before { content: "\f87a"; } +.bi-building-x::before { content: "\f87b"; } +.bi-buildings-fill::before { content: "\f87c"; } +.bi-buildings::before { content: "\f87d"; } +.bi-bus-front-fill::before { content: "\f87e"; } +.bi-bus-front::before { content: "\f87f"; } +.bi-ev-front-fill::before { content: "\f880"; } +.bi-ev-front::before { content: "\f881"; } +.bi-globe-americas::before { content: "\f882"; } +.bi-globe-asia-australia::before { content: "\f883"; } +.bi-globe-central-south-asia::before { content: "\f884"; } +.bi-globe-europe-africa::before { content: "\f885"; } +.bi-house-add-fill::before { content: "\f886"; } +.bi-house-add::before { content: "\f887"; } +.bi-house-check-fill::before { content: "\f888"; } +.bi-house-check::before { content: "\f889"; } +.bi-house-dash-fill::before { content: "\f88a"; } +.bi-house-dash::before { content: "\f88b"; } +.bi-house-down-fill::before { content: "\f88c"; } +.bi-house-down::before { content: "\f88d"; } +.bi-house-exclamation-fill::before { content: "\f88e"; } +.bi-house-exclamation::before { content: "\f88f"; } +.bi-house-gear-fill::before { content: "\f890"; } +.bi-house-gear::before { content: "\f891"; } +.bi-house-lock-fill::before { content: "\f892"; } +.bi-house-lock::before { content: "\f893"; } +.bi-house-slash-fill::before { content: "\f894"; } +.bi-house-slash::before { content: "\f895"; } +.bi-house-up-fill::before { content: "\f896"; } +.bi-house-up::before { content: "\f897"; } +.bi-house-x-fill::before { content: "\f898"; } +.bi-house-x::before { content: "\f899"; } +.bi-person-add::before { content: "\f89a"; } +.bi-person-down::before { content: "\f89b"; } +.bi-person-exclamation::before { content: "\f89c"; } +.bi-person-fill-add::before { content: "\f89d"; } +.bi-person-fill-check::before { content: "\f89e"; } +.bi-person-fill-dash::before { content: "\f89f"; } +.bi-person-fill-down::before { content: "\f8a0"; } +.bi-person-fill-exclamation::before { content: "\f8a1"; } 
+.bi-person-fill-gear::before { content: "\f8a2"; } +.bi-person-fill-lock::before { content: "\f8a3"; } +.bi-person-fill-slash::before { content: "\f8a4"; } +.bi-person-fill-up::before { content: "\f8a5"; } +.bi-person-fill-x::before { content: "\f8a6"; } +.bi-person-gear::before { content: "\f8a7"; } +.bi-person-lock::before { content: "\f8a8"; } +.bi-person-slash::before { content: "\f8a9"; } +.bi-person-up::before { content: "\f8aa"; } +.bi-scooter::before { content: "\f8ab"; } +.bi-taxi-front-fill::before { content: "\f8ac"; } +.bi-taxi-front::before { content: "\f8ad"; } +.bi-amd::before { content: "\f8ae"; } +.bi-database-add::before { content: "\f8af"; } +.bi-database-check::before { content: "\f8b0"; } +.bi-database-dash::before { content: "\f8b1"; } +.bi-database-down::before { content: "\f8b2"; } +.bi-database-exclamation::before { content: "\f8b3"; } +.bi-database-fill-add::before { content: "\f8b4"; } +.bi-database-fill-check::before { content: "\f8b5"; } +.bi-database-fill-dash::before { content: "\f8b6"; } +.bi-database-fill-down::before { content: "\f8b7"; } +.bi-database-fill-exclamation::before { content: "\f8b8"; } +.bi-database-fill-gear::before { content: "\f8b9"; } +.bi-database-fill-lock::before { content: "\f8ba"; } +.bi-database-fill-slash::before { content: "\f8bb"; } +.bi-database-fill-up::before { content: "\f8bc"; } +.bi-database-fill-x::before { content: "\f8bd"; } +.bi-database-fill::before { content: "\f8be"; } +.bi-database-gear::before { content: "\f8bf"; } +.bi-database-lock::before { content: "\f8c0"; } +.bi-database-slash::before { content: "\f8c1"; } +.bi-database-up::before { content: "\f8c2"; } +.bi-database-x::before { content: "\f8c3"; } +.bi-database::before { content: "\f8c4"; } +.bi-houses-fill::before { content: "\f8c5"; } +.bi-houses::before { content: "\f8c6"; } +.bi-nvidia::before { content: "\f8c7"; } +.bi-person-vcard-fill::before { content: "\f8c8"; } +.bi-person-vcard::before { content: "\f8c9"; } 
+.bi-sina-weibo::before { content: "\f8ca"; } +.bi-tencent-qq::before { content: "\f8cb"; } +.bi-wikipedia::before { content: "\f8cc"; } diff --git a/docs/site_libs/bootstrap/bootstrap-icons.woff b/docs/site_libs/bootstrap/bootstrap-icons.woff new file mode 100644 index 00000000..18d21d45 Binary files /dev/null and b/docs/site_libs/bootstrap/bootstrap-icons.woff differ diff --git a/docs/site_libs/bootstrap/bootstrap.min.css b/docs/site_libs/bootstrap/bootstrap.min.css new file mode 100644 index 00000000..872afc31 --- /dev/null +++ b/docs/site_libs/bootstrap/bootstrap.min.css @@ -0,0 +1,10 @@ +@import"https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,300;0,400;0,700;1,400&family=Source+Code+Pro&display=swap";/*! + * Bootstrap v5.1.3 (https://getbootstrap.com/) + * Copyright 2011-2021 The Bootstrap Authors + * Copyright 2011-2021 Twitter, Inc. + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */@import"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;700&display=swap";:root{--bs-blue: #2780e3;--bs-indigo: #6610f2;--bs-purple: #613d7c;--bs-pink: #e83e8c;--bs-red: #ff0039;--bs-orange: #f0ad4e;--bs-yellow: #ff7518;--bs-green: #3fb618;--bs-teal: #20c997;--bs-cyan: #9954bb;--bs-white: #fff;--bs-gray: #6c757d;--bs-gray-dark: #373a3c;--bs-gray-100: #f8f9fa;--bs-gray-200: #e9ecef;--bs-gray-300: #dee2e6;--bs-gray-400: #ced4da;--bs-gray-500: #adb5bd;--bs-gray-600: #6c757d;--bs-gray-700: #495057;--bs-gray-800: #373a3c;--bs-gray-900: #212529;--bs-default: #373a3c;--bs-primary: #2780e3;--bs-secondary: #373a3c;--bs-success: #3fb618;--bs-info: #9954bb;--bs-warning: #ff7518;--bs-danger: #ff0039;--bs-light: #f8f9fa;--bs-dark: #373a3c;--bs-default-rgb: 55, 58, 60;--bs-primary-rgb: 39, 128, 227;--bs-secondary-rgb: 55, 58, 60;--bs-success-rgb: 63, 182, 24;--bs-info-rgb: 153, 84, 187;--bs-warning-rgb: 255, 117, 24;--bs-danger-rgb: 255, 0, 57;--bs-light-rgb: 248, 249, 250;--bs-dark-rgb: 55, 58, 60;--bs-white-rgb: 255, 255, 
255;--bs-black-rgb: 0, 0, 0;--bs-body-color-rgb: 55, 58, 60;--bs-body-bg-rgb: 255, 255, 255;--bs-font-sans-serif: "Lato", sans-serif;--bs-font-monospace: "Source Code Pro", monospace;--bs-gradient: linear-gradient(180deg, rgba(255, 255, 255, 0.15), rgba(255, 255, 255, 0));--bs-root-font-size: 17px;--bs-body-font-family: var(--bs-font-sans-serif);--bs-body-font-size: 1rem;--bs-body-font-weight: 400;--bs-body-line-height: 1.6;--bs-body-color: #373a3c;--bs-body-bg: #fff}*,*::before,*::after{box-sizing:border-box}:root{font-size:var(--bs-root-font-size)}body{margin:0;font-family:var(--bs-body-font-family);font-size:var(--bs-body-font-size);font-weight:var(--bs-body-font-weight);line-height:var(--bs-body-line-height);color:var(--bs-body-color);text-align:var(--bs-body-text-align);background-color:var(--bs-body-bg);-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:rgba(0,0,0,0)}hr{margin:1rem 0;color:inherit;background-color:currentColor;border:0;opacity:.25}hr:not([size]){height:1px}h6,.h6,h5,.h5,h4,.h4,h3,.h3,h2,.h2,h1,.h1{margin-top:0;margin-bottom:.5rem;font-weight:400;line-height:1.2}h1,.h1{font-size:2.4em}h2,.h2{font-size:calc(1.29rem + 0.48vw)}@media(min-width: 1200px){h2,.h2{font-size:1.65rem}}h3,.h3{font-size:calc(1.27rem + 0.24vw)}@media(min-width: 1200px){h3,.h3{font-size:1.45rem}}h4,.h4{font-size:1.25rem}h5,.h5{font-size:1.1rem}h6,.h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}abbr[title],abbr[data-bs-original-title]{text-decoration:underline dotted;-webkit-text-decoration:underline dotted;-moz-text-decoration:underline dotted;-ms-text-decoration:underline dotted;-o-text-decoration:underline dotted;cursor:help;text-decoration-skip-ink:none}address{margin-bottom:1rem;font-style:normal;line-height:inherit}ol,ul{padding-left:2rem}ol,ul,dl{margin-top:0;margin-bottom:1rem}ol ol,ul ul,ol ul,ul ol{margin-bottom:0}dt{font-weight:700}dd{margin-bottom:.5rem;margin-left:0}blockquote{margin:0 0 1rem;padding:.625rem 1.25rem;border-left:.25rem solid 
#e9ecef}blockquote p:last-child,blockquote ul:last-child,blockquote ol:last-child{margin-bottom:0}b,strong{font-weight:bolder}small,.small{font-size:0.875em}mark,.mark{padding:.2em;background-color:#fcf8e3}sub,sup{position:relative;font-size:0.75em;line-height:0;vertical-align:baseline}sub{bottom:-0.25em}sup{top:-0.5em}a{color:#ca225e;text-decoration:underline;-webkit-text-decoration:underline;-moz-text-decoration:underline;-ms-text-decoration:underline;-o-text-decoration:underline}a:hover{color:#a21b4b}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}pre,code,kbd,samp{font-family:var(--bs-font-monospace);font-size:1em;direction:ltr /* rtl:ignore */;unicode-bidi:bidi-override}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:0.875em;color:#000;background-color:#f7f7f7;padding:.5rem;border:1px solid #dee2e6}pre code{background-color:rgba(0,0,0,0);font-size:inherit;color:inherit;word-break:normal}code{font-size:0.875em;color:#4758ab;background-color:#f7f7f7;padding:.125rem .25rem;word-wrap:break-word}a>code{color:inherit}kbd{padding:.4rem .4rem;font-size:0.875em;color:#fff;background-color:#212529}kbd kbd{padding:0;font-size:1em;font-weight:700}figure{margin:0 0 
1rem}img,svg{vertical-align:middle}table{caption-side:bottom;border-collapse:collapse}caption{padding-top:.5rem;padding-bottom:.5rem;color:#6c757d;text-align:left}th{text-align:inherit;text-align:-webkit-match-parent}thead,tbody,tfoot,tr,td,th{border-color:inherit;border-style:solid;border-width:0}label{display:inline-block}button{border-radius:0}button:focus:not(:focus-visible){outline:0}input,button,select,optgroup,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}select:disabled{opacity:1}[list]::-webkit-calendar-picker-indicator{display:none}button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button}button:not(:disabled),[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}textarea{resize:vertical}fieldset{min-width:0;padding:0;margin:0;border:0}legend{float:left;width:100%;padding:0;margin-bottom:.5rem;font-size:calc(1.275rem + 0.3vw);line-height:inherit}@media(min-width: 1200px){legend{font-size:1.5rem}}legend+*{clear:left}::-webkit-datetime-edit-fields-wrapper,::-webkit-datetime-edit-text,::-webkit-datetime-edit-minute,::-webkit-datetime-edit-hour-field,::-webkit-datetime-edit-day-field,::-webkit-datetime-edit-month-field,::-webkit-datetime-edit-year-field{padding:0}::-webkit-inner-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:textfield}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::file-selector-button{font:inherit}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}iframe{border:0}summary{display:list-item;cursor:pointer}progress{vertical-align:baseline}[hidden]{display:none !important}.lead{font-size:1.25rem;font-weight:300}.display-1{font-size:calc(1.625rem + 4.5vw);font-weight:300;line-height:1.2}@media(min-width: 
1200px){.display-1{font-size:5rem}}.display-2{font-size:calc(1.575rem + 3.9vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-2{font-size:4.5rem}}.display-3{font-size:calc(1.525rem + 3.3vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-3{font-size:4rem}}.display-4{font-size:calc(1.475rem + 2.7vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-4{font-size:3.5rem}}.display-5{font-size:calc(1.425rem + 2.1vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-5{font-size:3rem}}.display-6{font-size:calc(1.375rem + 1.5vw);font-weight:300;line-height:1.2}@media(min-width: 1200px){.display-6{font-size:2.5rem}}.list-unstyled{padding-left:0;list-style:none}.list-inline{padding-left:0;list-style:none}.list-inline-item{display:inline-block}.list-inline-item:not(:last-child){margin-right:.5rem}.initialism{font-size:0.875em;text-transform:uppercase}.blockquote{margin-bottom:1rem;font-size:1.25rem}.blockquote>:last-child{margin-bottom:0}.blockquote-footer{margin-top:-1rem;margin-bottom:1rem;font-size:0.875em;color:#6c757d}.blockquote-footer::before{content:"— "}.img-fluid{max-width:100%;height:auto}.img-thumbnail{padding:.25rem;background-color:#fff;border:1px solid #dee2e6;max-width:100%;height:auto}.figure{display:inline-block}.figure-img{margin-bottom:.5rem;line-height:1}.figure-caption{font-size:0.875em;color:#6c757d}.grid{display:grid;grid-template-rows:repeat(var(--bs-rows, 1), 1fr);grid-template-columns:repeat(var(--bs-columns, 12), 1fr);gap:var(--bs-gap, 1.5rem)}.grid .g-col-1{grid-column:auto/span 1}.grid .g-col-2{grid-column:auto/span 2}.grid .g-col-3{grid-column:auto/span 3}.grid .g-col-4{grid-column:auto/span 4}.grid .g-col-5{grid-column:auto/span 5}.grid .g-col-6{grid-column:auto/span 6}.grid .g-col-7{grid-column:auto/span 7}.grid .g-col-8{grid-column:auto/span 8}.grid .g-col-9{grid-column:auto/span 9}.grid .g-col-10{grid-column:auto/span 10}.grid .g-col-11{grid-column:auto/span 11}.grid 
.g-col-12{grid-column:auto/span 12}.grid .g-start-1{grid-column-start:1}.grid .g-start-2{grid-column-start:2}.grid .g-start-3{grid-column-start:3}.grid .g-start-4{grid-column-start:4}.grid .g-start-5{grid-column-start:5}.grid .g-start-6{grid-column-start:6}.grid .g-start-7{grid-column-start:7}.grid .g-start-8{grid-column-start:8}.grid .g-start-9{grid-column-start:9}.grid .g-start-10{grid-column-start:10}.grid .g-start-11{grid-column-start:11}@media(min-width: 576px){.grid .g-col-sm-1{grid-column:auto/span 1}.grid .g-col-sm-2{grid-column:auto/span 2}.grid .g-col-sm-3{grid-column:auto/span 3}.grid .g-col-sm-4{grid-column:auto/span 4}.grid .g-col-sm-5{grid-column:auto/span 5}.grid .g-col-sm-6{grid-column:auto/span 6}.grid .g-col-sm-7{grid-column:auto/span 7}.grid .g-col-sm-8{grid-column:auto/span 8}.grid .g-col-sm-9{grid-column:auto/span 9}.grid .g-col-sm-10{grid-column:auto/span 10}.grid .g-col-sm-11{grid-column:auto/span 11}.grid .g-col-sm-12{grid-column:auto/span 12}.grid .g-start-sm-1{grid-column-start:1}.grid .g-start-sm-2{grid-column-start:2}.grid .g-start-sm-3{grid-column-start:3}.grid .g-start-sm-4{grid-column-start:4}.grid .g-start-sm-5{grid-column-start:5}.grid .g-start-sm-6{grid-column-start:6}.grid .g-start-sm-7{grid-column-start:7}.grid .g-start-sm-8{grid-column-start:8}.grid .g-start-sm-9{grid-column-start:9}.grid .g-start-sm-10{grid-column-start:10}.grid .g-start-sm-11{grid-column-start:11}}@media(min-width: 768px){.grid .g-col-md-1{grid-column:auto/span 1}.grid .g-col-md-2{grid-column:auto/span 2}.grid .g-col-md-3{grid-column:auto/span 3}.grid .g-col-md-4{grid-column:auto/span 4}.grid .g-col-md-5{grid-column:auto/span 5}.grid .g-col-md-6{grid-column:auto/span 6}.grid .g-col-md-7{grid-column:auto/span 7}.grid .g-col-md-8{grid-column:auto/span 8}.grid .g-col-md-9{grid-column:auto/span 9}.grid .g-col-md-10{grid-column:auto/span 10}.grid .g-col-md-11{grid-column:auto/span 11}.grid .g-col-md-12{grid-column:auto/span 12}.grid 
.g-start-md-1{grid-column-start:1}.grid .g-start-md-2{grid-column-start:2}.grid .g-start-md-3{grid-column-start:3}.grid .g-start-md-4{grid-column-start:4}.grid .g-start-md-5{grid-column-start:5}.grid .g-start-md-6{grid-column-start:6}.grid .g-start-md-7{grid-column-start:7}.grid .g-start-md-8{grid-column-start:8}.grid .g-start-md-9{grid-column-start:9}.grid .g-start-md-10{grid-column-start:10}.grid .g-start-md-11{grid-column-start:11}}@media(min-width: 992px){.grid .g-col-lg-1{grid-column:auto/span 1}.grid .g-col-lg-2{grid-column:auto/span 2}.grid .g-col-lg-3{grid-column:auto/span 3}.grid .g-col-lg-4{grid-column:auto/span 4}.grid .g-col-lg-5{grid-column:auto/span 5}.grid .g-col-lg-6{grid-column:auto/span 6}.grid .g-col-lg-7{grid-column:auto/span 7}.grid .g-col-lg-8{grid-column:auto/span 8}.grid .g-col-lg-9{grid-column:auto/span 9}.grid .g-col-lg-10{grid-column:auto/span 10}.grid .g-col-lg-11{grid-column:auto/span 11}.grid .g-col-lg-12{grid-column:auto/span 12}.grid .g-start-lg-1{grid-column-start:1}.grid .g-start-lg-2{grid-column-start:2}.grid .g-start-lg-3{grid-column-start:3}.grid .g-start-lg-4{grid-column-start:4}.grid .g-start-lg-5{grid-column-start:5}.grid .g-start-lg-6{grid-column-start:6}.grid .g-start-lg-7{grid-column-start:7}.grid .g-start-lg-8{grid-column-start:8}.grid .g-start-lg-9{grid-column-start:9}.grid .g-start-lg-10{grid-column-start:10}.grid .g-start-lg-11{grid-column-start:11}}@media(min-width: 1200px){.grid .g-col-xl-1{grid-column:auto/span 1}.grid .g-col-xl-2{grid-column:auto/span 2}.grid .g-col-xl-3{grid-column:auto/span 3}.grid .g-col-xl-4{grid-column:auto/span 4}.grid .g-col-xl-5{grid-column:auto/span 5}.grid .g-col-xl-6{grid-column:auto/span 6}.grid .g-col-xl-7{grid-column:auto/span 7}.grid .g-col-xl-8{grid-column:auto/span 8}.grid .g-col-xl-9{grid-column:auto/span 9}.grid .g-col-xl-10{grid-column:auto/span 10}.grid .g-col-xl-11{grid-column:auto/span 11}.grid .g-col-xl-12{grid-column:auto/span 12}.grid 
.g-start-xl-1{grid-column-start:1}.grid .g-start-xl-2{grid-column-start:2}.grid .g-start-xl-3{grid-column-start:3}.grid .g-start-xl-4{grid-column-start:4}.grid .g-start-xl-5{grid-column-start:5}.grid .g-start-xl-6{grid-column-start:6}.grid .g-start-xl-7{grid-column-start:7}.grid .g-start-xl-8{grid-column-start:8}.grid .g-start-xl-9{grid-column-start:9}.grid .g-start-xl-10{grid-column-start:10}.grid .g-start-xl-11{grid-column-start:11}}@media(min-width: 1400px){.grid .g-col-xxl-1{grid-column:auto/span 1}.grid .g-col-xxl-2{grid-column:auto/span 2}.grid .g-col-xxl-3{grid-column:auto/span 3}.grid .g-col-xxl-4{grid-column:auto/span 4}.grid .g-col-xxl-5{grid-column:auto/span 5}.grid .g-col-xxl-6{grid-column:auto/span 6}.grid .g-col-xxl-7{grid-column:auto/span 7}.grid .g-col-xxl-8{grid-column:auto/span 8}.grid .g-col-xxl-9{grid-column:auto/span 9}.grid .g-col-xxl-10{grid-column:auto/span 10}.grid .g-col-xxl-11{grid-column:auto/span 11}.grid .g-col-xxl-12{grid-column:auto/span 12}.grid .g-start-xxl-1{grid-column-start:1}.grid .g-start-xxl-2{grid-column-start:2}.grid .g-start-xxl-3{grid-column-start:3}.grid .g-start-xxl-4{grid-column-start:4}.grid .g-start-xxl-5{grid-column-start:5}.grid .g-start-xxl-6{grid-column-start:6}.grid .g-start-xxl-7{grid-column-start:7}.grid .g-start-xxl-8{grid-column-start:8}.grid .g-start-xxl-9{grid-column-start:9}.grid .g-start-xxl-10{grid-column-start:10}.grid .g-start-xxl-11{grid-column-start:11}}.table{--bs-table-bg: transparent;--bs-table-accent-bg: transparent;--bs-table-striped-color: #373a3c;--bs-table-striped-bg: rgba(0, 0, 0, 0.05);--bs-table-active-color: #373a3c;--bs-table-active-bg: rgba(0, 0, 0, 0.1);--bs-table-hover-color: #373a3c;--bs-table-hover-bg: rgba(0, 0, 0, 0.075);width:100%;margin-bottom:1rem;color:#373a3c;vertical-align:top;border-color:#dee2e6}.table>:not(caption)>*>*{padding:.5rem .5rem;background-color:var(--bs-table-bg);border-bottom-width:1px;box-shadow:inset 0 0 0 9999px 
var(--bs-table-accent-bg)}.table>tbody{vertical-align:inherit}.table>thead{vertical-align:bottom}.table>:not(:first-child){border-top:2px solid #b6babc}.caption-top{caption-side:top}.table-sm>:not(caption)>*>*{padding:.25rem .25rem}.table-bordered>:not(caption)>*{border-width:1px 0}.table-bordered>:not(caption)>*>*{border-width:0 1px}.table-borderless>:not(caption)>*>*{border-bottom-width:0}.table-borderless>:not(:first-child){border-top-width:0}.table-striped>tbody>tr:nth-of-type(odd)>*{--bs-table-accent-bg: var(--bs-table-striped-bg);color:var(--bs-table-striped-color)}.table-active{--bs-table-accent-bg: var(--bs-table-active-bg);color:var(--bs-table-active-color)}.table-hover>tbody>tr:hover>*{--bs-table-accent-bg: var(--bs-table-hover-bg);color:var(--bs-table-hover-color)}.table-primary{--bs-table-bg: #d4e6f9;--bs-table-striped-bg: #c9dbed;--bs-table-striped-color: #000;--bs-table-active-bg: #bfcfe0;--bs-table-active-color: #000;--bs-table-hover-bg: #c4d5e6;--bs-table-hover-color: #000;color:#000;border-color:#bfcfe0}.table-secondary{--bs-table-bg: #d7d8d8;--bs-table-striped-bg: #cccdcd;--bs-table-striped-color: #000;--bs-table-active-bg: #c2c2c2;--bs-table-active-color: #000;--bs-table-hover-bg: #c7c8c8;--bs-table-hover-color: #000;color:#000;border-color:#c2c2c2}.table-success{--bs-table-bg: #d9f0d1;--bs-table-striped-bg: #cee4c7;--bs-table-striped-color: #000;--bs-table-active-bg: #c3d8bc;--bs-table-active-color: #000;--bs-table-hover-bg: #c9dec1;--bs-table-hover-color: #000;color:#000;border-color:#c3d8bc}.table-info{--bs-table-bg: #ebddf1;--bs-table-striped-bg: #dfd2e5;--bs-table-striped-color: #000;--bs-table-active-bg: #d4c7d9;--bs-table-active-color: #000;--bs-table-hover-bg: #d9ccdf;--bs-table-hover-color: #000;color:#000;border-color:#d4c7d9}.table-warning{--bs-table-bg: #ffe3d1;--bs-table-striped-bg: #f2d8c7;--bs-table-striped-color: #000;--bs-table-active-bg: #e6ccbc;--bs-table-active-color: #000;--bs-table-hover-bg: #ecd2c1;--bs-table-hover-color: 
#000;color:#000;border-color:#e6ccbc}.table-danger{--bs-table-bg: #ffccd7;--bs-table-striped-bg: #f2c2cc;--bs-table-striped-color: #000;--bs-table-active-bg: #e6b8c2;--bs-table-active-color: #000;--bs-table-hover-bg: #ecbdc7;--bs-table-hover-color: #000;color:#000;border-color:#e6b8c2}.table-light{--bs-table-bg: #f8f9fa;--bs-table-striped-bg: #ecedee;--bs-table-striped-color: #000;--bs-table-active-bg: #dfe0e1;--bs-table-active-color: #000;--bs-table-hover-bg: #e5e6e7;--bs-table-hover-color: #000;color:#000;border-color:#dfe0e1}.table-dark{--bs-table-bg: #373a3c;--bs-table-striped-bg: #414446;--bs-table-striped-color: #fff;--bs-table-active-bg: #4b4e50;--bs-table-active-color: #fff;--bs-table-hover-bg: #46494b;--bs-table-hover-color: #fff;color:#fff;border-color:#4b4e50}.table-responsive{overflow-x:auto;-webkit-overflow-scrolling:touch}@media(max-width: 575.98px){.table-responsive-sm{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 767.98px){.table-responsive-md{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 991.98px){.table-responsive-lg{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 1199.98px){.table-responsive-xl{overflow-x:auto;-webkit-overflow-scrolling:touch}}@media(max-width: 1399.98px){.table-responsive-xxl{overflow-x:auto;-webkit-overflow-scrolling:touch}}.form-label,.shiny-input-container .control-label{margin-bottom:.5rem}.col-form-label{padding-top:calc(0.375rem + 1px);padding-bottom:calc(0.375rem + 1px);margin-bottom:0;font-size:inherit;line-height:1.5}.col-form-label-lg{padding-top:calc(0.5rem + 1px);padding-bottom:calc(0.5rem + 1px);font-size:1.25rem}.col-form-label-sm{padding-top:calc(0.25rem + 1px);padding-bottom:calc(0.25rem + 1px);font-size:0.875rem}.form-text{margin-top:.25rem;font-size:0.875em;color:#6c757d}.form-control{display:block;width:100%;padding:.375rem 
.75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#373a3c;background-color:#fff;background-clip:padding-box;border:1px solid #ced4da;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;border-radius:0;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-control{transition:none}}.form-control[type=file]{overflow:hidden}.form-control[type=file]:not(:disabled):not([readonly]){cursor:pointer}.form-control:focus{color:#373a3c;background-color:#fff;border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-control::-webkit-date-and-time-value{height:1.5em}.form-control::placeholder{color:#6c757d;opacity:1}.form-control:disabled,.form-control[readonly]{background-color:#e9ecef;opacity:1}.form-control::file-selector-button{padding:.375rem .75rem;margin:-0.375rem -0.75rem;margin-inline-end:.75rem;color:#373a3c;background-color:#e9ecef;pointer-events:none;border-color:inherit;border-style:solid;border-width:0;border-inline-end-width:1px;border-radius:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-control::file-selector-button{transition:none}}.form-control:hover:not(:disabled):not([readonly])::file-selector-button{background-color:#dde0e3}.form-control::-webkit-file-upload-button{padding:.375rem .75rem;margin:-0.375rem -0.75rem;margin-inline-end:.75rem;color:#373a3c;background-color:#e9ecef;pointer-events:none;border-color:inherit;border-style:solid;border-width:0;border-inline-end-width:1px;border-radius:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: 
reduce){.form-control::-webkit-file-upload-button{transition:none}}.form-control:hover:not(:disabled):not([readonly])::-webkit-file-upload-button{background-color:#dde0e3}.form-control-plaintext{display:block;width:100%;padding:.375rem 0;margin-bottom:0;line-height:1.5;color:#373a3c;background-color:rgba(0,0,0,0);border:solid rgba(0,0,0,0);border-width:1px 0}.form-control-plaintext.form-control-sm,.form-control-plaintext.form-control-lg{padding-right:0;padding-left:0}.form-control-sm{min-height:calc(1.5em + 0.5rem + 2px);padding:.25rem .5rem;font-size:0.875rem}.form-control-sm::file-selector-button{padding:.25rem .5rem;margin:-0.25rem -0.5rem;margin-inline-end:.5rem}.form-control-sm::-webkit-file-upload-button{padding:.25rem .5rem;margin:-0.25rem -0.5rem;margin-inline-end:.5rem}.form-control-lg{min-height:calc(1.5em + 1rem + 2px);padding:.5rem 1rem;font-size:1.25rem}.form-control-lg::file-selector-button{padding:.5rem 1rem;margin:-0.5rem -1rem;margin-inline-end:1rem}.form-control-lg::-webkit-file-upload-button{padding:.5rem 1rem;margin:-0.5rem -1rem;margin-inline-end:1rem}textarea.form-control{min-height:calc(1.5em + 0.75rem + 2px)}textarea.form-control-sm{min-height:calc(1.5em + 0.5rem + 2px)}textarea.form-control-lg{min-height:calc(1.5em + 1rem + 2px)}.form-control-color{width:3rem;height:auto;padding:.375rem}.form-control-color:not(:disabled):not([readonly]){cursor:pointer}.form-control-color::-moz-color-swatch{height:1.5em}.form-control-color::-webkit-color-swatch{height:1.5em}.form-select{display:block;width:100%;padding:.375rem 2.25rem .375rem .75rem;-moz-padding-start:calc(0.75rem - 3px);font-size:1rem;font-weight:400;line-height:1.5;color:#373a3c;background-color:#fff;background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23373a3c' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='M2 5l6 6 6-6'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right 
.75rem center;background-size:16px 12px;border:1px solid #ced4da;border-radius:0;transition:border-color .15s ease-in-out,box-shadow .15s ease-in-out;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none}@media(prefers-reduced-motion: reduce){.form-select{transition:none}}.form-select:focus{border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-select[multiple],.form-select[size]:not([size="1"]){padding-right:.75rem;background-image:none}.form-select:disabled{background-color:#e9ecef}.form-select:-moz-focusring{color:rgba(0,0,0,0);text-shadow:0 0 0 #373a3c}.form-select-sm{padding-top:.25rem;padding-bottom:.25rem;padding-left:.5rem;font-size:0.875rem}.form-select-lg{padding-top:.5rem;padding-bottom:.5rem;padding-left:1rem;font-size:1.25rem}.form-check,.shiny-input-container .checkbox,.shiny-input-container .radio{display:block;min-height:1.5rem;padding-left:0;margin-bottom:.125rem}.form-check .form-check-input,.form-check .shiny-input-container .checkbox input,.form-check .shiny-input-container .radio input,.shiny-input-container .checkbox .form-check-input,.shiny-input-container .checkbox .shiny-input-container .checkbox input,.shiny-input-container .checkbox .shiny-input-container .radio input,.shiny-input-container .radio .form-check-input,.shiny-input-container .radio .shiny-input-container .checkbox input,.shiny-input-container .radio .shiny-input-container .radio input{float:left;margin-left:0}.form-check-input,.shiny-input-container .checkbox input,.shiny-input-container .checkbox-inline input,.shiny-input-container .radio input,.shiny-input-container .radio-inline input{width:1em;height:1em;margin-top:.3em;vertical-align:top;background-color:#fff;background-repeat:no-repeat;background-position:center;background-size:contain;border:1px solid 
rgba(0,0,0,.25);appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none;color-adjust:exact;-webkit-print-color-adjust:exact}.form-check-input[type=radio],.shiny-input-container .checkbox input[type=radio],.shiny-input-container .checkbox-inline input[type=radio],.shiny-input-container .radio input[type=radio],.shiny-input-container .radio-inline input[type=radio]{border-radius:50%}.form-check-input:active,.shiny-input-container .checkbox input:active,.shiny-input-container .checkbox-inline input:active,.shiny-input-container .radio input:active,.shiny-input-container .radio-inline input:active{filter:brightness(90%)}.form-check-input:focus,.shiny-input-container .checkbox input:focus,.shiny-input-container .checkbox-inline input:focus,.shiny-input-container .radio input:focus,.shiny-input-container .radio-inline input:focus{border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.form-check-input:checked,.shiny-input-container .checkbox input:checked,.shiny-input-container .checkbox-inline input:checked,.shiny-input-container .radio input:checked,.shiny-input-container .radio-inline input:checked{background-color:#2780e3;border-color:#2780e3}.form-check-input:checked[type=checkbox],.shiny-input-container .checkbox input:checked[type=checkbox],.shiny-input-container .checkbox-inline input:checked[type=checkbox],.shiny-input-container .radio input:checked[type=checkbox],.shiny-input-container .radio-inline input:checked[type=checkbox]{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3e%3cpath fill='none' stroke='%23fff' stroke-linecap='round' stroke-linejoin='round' stroke-width='3' d='M6 10l3 3l6-6'/%3e%3c/svg%3e")}.form-check-input:checked[type=radio],.shiny-input-container .checkbox input:checked[type=radio],.shiny-input-container .checkbox-inline input:checked[type=radio],.shiny-input-container .radio 
input:checked[type=radio],.shiny-input-container .radio-inline input:checked[type=radio]{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='2' fill='%23fff'/%3e%3c/svg%3e")}.form-check-input[type=checkbox]:indeterminate,.shiny-input-container .checkbox input[type=checkbox]:indeterminate,.shiny-input-container .checkbox-inline input[type=checkbox]:indeterminate,.shiny-input-container .radio input[type=checkbox]:indeterminate,.shiny-input-container .radio-inline input[type=checkbox]:indeterminate{background-color:#2780e3;border-color:#2780e3;background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20'%3e%3cpath fill='none' stroke='%23fff' stroke-linecap='round' stroke-linejoin='round' stroke-width='3' d='M6 10h8'/%3e%3c/svg%3e")}.form-check-input:disabled,.shiny-input-container .checkbox input:disabled,.shiny-input-container .checkbox-inline input:disabled,.shiny-input-container .radio input:disabled,.shiny-input-container .radio-inline input:disabled{pointer-events:none;filter:none;opacity:.5}.form-check-input[disabled]~.form-check-label,.form-check-input[disabled]~span,.form-check-input:disabled~.form-check-label,.form-check-input:disabled~span,.shiny-input-container .checkbox input[disabled]~.form-check-label,.shiny-input-container .checkbox input[disabled]~span,.shiny-input-container .checkbox input:disabled~.form-check-label,.shiny-input-container .checkbox input:disabled~span,.shiny-input-container .checkbox-inline input[disabled]~.form-check-label,.shiny-input-container .checkbox-inline input[disabled]~span,.shiny-input-container .checkbox-inline input:disabled~.form-check-label,.shiny-input-container .checkbox-inline input:disabled~span,.shiny-input-container .radio input[disabled]~.form-check-label,.shiny-input-container .radio input[disabled]~span,.shiny-input-container .radio input:disabled~.form-check-label,.shiny-input-container .radio 
input:disabled~span,.shiny-input-container .radio-inline input[disabled]~.form-check-label,.shiny-input-container .radio-inline input[disabled]~span,.shiny-input-container .radio-inline input:disabled~.form-check-label,.shiny-input-container .radio-inline input:disabled~span{opacity:.5}.form-check-label,.shiny-input-container .checkbox label,.shiny-input-container .checkbox-inline label,.shiny-input-container .radio label,.shiny-input-container .radio-inline label{cursor:pointer}.form-switch{padding-left:2.5em}.form-switch .form-check-input{width:2em;margin-left:-2.5em;background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='rgba%280, 0, 0, 0.25%29'/%3e%3c/svg%3e");background-position:left center;transition:background-position .15s ease-in-out}@media(prefers-reduced-motion: reduce){.form-switch .form-check-input{transition:none}}.form-switch .form-check-input:focus{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='%2393c0f1'/%3e%3c/svg%3e")}.form-switch .form-check-input:checked{background-position:right center;background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='-4 -4 8 8'%3e%3ccircle r='3' fill='%23fff'/%3e%3c/svg%3e")}.form-check-inline,.shiny-input-container .checkbox-inline,.shiny-input-container .radio-inline{display:inline-block;margin-right:1rem}.btn-check{position:absolute;clip:rect(0, 0, 0, 0);pointer-events:none}.btn-check[disabled]+.btn,.btn-check:disabled+.btn{pointer-events:none;filter:none;opacity:.65}.form-range{width:100%;height:1.5rem;padding:0;background-color:rgba(0,0,0,0);appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none}.form-range:focus{outline:0}.form-range:focus::-webkit-slider-thumb{box-shadow:0 0 0 1px #fff,0 0 0 .25rem rgba(39,128,227,.25)}.form-range:focus::-moz-range-thumb{box-shadow:0 0 0 1px #fff,0 0 0 
.25rem rgba(39,128,227,.25)}.form-range::-moz-focus-outer{border:0}.form-range::-webkit-slider-thumb{width:1rem;height:1rem;margin-top:-0.25rem;background-color:#2780e3;border:0;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none}@media(prefers-reduced-motion: reduce){.form-range::-webkit-slider-thumb{transition:none}}.form-range::-webkit-slider-thumb:active{background-color:#bed9f7}.form-range::-webkit-slider-runnable-track{width:100%;height:.5rem;color:rgba(0,0,0,0);cursor:pointer;background-color:#dee2e6;border-color:rgba(0,0,0,0)}.form-range::-moz-range-thumb{width:1rem;height:1rem;background-color:#2780e3;border:0;transition:background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out;appearance:none;-webkit-appearance:none;-moz-appearance:none;-ms-appearance:none;-o-appearance:none}@media(prefers-reduced-motion: reduce){.form-range::-moz-range-thumb{transition:none}}.form-range::-moz-range-thumb:active{background-color:#bed9f7}.form-range::-moz-range-track{width:100%;height:.5rem;color:rgba(0,0,0,0);cursor:pointer;background-color:#dee2e6;border-color:rgba(0,0,0,0)}.form-range:disabled{pointer-events:none}.form-range:disabled::-webkit-slider-thumb{background-color:#adb5bd}.form-range:disabled::-moz-range-thumb{background-color:#adb5bd}.form-floating{position:relative}.form-floating>.form-control,.form-floating>.form-select{height:calc(3.5rem + 2px);line-height:1.25}.form-floating>label{position:absolute;top:0;left:0;height:100%;padding:1rem .75rem;pointer-events:none;border:1px solid rgba(0,0,0,0);transform-origin:0 0;transition:opacity .1s ease-in-out,transform .1s ease-in-out}@media(prefers-reduced-motion: reduce){.form-floating>label{transition:none}}.form-floating>.form-control{padding:1rem 
.75rem}.form-floating>.form-control::placeholder{color:rgba(0,0,0,0)}.form-floating>.form-control:focus,.form-floating>.form-control:not(:placeholder-shown){padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-control:-webkit-autofill{padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-select{padding-top:1.625rem;padding-bottom:.625rem}.form-floating>.form-control:focus~label,.form-floating>.form-control:not(:placeholder-shown)~label,.form-floating>.form-select~label{opacity:.65;transform:scale(0.85) translateY(-0.5rem) translateX(0.15rem)}.form-floating>.form-control:-webkit-autofill~label{opacity:.65;transform:scale(0.85) translateY(-0.5rem) translateX(0.15rem)}.input-group{position:relative;display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:stretch;-webkit-align-items:stretch;width:100%}.input-group>.form-control,.input-group>.form-select{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto;width:1%;min-width:0}.input-group>.form-control:focus,.input-group>.form-select:focus{z-index:3}.input-group .btn{position:relative;z-index:2}.input-group .btn:focus{z-index:3}.input-group-text{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;padding:.375rem .75rem;font-size:1rem;font-weight:400;line-height:1.5;color:#373a3c;text-align:center;white-space:nowrap;background-color:#e9ecef;border:1px solid #ced4da}.input-group-lg>.form-control,.input-group-lg>.form-select,.input-group-lg>.input-group-text,.input-group-lg>.btn{padding:.5rem 1rem;font-size:1.25rem}.input-group-sm>.form-control,.input-group-sm>.form-select,.input-group-sm>.input-group-text,.input-group-sm>.btn{padding:.25rem 
.5rem;font-size:0.875rem}.input-group-lg>.form-select,.input-group-sm>.form-select{padding-right:3rem}.input-group>:not(:first-child):not(.dropdown-menu):not(.valid-tooltip):not(.valid-feedback):not(.invalid-tooltip):not(.invalid-feedback){margin-left:-1px}.valid-feedback{display:none;width:100%;margin-top:.25rem;font-size:0.875em;color:#3fb618}.valid-tooltip{position:absolute;top:100%;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:0.875rem;color:#fff;background-color:rgba(63,182,24,.9)}.was-validated :valid~.valid-feedback,.was-validated :valid~.valid-tooltip,.is-valid~.valid-feedback,.is-valid~.valid-tooltip{display:block}.was-validated .form-control:valid,.form-control.is-valid{border-color:#3fb618;padding-right:calc(1.5em + 0.75rem);background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3e%3cpath fill='%233fb618' d='M2.3 6.73L.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right calc(0.375em + 0.1875rem) center;background-size:calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-control:valid:focus,.form-control.is-valid:focus{border-color:#3fb618;box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated textarea.form-control:valid,textarea.form-control.is-valid{padding-right:calc(1.5em + 0.75rem);background-position:top calc(0.375em + 0.1875rem) right calc(0.375em + 0.1875rem)}.was-validated .form-select:valid,.form-select.is-valid{border-color:#3fb618}.was-validated .form-select:valid:not([multiple]):not([size]),.was-validated .form-select:valid:not([multiple])[size="1"],.form-select.is-valid:not([multiple]):not([size]),.form-select.is-valid:not([multiple])[size="1"]{padding-right:4.125rem;background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23373a3c' 
stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='M2 5l6 6 6-6'/%3e%3c/svg%3e"),url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 8 8'%3e%3cpath fill='%233fb618' d='M2.3 6.73L.6 4.53c-.4-1.04.46-1.4 1.1-.8l1.1 1.4 3.4-3.8c.6-.63 1.6-.27 1.2.7l-4 4.6c-.43.5-.8.4-1.1.1z'/%3e%3c/svg%3e");background-position:right .75rem center,center right 2.25rem;background-size:16px 12px,calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-select:valid:focus,.form-select.is-valid:focus{border-color:#3fb618;box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated .form-check-input:valid,.form-check-input.is-valid{border-color:#3fb618}.was-validated .form-check-input:valid:checked,.form-check-input.is-valid:checked{background-color:#3fb618}.was-validated .form-check-input:valid:focus,.form-check-input.is-valid:focus{box-shadow:0 0 0 .25rem rgba(63,182,24,.25)}.was-validated .form-check-input:valid~.form-check-label,.form-check-input.is-valid~.form-check-label{color:#3fb618}.form-check-inline .form-check-input~.valid-feedback{margin-left:.5em}.was-validated .input-group .form-control:valid,.input-group .form-control.is-valid,.was-validated .input-group .form-select:valid,.input-group .form-select.is-valid{z-index:1}.was-validated .input-group .form-control:valid:focus,.input-group .form-control.is-valid:focus,.was-validated .input-group .form-select:valid:focus,.input-group .form-select.is-valid:focus{z-index:3}.invalid-feedback{display:none;width:100%;margin-top:.25rem;font-size:0.875em;color:#ff0039}.invalid-tooltip{position:absolute;top:100%;z-index:5;display:none;max-width:100%;padding:.25rem .5rem;margin-top:.1rem;font-size:0.875rem;color:#fff;background-color:rgba(255,0,57,.9)}.was-validated :invalid~.invalid-feedback,.was-validated :invalid~.invalid-tooltip,.is-invalid~.invalid-feedback,.is-invalid~.invalid-tooltip{display:block}.was-validated 
.form-control:invalid,.form-control.is-invalid{border-color:#ff0039;padding-right:calc(1.5em + 0.75rem);background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 12' width='12' height='12' fill='none' stroke='%23ff0039'%3e%3ccircle cx='6' cy='6' r='4.5'/%3e%3cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3e%3ccircle cx='6' cy='8.2' r='.6' fill='%23ff0039' stroke='none'/%3e%3c/svg%3e");background-repeat:no-repeat;background-position:right calc(0.375em + 0.1875rem) center;background-size:calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated .form-control:invalid:focus,.form-control.is-invalid:focus{border-color:#ff0039;box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated textarea.form-control:invalid,textarea.form-control.is-invalid{padding-right:calc(1.5em + 0.75rem);background-position:top calc(0.375em + 0.1875rem) right calc(0.375em + 0.1875rem)}.was-validated .form-select:invalid,.form-select.is-invalid{border-color:#ff0039}.was-validated .form-select:invalid:not([multiple]):not([size]),.was-validated .form-select:invalid:not([multiple])[size="1"],.form-select.is-invalid:not([multiple]):not([size]),.form-select.is-invalid:not([multiple])[size="1"]{padding-right:4.125rem;background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%23373a3c' stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='M2 5l6 6 6-6'/%3e%3c/svg%3e"),url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 12' width='12' height='12' fill='none' stroke='%23ff0039'%3e%3ccircle cx='6' cy='6' r='4.5'/%3e%3cpath stroke-linejoin='round' d='M5.8 3.6h.4L6 6.5z'/%3e%3ccircle cx='6' cy='8.2' r='.6' fill='%23ff0039' stroke='none'/%3e%3c/svg%3e");background-position:right .75rem center,center right 2.25rem;background-size:16px 12px,calc(0.75em + 0.375rem) calc(0.75em + 0.375rem)}.was-validated 
.form-select:invalid:focus,.form-select.is-invalid:focus{border-color:#ff0039;box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated .form-check-input:invalid,.form-check-input.is-invalid{border-color:#ff0039}.was-validated .form-check-input:invalid:checked,.form-check-input.is-invalid:checked{background-color:#ff0039}.was-validated .form-check-input:invalid:focus,.form-check-input.is-invalid:focus{box-shadow:0 0 0 .25rem rgba(255,0,57,.25)}.was-validated .form-check-input:invalid~.form-check-label,.form-check-input.is-invalid~.form-check-label{color:#ff0039}.form-check-inline .form-check-input~.invalid-feedback{margin-left:.5em}.was-validated .input-group .form-control:invalid,.input-group .form-control.is-invalid,.was-validated .input-group .form-select:invalid,.input-group .form-select.is-invalid{z-index:2}.was-validated .input-group .form-control:invalid:focus,.input-group .form-control.is-invalid:focus,.was-validated .input-group .form-select:invalid:focus,.input-group .form-select.is-invalid:focus{z-index:3}.btn{display:inline-block;font-weight:400;line-height:1.5;color:#373a3c;text-align:center;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;vertical-align:middle;cursor:pointer;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;background-color:rgba(0,0,0,0);border:1px solid rgba(0,0,0,0);padding:.375rem .75rem;font-size:1rem;border-radius:0;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.btn{transition:none}}.btn:hover{color:#373a3c}.btn-check:focus+.btn,.btn:focus{outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.btn:disabled,.btn.disabled,fieldset:disabled 
.btn{pointer-events:none;opacity:.65}.btn-default{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-default:hover{color:#fff;background-color:#2f3133;border-color:#2c2e30}.btn-check:focus+.btn-default,.btn-default:focus{color:#fff;background-color:#2f3133;border-color:#2c2e30;box-shadow:0 0 0 .25rem rgba(85,88,89,.5)}.btn-check:checked+.btn-default,.btn-check:active+.btn-default,.btn-default:active,.btn-default.active,.show>.btn-default.dropdown-toggle{color:#fff;background-color:#2c2e30;border-color:#292c2d}.btn-check:checked+.btn-default:focus,.btn-check:active+.btn-default:focus,.btn-default:active:focus,.btn-default.active:focus,.show>.btn-default.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(85,88,89,.5)}.btn-default:disabled,.btn-default.disabled{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-primary{color:#fff;background-color:#2780e3;border-color:#2780e3}.btn-primary:hover{color:#fff;background-color:#216dc1;border-color:#1f66b6}.btn-check:focus+.btn-primary,.btn-primary:focus{color:#fff;background-color:#216dc1;border-color:#1f66b6;box-shadow:0 0 0 .25rem rgba(71,147,231,.5)}.btn-check:checked+.btn-primary,.btn-check:active+.btn-primary,.btn-primary:active,.btn-primary.active,.show>.btn-primary.dropdown-toggle{color:#fff;background-color:#1f66b6;border-color:#1d60aa}.btn-check:checked+.btn-primary:focus,.btn-check:active+.btn-primary:focus,.btn-primary:active:focus,.btn-primary.active:focus,.show>.btn-primary.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(71,147,231,.5)}.btn-primary:disabled,.btn-primary.disabled{color:#fff;background-color:#2780e3;border-color:#2780e3}.btn-secondary{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-secondary:hover{color:#fff;background-color:#2f3133;border-color:#2c2e30}.btn-check:focus+.btn-secondary,.btn-secondary:focus{color:#fff;background-color:#2f3133;border-color:#2c2e30;box-shadow:0 0 0 .25rem 
rgba(85,88,89,.5)}.btn-check:checked+.btn-secondary,.btn-check:active+.btn-secondary,.btn-secondary:active,.btn-secondary.active,.show>.btn-secondary.dropdown-toggle{color:#fff;background-color:#2c2e30;border-color:#292c2d}.btn-check:checked+.btn-secondary:focus,.btn-check:active+.btn-secondary:focus,.btn-secondary:active:focus,.btn-secondary.active:focus,.show>.btn-secondary.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(85,88,89,.5)}.btn-secondary:disabled,.btn-secondary.disabled{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-success{color:#fff;background-color:#3fb618;border-color:#3fb618}.btn-success:hover{color:#fff;background-color:#369b14;border-color:#329213}.btn-check:focus+.btn-success,.btn-success:focus{color:#fff;background-color:#369b14;border-color:#329213;box-shadow:0 0 0 .25rem rgba(92,193,59,.5)}.btn-check:checked+.btn-success,.btn-check:active+.btn-success,.btn-success:active,.btn-success.active,.show>.btn-success.dropdown-toggle{color:#fff;background-color:#329213;border-color:#2f8912}.btn-check:checked+.btn-success:focus,.btn-check:active+.btn-success:focus,.btn-success:active:focus,.btn-success.active:focus,.show>.btn-success.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(92,193,59,.5)}.btn-success:disabled,.btn-success.disabled{color:#fff;background-color:#3fb618;border-color:#3fb618}.btn-info{color:#fff;background-color:#9954bb;border-color:#9954bb}.btn-info:hover{color:#fff;background-color:#82479f;border-color:#7a4396}.btn-check:focus+.btn-info,.btn-info:focus{color:#fff;background-color:#82479f;border-color:#7a4396;box-shadow:0 0 0 .25rem rgba(168,110,197,.5)}.btn-check:checked+.btn-info,.btn-check:active+.btn-info,.btn-info:active,.btn-info.active,.show>.btn-info.dropdown-toggle{color:#fff;background-color:#7a4396;border-color:#733f8c}.btn-check:checked+.btn-info:focus,.btn-check:active+.btn-info:focus,.btn-info:active:focus,.btn-info.active:focus,.show>.btn-info.dropdown-toggle:focus{box-shadow:0 0 0 .25rem 
rgba(168,110,197,.5)}.btn-info:disabled,.btn-info.disabled{color:#fff;background-color:#9954bb;border-color:#9954bb}.btn-warning{color:#fff;background-color:#ff7518;border-color:#ff7518}.btn-warning:hover{color:#fff;background-color:#d96314;border-color:#cc5e13}.btn-check:focus+.btn-warning,.btn-warning:focus{color:#fff;background-color:#d96314;border-color:#cc5e13;box-shadow:0 0 0 .25rem rgba(255,138,59,.5)}.btn-check:checked+.btn-warning,.btn-check:active+.btn-warning,.btn-warning:active,.btn-warning.active,.show>.btn-warning.dropdown-toggle{color:#fff;background-color:#cc5e13;border-color:#bf5812}.btn-check:checked+.btn-warning:focus,.btn-check:active+.btn-warning:focus,.btn-warning:active:focus,.btn-warning.active:focus,.show>.btn-warning.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(255,138,59,.5)}.btn-warning:disabled,.btn-warning.disabled{color:#fff;background-color:#ff7518;border-color:#ff7518}.btn-danger{color:#fff;background-color:#ff0039;border-color:#ff0039}.btn-danger:hover{color:#fff;background-color:#d90030;border-color:#cc002e}.btn-check:focus+.btn-danger,.btn-danger:focus{color:#fff;background-color:#d90030;border-color:#cc002e;box-shadow:0 0 0 .25rem rgba(255,38,87,.5)}.btn-check:checked+.btn-danger,.btn-check:active+.btn-danger,.btn-danger:active,.btn-danger.active,.show>.btn-danger.dropdown-toggle{color:#fff;background-color:#cc002e;border-color:#bf002b}.btn-check:checked+.btn-danger:focus,.btn-check:active+.btn-danger:focus,.btn-danger:active:focus,.btn-danger.active:focus,.show>.btn-danger.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(255,38,87,.5)}.btn-danger:disabled,.btn-danger.disabled{color:#fff;background-color:#ff0039;border-color:#ff0039}.btn-light{color:#000;background-color:#f8f9fa;border-color:#f8f9fa}.btn-light:hover{color:#000;background-color:#f9fafb;border-color:#f9fafb}.btn-check:focus+.btn-light,.btn-light:focus{color:#000;background-color:#f9fafb;border-color:#f9fafb;box-shadow:0 0 0 .25rem 
rgba(211,212,213,.5)}.btn-check:checked+.btn-light,.btn-check:active+.btn-light,.btn-light:active,.btn-light.active,.show>.btn-light.dropdown-toggle{color:#000;background-color:#f9fafb;border-color:#f9fafb}.btn-check:checked+.btn-light:focus,.btn-check:active+.btn-light:focus,.btn-light:active:focus,.btn-light.active:focus,.show>.btn-light.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(211,212,213,.5)}.btn-light:disabled,.btn-light.disabled{color:#000;background-color:#f8f9fa;border-color:#f8f9fa}.btn-dark{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-dark:hover{color:#fff;background-color:#2f3133;border-color:#2c2e30}.btn-check:focus+.btn-dark,.btn-dark:focus{color:#fff;background-color:#2f3133;border-color:#2c2e30;box-shadow:0 0 0 .25rem rgba(85,88,89,.5)}.btn-check:checked+.btn-dark,.btn-check:active+.btn-dark,.btn-dark:active,.btn-dark.active,.show>.btn-dark.dropdown-toggle{color:#fff;background-color:#2c2e30;border-color:#292c2d}.btn-check:checked+.btn-dark:focus,.btn-check:active+.btn-dark:focus,.btn-dark:active:focus,.btn-dark.active:focus,.show>.btn-dark.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(85,88,89,.5)}.btn-dark:disabled,.btn-dark.disabled{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-outline-default{color:#373a3c;border-color:#373a3c;background-color:rgba(0,0,0,0)}.btn-outline-default:hover{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-check:focus+.btn-outline-default,.btn-outline-default:focus{box-shadow:0 0 0 .25rem rgba(55,58,60,.5)}.btn-check:checked+.btn-outline-default,.btn-check:active+.btn-outline-default,.btn-outline-default:active,.btn-outline-default.active,.btn-outline-default.dropdown-toggle.show{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-check:checked+.btn-outline-default:focus,.btn-check:active+.btn-outline-default:focus,.btn-outline-default:active:focus,.btn-outline-default.active:focus,.btn-outline-default.dropdown-toggle.show:focus{box-shadow:0 0 0 
.25rem rgba(55,58,60,.5)}.btn-outline-default:disabled,.btn-outline-default.disabled{color:#373a3c;background-color:rgba(0,0,0,0)}.btn-outline-primary{color:#2780e3;border-color:#2780e3;background-color:rgba(0,0,0,0)}.btn-outline-primary:hover{color:#fff;background-color:#2780e3;border-color:#2780e3}.btn-check:focus+.btn-outline-primary,.btn-outline-primary:focus{box-shadow:0 0 0 .25rem rgba(39,128,227,.5)}.btn-check:checked+.btn-outline-primary,.btn-check:active+.btn-outline-primary,.btn-outline-primary:active,.btn-outline-primary.active,.btn-outline-primary.dropdown-toggle.show{color:#fff;background-color:#2780e3;border-color:#2780e3}.btn-check:checked+.btn-outline-primary:focus,.btn-check:active+.btn-outline-primary:focus,.btn-outline-primary:active:focus,.btn-outline-primary.active:focus,.btn-outline-primary.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem rgba(39,128,227,.5)}.btn-outline-primary:disabled,.btn-outline-primary.disabled{color:#2780e3;background-color:rgba(0,0,0,0)}.btn-outline-secondary{color:#373a3c;border-color:#373a3c;background-color:rgba(0,0,0,0)}.btn-outline-secondary:hover{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-check:focus+.btn-outline-secondary,.btn-outline-secondary:focus{box-shadow:0 0 0 .25rem rgba(55,58,60,.5)}.btn-check:checked+.btn-outline-secondary,.btn-check:active+.btn-outline-secondary,.btn-outline-secondary:active,.btn-outline-secondary.active,.btn-outline-secondary.dropdown-toggle.show{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-check:checked+.btn-outline-secondary:focus,.btn-check:active+.btn-outline-secondary:focus,.btn-outline-secondary:active:focus,.btn-outline-secondary.active:focus,.btn-outline-secondary.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem 
rgba(55,58,60,.5)}.btn-outline-secondary:disabled,.btn-outline-secondary.disabled{color:#373a3c;background-color:rgba(0,0,0,0)}.btn-outline-success{color:#3fb618;border-color:#3fb618;background-color:rgba(0,0,0,0)}.btn-outline-success:hover{color:#fff;background-color:#3fb618;border-color:#3fb618}.btn-check:focus+.btn-outline-success,.btn-outline-success:focus{box-shadow:0 0 0 .25rem rgba(63,182,24,.5)}.btn-check:checked+.btn-outline-success,.btn-check:active+.btn-outline-success,.btn-outline-success:active,.btn-outline-success.active,.btn-outline-success.dropdown-toggle.show{color:#fff;background-color:#3fb618;border-color:#3fb618}.btn-check:checked+.btn-outline-success:focus,.btn-check:active+.btn-outline-success:focus,.btn-outline-success:active:focus,.btn-outline-success.active:focus,.btn-outline-success.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem rgba(63,182,24,.5)}.btn-outline-success:disabled,.btn-outline-success.disabled{color:#3fb618;background-color:rgba(0,0,0,0)}.btn-outline-info{color:#9954bb;border-color:#9954bb;background-color:rgba(0,0,0,0)}.btn-outline-info:hover{color:#fff;background-color:#9954bb;border-color:#9954bb}.btn-check:focus+.btn-outline-info,.btn-outline-info:focus{box-shadow:0 0 0 .25rem rgba(153,84,187,.5)}.btn-check:checked+.btn-outline-info,.btn-check:active+.btn-outline-info,.btn-outline-info:active,.btn-outline-info.active,.btn-outline-info.dropdown-toggle.show{color:#fff;background-color:#9954bb;border-color:#9954bb}.btn-check:checked+.btn-outline-info:focus,.btn-check:active+.btn-outline-info:focus,.btn-outline-info:active:focus,.btn-outline-info.active:focus,.btn-outline-info.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem 
rgba(153,84,187,.5)}.btn-outline-info:disabled,.btn-outline-info.disabled{color:#9954bb;background-color:rgba(0,0,0,0)}.btn-outline-warning{color:#ff7518;border-color:#ff7518;background-color:rgba(0,0,0,0)}.btn-outline-warning:hover{color:#fff;background-color:#ff7518;border-color:#ff7518}.btn-check:focus+.btn-outline-warning,.btn-outline-warning:focus{box-shadow:0 0 0 .25rem rgba(255,117,24,.5)}.btn-check:checked+.btn-outline-warning,.btn-check:active+.btn-outline-warning,.btn-outline-warning:active,.btn-outline-warning.active,.btn-outline-warning.dropdown-toggle.show{color:#fff;background-color:#ff7518;border-color:#ff7518}.btn-check:checked+.btn-outline-warning:focus,.btn-check:active+.btn-outline-warning:focus,.btn-outline-warning:active:focus,.btn-outline-warning.active:focus,.btn-outline-warning.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem rgba(255,117,24,.5)}.btn-outline-warning:disabled,.btn-outline-warning.disabled{color:#ff7518;background-color:rgba(0,0,0,0)}.btn-outline-danger{color:#ff0039;border-color:#ff0039;background-color:rgba(0,0,0,0)}.btn-outline-danger:hover{color:#fff;background-color:#ff0039;border-color:#ff0039}.btn-check:focus+.btn-outline-danger,.btn-outline-danger:focus{box-shadow:0 0 0 .25rem rgba(255,0,57,.5)}.btn-check:checked+.btn-outline-danger,.btn-check:active+.btn-outline-danger,.btn-outline-danger:active,.btn-outline-danger.active,.btn-outline-danger.dropdown-toggle.show{color:#fff;background-color:#ff0039;border-color:#ff0039}.btn-check:checked+.btn-outline-danger:focus,.btn-check:active+.btn-outline-danger:focus,.btn-outline-danger:active:focus,.btn-outline-danger.active:focus,.btn-outline-danger.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem 
rgba(255,0,57,.5)}.btn-outline-danger:disabled,.btn-outline-danger.disabled{color:#ff0039;background-color:rgba(0,0,0,0)}.btn-outline-light{color:#f8f9fa;border-color:#f8f9fa;background-color:rgba(0,0,0,0)}.btn-outline-light:hover{color:#000;background-color:#f8f9fa;border-color:#f8f9fa}.btn-check:focus+.btn-outline-light,.btn-outline-light:focus{box-shadow:0 0 0 .25rem rgba(248,249,250,.5)}.btn-check:checked+.btn-outline-light,.btn-check:active+.btn-outline-light,.btn-outline-light:active,.btn-outline-light.active,.btn-outline-light.dropdown-toggle.show{color:#000;background-color:#f8f9fa;border-color:#f8f9fa}.btn-check:checked+.btn-outline-light:focus,.btn-check:active+.btn-outline-light:focus,.btn-outline-light:active:focus,.btn-outline-light.active:focus,.btn-outline-light.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem rgba(248,249,250,.5)}.btn-outline-light:disabled,.btn-outline-light.disabled{color:#f8f9fa;background-color:rgba(0,0,0,0)}.btn-outline-dark{color:#373a3c;border-color:#373a3c;background-color:rgba(0,0,0,0)}.btn-outline-dark:hover{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-check:focus+.btn-outline-dark,.btn-outline-dark:focus{box-shadow:0 0 0 .25rem rgba(55,58,60,.5)}.btn-check:checked+.btn-outline-dark,.btn-check:active+.btn-outline-dark,.btn-outline-dark:active,.btn-outline-dark.active,.btn-outline-dark.dropdown-toggle.show{color:#fff;background-color:#373a3c;border-color:#373a3c}.btn-check:checked+.btn-outline-dark:focus,.btn-check:active+.btn-outline-dark:focus,.btn-outline-dark:active:focus,.btn-outline-dark.active:focus,.btn-outline-dark.dropdown-toggle.show:focus{box-shadow:0 0 0 .25rem 
rgba(55,58,60,.5)}.btn-outline-dark:disabled,.btn-outline-dark.disabled{color:#373a3c;background-color:rgba(0,0,0,0)}.btn-link{font-weight:400;color:#ca225e;text-decoration:underline;-webkit-text-decoration:underline;-moz-text-decoration:underline;-ms-text-decoration:underline;-o-text-decoration:underline}.btn-link:hover{color:#a21b4b}.btn-link:disabled,.btn-link.disabled{color:#6c757d}.btn-lg,.btn-group-lg>.btn{padding:.5rem 1rem;font-size:1.25rem;border-radius:0}.btn-sm,.btn-group-sm>.btn{padding:.25rem .5rem;font-size:0.875rem;border-radius:0}.fade{transition:opacity .15s linear}@media(prefers-reduced-motion: reduce){.fade{transition:none}}.fade:not(.show){opacity:0}.collapse:not(.show){display:none}.collapsing{height:0;overflow:hidden;transition:height .2s ease}@media(prefers-reduced-motion: reduce){.collapsing{transition:none}}.collapsing.collapse-horizontal{width:0;height:auto;transition:width .35s ease}@media(prefers-reduced-motion: reduce){.collapsing.collapse-horizontal{transition:none}}.dropup,.dropend,.dropdown,.dropstart{position:relative}.dropdown-toggle{white-space:nowrap}.dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid;border-right:.3em solid rgba(0,0,0,0);border-bottom:0;border-left:.3em solid rgba(0,0,0,0)}.dropdown-toggle:empty::after{margin-left:0}.dropdown-menu{position:absolute;z-index:1000;display:none;min-width:10rem;padding:.5rem 0;margin:0;font-size:1rem;color:#373a3c;text-align:left;list-style:none;background-color:#fff;background-clip:padding-box;border:1px solid rgba(0,0,0,.15)}.dropdown-menu[data-bs-popper]{top:100%;left:0;margin-top:.125rem}.dropdown-menu-start{--bs-position: start}.dropdown-menu-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-end{--bs-position: end}.dropdown-menu-end[data-bs-popper]{right:0;left:auto}@media(min-width: 576px){.dropdown-menu-sm-start{--bs-position: 
start}.dropdown-menu-sm-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-sm-end{--bs-position: end}.dropdown-menu-sm-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 768px){.dropdown-menu-md-start{--bs-position: start}.dropdown-menu-md-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-md-end{--bs-position: end}.dropdown-menu-md-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 992px){.dropdown-menu-lg-start{--bs-position: start}.dropdown-menu-lg-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-lg-end{--bs-position: end}.dropdown-menu-lg-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 1200px){.dropdown-menu-xl-start{--bs-position: start}.dropdown-menu-xl-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-xl-end{--bs-position: end}.dropdown-menu-xl-end[data-bs-popper]{right:0;left:auto}}@media(min-width: 1400px){.dropdown-menu-xxl-start{--bs-position: start}.dropdown-menu-xxl-start[data-bs-popper]{right:auto;left:0}.dropdown-menu-xxl-end{--bs-position: end}.dropdown-menu-xxl-end[data-bs-popper]{right:0;left:auto}}.dropup .dropdown-menu[data-bs-popper]{top:auto;bottom:100%;margin-top:0;margin-bottom:.125rem}.dropup .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:0;border-right:.3em solid rgba(0,0,0,0);border-bottom:.3em solid;border-left:.3em solid rgba(0,0,0,0)}.dropup .dropdown-toggle:empty::after{margin-left:0}.dropend .dropdown-menu[data-bs-popper]{top:0;right:auto;left:100%;margin-top:0;margin-left:.125rem}.dropend .dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:"";border-top:.3em solid rgba(0,0,0,0);border-right:0;border-bottom:.3em solid rgba(0,0,0,0);border-left:.3em solid}.dropend .dropdown-toggle:empty::after{margin-left:0}.dropend .dropdown-toggle::after{vertical-align:0}.dropstart .dropdown-menu[data-bs-popper]{top:0;right:100%;left:auto;margin-top:0;margin-right:.125rem}.dropstart 
.dropdown-toggle::after{display:inline-block;margin-left:.255em;vertical-align:.255em;content:""}.dropstart .dropdown-toggle::after{display:none}.dropstart .dropdown-toggle::before{display:inline-block;margin-right:.255em;vertical-align:.255em;content:"";border-top:.3em solid rgba(0,0,0,0);border-right:.3em solid;border-bottom:.3em solid rgba(0,0,0,0)}.dropstart .dropdown-toggle:empty::after{margin-left:0}.dropstart .dropdown-toggle::before{vertical-align:0}.dropdown-divider{height:0;margin:.5rem 0;overflow:hidden;border-top:1px solid rgba(0,0,0,.15)}.dropdown-item{display:block;width:100%;padding:.25rem 1rem;clear:both;font-weight:400;color:#212529;text-align:inherit;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;white-space:nowrap;background-color:rgba(0,0,0,0);border:0}.dropdown-item:hover,.dropdown-item:focus{color:#1e2125;background-color:#e9ecef}.dropdown-item.active,.dropdown-item:active{color:#fff;text-decoration:none;background-color:#2780e3}.dropdown-item.disabled,.dropdown-item:disabled{color:#adb5bd;pointer-events:none;background-color:rgba(0,0,0,0)}.dropdown-menu.show{display:block}.dropdown-header{display:block;padding:.5rem 1rem;margin-bottom:0;font-size:0.875rem;color:#6c757d;white-space:nowrap}.dropdown-item-text{display:block;padding:.25rem 1rem;color:#212529}.dropdown-menu-dark{color:#dee2e6;background-color:#373a3c;border-color:rgba(0,0,0,.15)}.dropdown-menu-dark .dropdown-item{color:#dee2e6}.dropdown-menu-dark .dropdown-item:hover,.dropdown-menu-dark .dropdown-item:focus{color:#fff;background-color:rgba(255,255,255,.15)}.dropdown-menu-dark .dropdown-item.active,.dropdown-menu-dark .dropdown-item:active{color:#fff;background-color:#2780e3}.dropdown-menu-dark .dropdown-item.disabled,.dropdown-menu-dark .dropdown-item:disabled{color:#adb5bd}.dropdown-menu-dark .dropdown-divider{border-color:rgba(0,0,0,.15)}.dropdown-menu-dark 
.dropdown-item-text{color:#dee2e6}.dropdown-menu-dark .dropdown-header{color:#adb5bd}.btn-group,.btn-group-vertical{position:relative;display:inline-flex;vertical-align:middle}.btn-group>.btn,.btn-group-vertical>.btn{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto}.btn-group>.btn-check:checked+.btn,.btn-group>.btn-check:focus+.btn,.btn-group>.btn:hover,.btn-group>.btn:focus,.btn-group>.btn:active,.btn-group>.btn.active,.btn-group-vertical>.btn-check:checked+.btn,.btn-group-vertical>.btn-check:focus+.btn,.btn-group-vertical>.btn:hover,.btn-group-vertical>.btn:focus,.btn-group-vertical>.btn:active,.btn-group-vertical>.btn.active{z-index:1}.btn-toolbar{display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;justify-content:flex-start;-webkit-justify-content:flex-start}.btn-toolbar .input-group{width:auto}.btn-group>.btn:not(:first-child),.btn-group>.btn-group:not(:first-child){margin-left:-1px}.dropdown-toggle-split{padding-right:.5625rem;padding-left:.5625rem}.dropdown-toggle-split::after,.dropup .dropdown-toggle-split::after,.dropend .dropdown-toggle-split::after{margin-left:0}.dropstart .dropdown-toggle-split::before{margin-right:0}.btn-sm+.dropdown-toggle-split,.btn-group-sm>.btn+.dropdown-toggle-split{padding-right:.375rem;padding-left:.375rem}.btn-lg+.dropdown-toggle-split,.btn-group-lg>.btn+.dropdown-toggle-split{padding-right:.75rem;padding-left:.75rem}.btn-group-vertical{flex-direction:column;-webkit-flex-direction:column;align-items:flex-start;-webkit-align-items:flex-start;justify-content:center;-webkit-justify-content:center}.btn-group-vertical>.btn,.btn-group-vertical>.btn-group{width:100%}.btn-group-vertical>.btn:not(:first-child),.btn-group-vertical>.btn-group:not(:first-child){margin-top:-1px}.nav{display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;padding-left:0;margin-bottom:0;list-style:none}.nav-link{display:block;padding:.5rem 
1rem;color:#ca225e;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out}@media(prefers-reduced-motion: reduce){.nav-link{transition:none}}.nav-link:hover,.nav-link:focus{color:#a21b4b}.nav-link.disabled{color:#6c757d;pointer-events:none;cursor:default}.nav-tabs{border-bottom:1px solid #dee2e6}.nav-tabs .nav-link{margin-bottom:-1px;background:none;border:1px solid rgba(0,0,0,0)}.nav-tabs .nav-link:hover,.nav-tabs .nav-link:focus{border-color:#e9ecef #e9ecef #dee2e6;isolation:isolate}.nav-tabs .nav-link.disabled{color:#6c757d;background-color:rgba(0,0,0,0);border-color:rgba(0,0,0,0)}.nav-tabs .nav-link.active,.nav-tabs .nav-item.show .nav-link{color:#495057;background-color:#fff;border-color:#dee2e6 #dee2e6 #fff}.nav-tabs .dropdown-menu{margin-top:-1px}.nav-pills .nav-link{background:none;border:0}.nav-pills .nav-link.active,.nav-pills .show>.nav-link{color:#fff;background-color:#2780e3}.nav-fill>.nav-link,.nav-fill .nav-item{flex:1 1 auto;-webkit-flex:1 1 auto;text-align:center}.nav-justified>.nav-link,.nav-justified .nav-item{flex-basis:0;-webkit-flex-basis:0;flex-grow:1;-webkit-flex-grow:1;text-align:center}.nav-fill .nav-item .nav-link,.nav-justified .nav-item 
.nav-link{width:100%}.tab-content>.tab-pane{display:none}.tab-content>.active{display:block}.navbar{position:relative;display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding-top:.5rem;padding-bottom:.5rem}.navbar>.container-xxl,.navbar>.container-xl,.navbar>.container-lg,.navbar>.container-md,.navbar>.container-sm,.navbar>.container,.navbar>.container-fluid{display:flex;display:-webkit-flex;flex-wrap:inherit;-webkit-flex-wrap:inherit;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between}.navbar-brand{padding-top:.3125rem;padding-bottom:.3125rem;margin-right:1rem;font-size:1.25rem;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;white-space:nowrap}.navbar-nav{display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;padding-left:0;margin-bottom:0;list-style:none}.navbar-nav .nav-link{padding-right:0;padding-left:0}.navbar-nav .dropdown-menu{position:static}.navbar-text{padding-top:.5rem;padding-bottom:.5rem}.navbar-collapse{flex-basis:100%;-webkit-flex-basis:100%;flex-grow:1;-webkit-flex-grow:1;align-items:center;-webkit-align-items:center}.navbar-toggler{padding:.25 0;font-size:1.25rem;line-height:1;background-color:rgba(0,0,0,0);border:1px solid rgba(0,0,0,0);transition:box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.navbar-toggler{transition:none}}.navbar-toggler:hover{text-decoration:none}.navbar-toggler:focus{text-decoration:none;outline:0;box-shadow:0 0 0 .25rem}.navbar-toggler-icon{display:inline-block;width:1.5em;height:1.5em;vertical-align:middle;background-repeat:no-repeat;background-position:center;background-size:100%}.navbar-nav-scroll{max-height:var(--bs-scroll-height, 75vh);overflow-y:auto}@media(min-width: 
576px){.navbar-expand-sm{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-sm .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-sm .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-sm .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-sm .navbar-nav-scroll{overflow:visible}.navbar-expand-sm .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-sm .navbar-toggler{display:none}.navbar-expand-sm .offcanvas-header{display:none}.navbar-expand-sm .offcanvas{position:inherit;bottom:0;z-index:1000;flex-grow:1;-webkit-flex-grow:1;visibility:visible !important;background-color:rgba(0,0,0,0);border-right:0;border-left:0;transition:none;transform:none}.navbar-expand-sm .offcanvas-top,.navbar-expand-sm .offcanvas-bottom{height:auto;border-top:0;border-bottom:0}.navbar-expand-sm .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 768px){.navbar-expand-md{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-md .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-md .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-md .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-md .navbar-nav-scroll{overflow:visible}.navbar-expand-md .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-md .navbar-toggler{display:none}.navbar-expand-md .offcanvas-header{display:none}.navbar-expand-md .offcanvas{position:inherit;bottom:0;z-index:1000;flex-grow:1;-webkit-flex-grow:1;visibility:visible !important;background-color:rgba(0,0,0,0);border-right:0;border-left:0;transition:none;transform:none}.navbar-expand-md 
.offcanvas-top,.navbar-expand-md .offcanvas-bottom{height:auto;border-top:0;border-bottom:0}.navbar-expand-md .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 992px){.navbar-expand-lg{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-lg .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-lg .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-lg .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-lg .navbar-nav-scroll{overflow:visible}.navbar-expand-lg .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-lg .navbar-toggler{display:none}.navbar-expand-lg .offcanvas-header{display:none}.navbar-expand-lg .offcanvas{position:inherit;bottom:0;z-index:1000;flex-grow:1;-webkit-flex-grow:1;visibility:visible !important;background-color:rgba(0,0,0,0);border-right:0;border-left:0;transition:none;transform:none}.navbar-expand-lg .offcanvas-top,.navbar-expand-lg .offcanvas-bottom{height:auto;border-top:0;border-bottom:0}.navbar-expand-lg .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 1200px){.navbar-expand-xl{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-xl .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-xl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xl .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-xl .navbar-nav-scroll{overflow:visible}.navbar-expand-xl .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-xl .navbar-toggler{display:none}.navbar-expand-xl .offcanvas-header{display:none}.navbar-expand-xl 
.offcanvas{position:inherit;bottom:0;z-index:1000;flex-grow:1;-webkit-flex-grow:1;visibility:visible !important;background-color:rgba(0,0,0,0);border-right:0;border-left:0;transition:none;transform:none}.navbar-expand-xl .offcanvas-top,.navbar-expand-xl .offcanvas-bottom{height:auto;border-top:0;border-bottom:0}.navbar-expand-xl .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}@media(min-width: 1400px){.navbar-expand-xxl{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand-xxl .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand-xxl .navbar-nav .dropdown-menu{position:absolute}.navbar-expand-xxl .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand-xxl .navbar-nav-scroll{overflow:visible}.navbar-expand-xxl .navbar-collapse{display:flex !important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand-xxl .navbar-toggler{display:none}.navbar-expand-xxl .offcanvas-header{display:none}.navbar-expand-xxl .offcanvas{position:inherit;bottom:0;z-index:1000;flex-grow:1;-webkit-flex-grow:1;visibility:visible !important;background-color:rgba(0,0,0,0);border-right:0;border-left:0;transition:none;transform:none}.navbar-expand-xxl .offcanvas-top,.navbar-expand-xxl .offcanvas-bottom{height:auto;border-top:0;border-bottom:0}.navbar-expand-xxl .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}}.navbar-expand{flex-wrap:nowrap;-webkit-flex-wrap:nowrap;justify-content:flex-start;-webkit-justify-content:flex-start}.navbar-expand .navbar-nav{flex-direction:row;-webkit-flex-direction:row}.navbar-expand .navbar-nav .dropdown-menu{position:absolute}.navbar-expand .navbar-nav .nav-link{padding-right:.5rem;padding-left:.5rem}.navbar-expand .navbar-nav-scroll{overflow:visible}.navbar-expand .navbar-collapse{display:flex 
!important;display:-webkit-flex !important;flex-basis:auto;-webkit-flex-basis:auto}.navbar-expand .navbar-toggler{display:none}.navbar-expand .offcanvas-header{display:none}.navbar-expand .offcanvas{position:inherit;bottom:0;z-index:1000;flex-grow:1;-webkit-flex-grow:1;visibility:visible !important;background-color:rgba(0,0,0,0);border-right:0;border-left:0;transition:none;transform:none}.navbar-expand .offcanvas-top,.navbar-expand .offcanvas-bottom{height:auto;border-top:0;border-bottom:0}.navbar-expand .offcanvas-body{display:flex;display:-webkit-flex;flex-grow:0;-webkit-flex-grow:0;padding:0;overflow-y:visible}.navbar-light{background-color:#fff}.navbar-light .navbar-brand{color:#595959}.navbar-light .navbar-brand:hover,.navbar-light .navbar-brand:focus{color:#aa1d4f}.navbar-light .navbar-nav .nav-link{color:#595959}.navbar-light .navbar-nav .nav-link:hover,.navbar-light .navbar-nav .nav-link:focus{color:rgba(170,29,79,.8)}.navbar-light .navbar-nav .nav-link.disabled{color:rgba(89,89,89,.75)}.navbar-light .navbar-nav .show>.nav-link,.navbar-light .navbar-nav .nav-link.active{color:#aa1d4f}.navbar-light .navbar-toggler{color:#595959;border-color:rgba(89,89,89,0)}.navbar-light .navbar-toggler-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23595959' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e")}.navbar-light .navbar-text{color:#595959}.navbar-light .navbar-text a,.navbar-light .navbar-text a:hover,.navbar-light .navbar-text a:focus{color:#aa1d4f}.navbar-dark{background-color:#fff}.navbar-dark .navbar-brand{color:#595959}.navbar-dark .navbar-brand:hover,.navbar-dark .navbar-brand:focus{color:#aa1d4f}.navbar-dark .navbar-nav .nav-link{color:#595959}.navbar-dark .navbar-nav .nav-link:hover,.navbar-dark .navbar-nav .nav-link:focus{color:rgba(170,29,79,.8)}.navbar-dark .navbar-nav 
.nav-link.disabled{color:rgba(89,89,89,.75)}.navbar-dark .navbar-nav .show>.nav-link,.navbar-dark .navbar-nav .active>.nav-link,.navbar-dark .navbar-nav .nav-link.active{color:#aa1d4f}.navbar-dark .navbar-toggler{color:#595959;border-color:rgba(89,89,89,0)}.navbar-dark .navbar-toggler-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 30 30'%3e%3cpath stroke='%23595959' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3e%3c/svg%3e")}.navbar-dark .navbar-text{color:#595959}.navbar-dark .navbar-text a,.navbar-dark .navbar-text a:hover,.navbar-dark .navbar-text a:focus{color:#aa1d4f}.card{position:relative;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;min-width:0;word-wrap:break-word;background-color:#fff;background-clip:border-box;border:1px solid rgba(0,0,0,.125)}.card>hr{margin-right:0;margin-left:0}.card>.list-group{border-top:inherit;border-bottom:inherit}.card>.list-group:first-child{border-top-width:0}.card>.list-group:last-child{border-bottom-width:0}.card>.card-header+.list-group,.card>.list-group+.card-footer{border-top:0}.card-body{flex:1 1 auto;-webkit-flex:1 1 auto;padding:1rem 1rem}.card-title{margin-bottom:.5rem}.card-subtitle{margin-top:-0.25rem;margin-bottom:0}.card-text:last-child{margin-bottom:0}.card-link+.card-link{margin-left:1rem}.card-header{padding:.5rem 1rem;margin-bottom:0;background-color:#adb5bd;border-bottom:1px solid rgba(0,0,0,.125)}.card-footer{padding:.5rem 1rem;background-color:#adb5bd;border-top:1px solid rgba(0,0,0,.125)}.card-header-tabs{margin-right:-0.5rem;margin-bottom:-0.5rem;margin-left:-0.5rem;border-bottom:0}.card-header-pills{margin-right:-0.5rem;margin-left:-0.5rem}.card-img-overlay{position:absolute;top:0;right:0;bottom:0;left:0;padding:1rem}.card-img,.card-img-top,.card-img-bottom{width:100%}.card-group>.card{margin-bottom:.75rem}@media(min-width: 
576px){.card-group{display:flex;display:-webkit-flex;flex-flow:row wrap;-webkit-flex-flow:row wrap}.card-group>.card{flex:1 0 0%;-webkit-flex:1 0 0%;margin-bottom:0}.card-group>.card+.card{margin-left:0;border-left:0}}.accordion-button{position:relative;display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;width:100%;padding:1rem 1.25rem;font-size:1rem;color:#373a3c;text-align:left;background-color:#fff;border:0;overflow-anchor:none;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out,border-radius .15s ease}@media(prefers-reduced-motion: reduce){.accordion-button{transition:none}}.accordion-button:not(.collapsed){color:#2373cc;background-color:#e9f2fc;box-shadow:inset 0 -1px 0 rgba(0,0,0,.125)}.accordion-button:not(.collapsed)::after{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%232373cc'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");transform:rotate(-180deg)}.accordion-button::after{flex-shrink:0;-webkit-flex-shrink:0;width:1.25rem;height:1.25rem;margin-left:auto;content:"";background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23373a3c'%3e%3cpath fill-rule='evenodd' d='M1.646 4.646a.5.5 0 0 1 .708 0L8 10.293l5.646-5.647a.5.5 0 0 1 .708.708l-6 6a.5.5 0 0 1-.708 0l-6-6a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e");background-repeat:no-repeat;background-size:1.25rem;transition:transform .2s ease-in-out}@media(prefers-reduced-motion: reduce){.accordion-button::after{transition:none}}.accordion-button:hover{z-index:2}.accordion-button:focus{z-index:3;border-color:#93c0f1;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.accordion-header{margin-bottom:0}.accordion-item{background-color:#fff;border:1px solid 
rgba(0,0,0,.125)}.accordion-item:not(:first-of-type){border-top:0}.accordion-body{padding:1rem 1.25rem}.accordion-flush .accordion-collapse{border-width:0}.accordion-flush .accordion-item{border-right:0;border-left:0}.accordion-flush .accordion-item:first-child{border-top:0}.accordion-flush .accordion-item:last-child{border-bottom:0}.breadcrumb{display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;padding:0 0;margin-bottom:1rem;list-style:none}.breadcrumb-item+.breadcrumb-item{padding-left:.5rem}.breadcrumb-item+.breadcrumb-item::before{float:left;padding-right:.5rem;color:#6c757d;content:var(--bs-breadcrumb-divider, ">") /* rtl: var(--bs-breadcrumb-divider, ">") */}.breadcrumb-item.active{color:#6c757d}.pagination{display:flex;display:-webkit-flex;padding-left:0;list-style:none}.page-link{position:relative;display:block;color:#ca225e;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background-color:#fff;border:1px solid #dee2e6;transition:color .15s ease-in-out,background-color .15s ease-in-out,border-color .15s ease-in-out,box-shadow .15s ease-in-out}@media(prefers-reduced-motion: reduce){.page-link{transition:none}}.page-link:hover{z-index:2;color:#a21b4b;background-color:#e9ecef;border-color:#dee2e6}.page-link:focus{z-index:3;color:#a21b4b;background-color:#e9ecef;outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25)}.page-item:not(:first-child) .page-link{margin-left:-1px}.page-item.active .page-link{z-index:3;color:#fff;background-color:#2780e3;border-color:#2780e3}.page-item.disabled .page-link{color:#6c757d;pointer-events:none;background-color:#fff;border-color:#dee2e6}.page-link{padding:.375rem .75rem}.pagination-lg .page-link{padding:.75rem 1.5rem;font-size:1.25rem}.pagination-sm .page-link{padding:.25rem .5rem;font-size:0.875rem}.badge{display:inline-block;padding:.35em 
.65em;font-size:0.75em;font-weight:700;line-height:1;color:#fff;text-align:center;white-space:nowrap;vertical-align:baseline}.badge:empty{display:none}.btn .badge{position:relative;top:-1px}.alert{position:relative;padding:1rem 1rem;margin-bottom:1rem;border:0 solid rgba(0,0,0,0)}.alert-heading{color:inherit}.alert-link{font-weight:700}.alert-dismissible{padding-right:3rem}.alert-dismissible .btn-close{position:absolute;top:0;right:0;z-index:2;padding:1.25rem 1rem}.alert-default{color:#212324;background-color:#d7d8d8;border-color:#c3c4c5}.alert-default .alert-link{color:#1a1c1d}.alert-primary{color:#174d88;background-color:#d4e6f9;border-color:#bed9f7}.alert-primary .alert-link{color:#123e6d}.alert-secondary{color:#212324;background-color:#d7d8d8;border-color:#c3c4c5}.alert-secondary .alert-link{color:#1a1c1d}.alert-success{color:#266d0e;background-color:#d9f0d1;border-color:#c5e9ba}.alert-success .alert-link{color:#1e570b}.alert-info{color:#5c3270;background-color:#ebddf1;border-color:#e0cceb}.alert-info .alert-link{color:#4a285a}.alert-warning{color:#99460e;background-color:#ffe3d1;border-color:#ffd6ba}.alert-warning .alert-link{color:#7a380b}.alert-danger{color:#902;background-color:#ffccd7;border-color:#ffb3c4}.alert-danger .alert-link{color:#7a001b}.alert-light{color:#959596;background-color:#fefefe;border-color:#fdfdfe}.alert-light .alert-link{color:#777778}.alert-dark{color:#212324;background-color:#d7d8d8;border-color:#c3c4c5}.alert-dark .alert-link{color:#1a1c1d}@keyframes progress-bar-stripes{0%{background-position-x:.5rem}}.progress{display:flex;display:-webkit-flex;height:.5rem;overflow:hidden;font-size:0.75rem;background-color:#e9ecef}.progress-bar{display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;justify-content:center;-webkit-justify-content:center;overflow:hidden;color:#fff;text-align:center;white-space:nowrap;background-color:#2780e3;transition:width .6s ease}@media(prefers-reduced-motion: 
reduce){.progress-bar{transition:none}}.progress-bar-striped{background-image:linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);background-size:.5rem .5rem}.progress-bar-animated{animation:1s linear infinite progress-bar-stripes}@media(prefers-reduced-motion: reduce){.progress-bar-animated{animation:none}}.list-group{display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;padding-left:0;margin-bottom:0}.list-group-numbered{list-style-type:none;counter-reset:section}.list-group-numbered>li::before{content:counters(section, ".") ". ";counter-increment:section}.list-group-item-action{width:100%;color:#495057;text-align:inherit}.list-group-item-action:hover,.list-group-item-action:focus{z-index:1;color:#495057;text-decoration:none;background-color:#f8f9fa}.list-group-item-action:active{color:#373a3c;background-color:#e9ecef}.list-group-item{position:relative;display:block;padding:.5rem 1rem;color:#212529;text-decoration:none;-webkit-text-decoration:none;-moz-text-decoration:none;-ms-text-decoration:none;-o-text-decoration:none;background-color:#fff;border:1px solid rgba(0,0,0,.125)}.list-group-item.disabled,.list-group-item:disabled{color:#6c757d;pointer-events:none;background-color:#fff}.list-group-item.active{z-index:2;color:#fff;background-color:#2780e3;border-color:#2780e3}.list-group-item+.list-group-item{border-top-width:0}.list-group-item+.list-group-item.active{margin-top:-1px;border-top-width:1px}.list-group-horizontal{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal>.list-group-item.active{margin-top:0}.list-group-horizontal>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}@media(min-width: 
576px){.list-group-horizontal-sm{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-sm>.list-group-item.active{margin-top:0}.list-group-horizontal-sm>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-sm>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media(min-width: 768px){.list-group-horizontal-md{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-md>.list-group-item.active{margin-top:0}.list-group-horizontal-md>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-md>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media(min-width: 992px){.list-group-horizontal-lg{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-lg>.list-group-item.active{margin-top:0}.list-group-horizontal-lg>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-lg>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media(min-width: 1200px){.list-group-horizontal-xl{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-xl>.list-group-item.active{margin-top:0}.list-group-horizontal-xl>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-xl>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}@media(min-width: 1400px){.list-group-horizontal-xxl{flex-direction:row;-webkit-flex-direction:row}.list-group-horizontal-xxl>.list-group-item.active{margin-top:0}.list-group-horizontal-xxl>.list-group-item+.list-group-item{border-top-width:1px;border-left-width:0}.list-group-horizontal-xxl>.list-group-item+.list-group-item.active{margin-left:-1px;border-left-width:1px}}.list-group-flush>.list-group-item{border-width:0 0 
1px}.list-group-flush>.list-group-item:last-child{border-bottom-width:0}.list-group-item-default{color:#212324;background-color:#d7d8d8}.list-group-item-default.list-group-item-action:hover,.list-group-item-default.list-group-item-action:focus{color:#212324;background-color:#c2c2c2}.list-group-item-default.list-group-item-action.active{color:#fff;background-color:#212324;border-color:#212324}.list-group-item-primary{color:#174d88;background-color:#d4e6f9}.list-group-item-primary.list-group-item-action:hover,.list-group-item-primary.list-group-item-action:focus{color:#174d88;background-color:#bfcfe0}.list-group-item-primary.list-group-item-action.active{color:#fff;background-color:#174d88;border-color:#174d88}.list-group-item-secondary{color:#212324;background-color:#d7d8d8}.list-group-item-secondary.list-group-item-action:hover,.list-group-item-secondary.list-group-item-action:focus{color:#212324;background-color:#c2c2c2}.list-group-item-secondary.list-group-item-action.active{color:#fff;background-color:#212324;border-color:#212324}.list-group-item-success{color:#266d0e;background-color:#d9f0d1}.list-group-item-success.list-group-item-action:hover,.list-group-item-success.list-group-item-action:focus{color:#266d0e;background-color:#c3d8bc}.list-group-item-success.list-group-item-action.active{color:#fff;background-color:#266d0e;border-color:#266d0e}.list-group-item-info{color:#5c3270;background-color:#ebddf1}.list-group-item-info.list-group-item-action:hover,.list-group-item-info.list-group-item-action:focus{color:#5c3270;background-color:#d4c7d9}.list-group-item-info.list-group-item-action.active{color:#fff;background-color:#5c3270;border-color:#5c3270}.list-group-item-warning{color:#99460e;background-color:#ffe3d1}.list-group-item-warning.list-group-item-action:hover,.list-group-item-warning.list-group-item-action:focus{color:#99460e;background-color:#e6ccbc}.list-group-item-warning.list-group-item-action.active{color:#fff;background-color:#99460e;border-color:#9
9460e}.list-group-item-danger{color:#902;background-color:#ffccd7}.list-group-item-danger.list-group-item-action:hover,.list-group-item-danger.list-group-item-action:focus{color:#902;background-color:#e6b8c2}.list-group-item-danger.list-group-item-action.active{color:#fff;background-color:#902;border-color:#902}.list-group-item-light{color:#959596;background-color:#fefefe}.list-group-item-light.list-group-item-action:hover,.list-group-item-light.list-group-item-action:focus{color:#959596;background-color:#e5e5e5}.list-group-item-light.list-group-item-action.active{color:#fff;background-color:#959596;border-color:#959596}.list-group-item-dark{color:#212324;background-color:#d7d8d8}.list-group-item-dark.list-group-item-action:hover,.list-group-item-dark.list-group-item-action:focus{color:#212324;background-color:#c2c2c2}.list-group-item-dark.list-group-item-action.active{color:#fff;background-color:#212324;border-color:#212324}.btn-close{box-sizing:content-box;width:1em;height:1em;padding:.25em .25em;color:#000;background:rgba(0,0,0,0) url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23000'%3e%3cpath d='M.293.293a1 1 0 011.414 0L8 6.586 14.293.293a1 1 0 111.414 1.414L9.414 8l6.293 6.293a1 1 0 01-1.414 1.414L8 9.414l-6.293 6.293a1 1 0 01-1.414-1.414L6.586 8 .293 1.707a1 1 0 010-1.414z'/%3e%3c/svg%3e") center/1em auto no-repeat;border:0;opacity:.5}.btn-close:hover{color:#000;text-decoration:none;opacity:.75}.btn-close:focus{outline:0;box-shadow:0 0 0 .25rem rgba(39,128,227,.25);opacity:1}.btn-close:disabled,.btn-close.disabled{pointer-events:none;user-select:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;opacity:.25}.btn-close-white{filter:invert(1) grayscale(100%) brightness(200%)}.toast{width:350px;max-width:100%;font-size:0.875rem;pointer-events:auto;background-color:rgba(255,255,255,.85);background-clip:padding-box;border:1px solid rgba(0,0,0,.1);box-shadow:0 .5rem 1rem 
rgba(0,0,0,.15)}.toast.showing{opacity:0}.toast:not(.show){display:none}.toast-container{width:max-content;width:-webkit-max-content;width:-moz-max-content;width:-ms-max-content;width:-o-max-content;max-width:100%;pointer-events:none}.toast-container>:not(:last-child){margin-bottom:.75rem}.toast-header{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;padding:.5rem .75rem;color:#6c757d;background-color:rgba(255,255,255,.85);background-clip:padding-box;border-bottom:1px solid rgba(0,0,0,.05)}.toast-header .btn-close{margin-right:-0.375rem;margin-left:.75rem}.toast-body{padding:.75rem;word-wrap:break-word}.modal{position:fixed;top:0;left:0;z-index:1055;display:none;width:100%;height:100%;overflow-x:hidden;overflow-y:auto;outline:0}.modal-dialog{position:relative;width:auto;margin:.5rem;pointer-events:none}.modal.fade .modal-dialog{transition:transform .3s ease-out;transform:translate(0, -50px)}@media(prefers-reduced-motion: reduce){.modal.fade .modal-dialog{transition:none}}.modal.show .modal-dialog{transform:none}.modal.modal-static .modal-dialog{transform:scale(1.02)}.modal-dialog-scrollable{height:calc(100% - 1rem)}.modal-dialog-scrollable .modal-content{max-height:100%;overflow:hidden}.modal-dialog-scrollable .modal-body{overflow-y:auto}.modal-dialog-centered{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;min-height:calc(100% - 1rem)}.modal-content{position:relative;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;width:100%;pointer-events:auto;background-color:#fff;background-clip:padding-box;border:1px solid 
rgba(0,0,0,.2);outline:0}.modal-backdrop{position:fixed;top:0;left:0;z-index:1050;width:100vw;height:100vh;background-color:#000}.modal-backdrop.fade{opacity:0}.modal-backdrop.show{opacity:.5}.modal-header{display:flex;display:-webkit-flex;flex-shrink:0;-webkit-flex-shrink:0;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:1rem 1rem;border-bottom:1px solid #dee2e6}.modal-header .btn-close{padding:.5rem .5rem;margin:-0.5rem -0.5rem -0.5rem auto}.modal-title{margin-bottom:0;line-height:1.5}.modal-body{position:relative;flex:1 1 auto;-webkit-flex:1 1 auto;padding:1rem}.modal-footer{display:flex;display:-webkit-flex;flex-wrap:wrap;-webkit-flex-wrap:wrap;flex-shrink:0;-webkit-flex-shrink:0;align-items:center;-webkit-align-items:center;justify-content:flex-end;-webkit-justify-content:flex-end;padding:.75rem;border-top:1px solid #dee2e6}.modal-footer>*{margin:.25rem}@media(min-width: 576px){.modal-dialog{max-width:500px;margin:1.75rem auto}.modal-dialog-scrollable{height:calc(100% - 3.5rem)}.modal-dialog-centered{min-height:calc(100% - 3.5rem)}.modal-sm{max-width:300px}}@media(min-width: 992px){.modal-lg,.modal-xl{max-width:800px}}@media(min-width: 1200px){.modal-xl{max-width:1140px}}.modal-fullscreen{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen .modal-content{height:100%;border:0}.modal-fullscreen .modal-body{overflow-y:auto}@media(max-width: 575.98px){.modal-fullscreen-sm-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-sm-down .modal-content{height:100%;border:0}.modal-fullscreen-sm-down .modal-body{overflow-y:auto}}@media(max-width: 767.98px){.modal-fullscreen-md-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-md-down .modal-content{height:100%;border:0}.modal-fullscreen-md-down .modal-body{overflow-y:auto}}@media(max-width: 
991.98px){.modal-fullscreen-lg-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-lg-down .modal-content{height:100%;border:0}.modal-fullscreen-lg-down .modal-body{overflow-y:auto}}@media(max-width: 1199.98px){.modal-fullscreen-xl-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-xl-down .modal-content{height:100%;border:0}.modal-fullscreen-xl-down .modal-body{overflow-y:auto}}@media(max-width: 1399.98px){.modal-fullscreen-xxl-down{width:100vw;max-width:none;height:100%;margin:0}.modal-fullscreen-xxl-down .modal-content{height:100%;border:0}.modal-fullscreen-xxl-down .modal-body{overflow-y:auto}}.tooltip{position:absolute;z-index:1080;display:block;margin:0;font-family:var(--bs-font-sans-serif);font-style:normal;font-weight:400;line-height:1.6;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;word-spacing:normal;white-space:normal;line-break:auto;font-size:0.875rem;word-wrap:break-word;opacity:0}.tooltip.show{opacity:.9}.tooltip .tooltip-arrow{position:absolute;display:block;width:.8rem;height:.4rem}.tooltip .tooltip-arrow::before{position:absolute;content:"";border-color:rgba(0,0,0,0);border-style:solid}.bs-tooltip-top,.bs-tooltip-auto[data-popper-placement^=top]{padding:.4rem 0}.bs-tooltip-top .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=top] .tooltip-arrow{bottom:0}.bs-tooltip-top .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=top] .tooltip-arrow::before{top:-1px;border-width:.4rem .4rem 0;border-top-color:#000}.bs-tooltip-end,.bs-tooltip-auto[data-popper-placement^=right]{padding:0 .4rem}.bs-tooltip-end .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=right] .tooltip-arrow{left:0;width:.4rem;height:.8rem}.bs-tooltip-end .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=right] .tooltip-arrow::before{right:-1px;border-width:.4rem .4rem .4rem 
0;border-right-color:#000}.bs-tooltip-bottom,.bs-tooltip-auto[data-popper-placement^=bottom]{padding:.4rem 0}.bs-tooltip-bottom .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=bottom] .tooltip-arrow{top:0}.bs-tooltip-bottom .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=bottom] .tooltip-arrow::before{bottom:-1px;border-width:0 .4rem .4rem;border-bottom-color:#000}.bs-tooltip-start,.bs-tooltip-auto[data-popper-placement^=left]{padding:0 .4rem}.bs-tooltip-start .tooltip-arrow,.bs-tooltip-auto[data-popper-placement^=left] .tooltip-arrow{right:0;width:.4rem;height:.8rem}.bs-tooltip-start .tooltip-arrow::before,.bs-tooltip-auto[data-popper-placement^=left] .tooltip-arrow::before{left:-1px;border-width:.4rem 0 .4rem .4rem;border-left-color:#000}.tooltip-inner{max-width:200px;padding:.25rem .5rem;color:#fff;text-align:center;background-color:#000}.popover{position:absolute;top:0;left:0 /* rtl:ignore */;z-index:1070;display:block;max-width:276px;font-family:var(--bs-font-sans-serif);font-style:normal;font-weight:400;line-height:1.6;text-align:left;text-align:start;text-decoration:none;text-shadow:none;text-transform:none;letter-spacing:normal;word-break:normal;word-spacing:normal;white-space:normal;line-break:auto;font-size:0.875rem;word-wrap:break-word;background-color:#fff;background-clip:padding-box;border:1px solid rgba(0,0,0,.2)}.popover .popover-arrow{position:absolute;display:block;width:1rem;height:.5rem}.popover .popover-arrow::before,.popover .popover-arrow::after{position:absolute;display:block;content:"";border-color:rgba(0,0,0,0);border-style:solid}.bs-popover-top>.popover-arrow,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow{bottom:calc(-0.5rem - 1px)}.bs-popover-top>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::before{bottom:0;border-width:.5rem .5rem 
0;border-top-color:rgba(0,0,0,.25)}.bs-popover-top>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=top]>.popover-arrow::after{bottom:1px;border-width:.5rem .5rem 0;border-top-color:#fff}.bs-popover-end>.popover-arrow,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow{left:calc(-0.5rem - 1px);width:.5rem;height:1rem}.bs-popover-end>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::before{left:0;border-width:.5rem .5rem .5rem 0;border-right-color:rgba(0,0,0,.25)}.bs-popover-end>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=right]>.popover-arrow::after{left:1px;border-width:.5rem .5rem .5rem 0;border-right-color:#fff}.bs-popover-bottom>.popover-arrow,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow{top:calc(-0.5rem - 1px)}.bs-popover-bottom>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::before{top:0;border-width:0 .5rem .5rem .5rem;border-bottom-color:rgba(0,0,0,.25)}.bs-popover-bottom>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=bottom]>.popover-arrow::after{top:1px;border-width:0 .5rem .5rem .5rem;border-bottom-color:#fff}.bs-popover-bottom .popover-header::before,.bs-popover-auto[data-popper-placement^=bottom] .popover-header::before{position:absolute;top:0;left:50%;display:block;width:1rem;margin-left:-0.5rem;content:"";border-bottom:1px solid #f0f0f0}.bs-popover-start>.popover-arrow,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow{right:calc(-0.5rem - 1px);width:.5rem;height:1rem}.bs-popover-start>.popover-arrow::before,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::before{right:0;border-width:.5rem 0 .5rem .5rem;border-left-color:rgba(0,0,0,.25)}.bs-popover-start>.popover-arrow::after,.bs-popover-auto[data-popper-placement^=left]>.popover-arrow::after{right:1px;border-width:.5rem 0 .5rem .5rem;border-left-color:#fff}.popover-header{padding:.5rem 
1rem;margin-bottom:0;font-size:1rem;background-color:#f0f0f0;border-bottom:1px solid rgba(0,0,0,.2)}.popover-header:empty{display:none}.popover-body{padding:1rem 1rem;color:#373a3c}.carousel{position:relative}.carousel.pointer-event{touch-action:pan-y;-webkit-touch-action:pan-y;-moz-touch-action:pan-y;-ms-touch-action:pan-y;-o-touch-action:pan-y}.carousel-inner{position:relative;width:100%;overflow:hidden}.carousel-inner::after{display:block;clear:both;content:""}.carousel-item{position:relative;display:none;float:left;width:100%;margin-right:-100%;backface-visibility:hidden;-webkit-backface-visibility:hidden;-moz-backface-visibility:hidden;-ms-backface-visibility:hidden;-o-backface-visibility:hidden;transition:transform .6s ease-in-out}@media(prefers-reduced-motion: reduce){.carousel-item{transition:none}}.carousel-item.active,.carousel-item-next,.carousel-item-prev{display:block}.carousel-item-next:not(.carousel-item-start),.active.carousel-item-end{transform:translateX(100%)}.carousel-item-prev:not(.carousel-item-end),.active.carousel-item-start{transform:translateX(-100%)}.carousel-fade .carousel-item{opacity:0;transition-property:opacity;transform:none}.carousel-fade .carousel-item.active,.carousel-fade .carousel-item-next.carousel-item-start,.carousel-fade .carousel-item-prev.carousel-item-end{z-index:1;opacity:1}.carousel-fade .active.carousel-item-start,.carousel-fade .active.carousel-item-end{z-index:0;opacity:0;transition:opacity 0s .6s}@media(prefers-reduced-motion: reduce){.carousel-fade .active.carousel-item-start,.carousel-fade .active.carousel-item-end{transition:none}}.carousel-control-prev,.carousel-control-next{position:absolute;top:0;bottom:0;z-index:1;display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;justify-content:center;-webkit-justify-content:center;width:15%;padding:0;color:#fff;text-align:center;background:none;border:0;opacity:.5;transition:opacity .15s ease}@media(prefers-reduced-motion: 
reduce){.carousel-control-prev,.carousel-control-next{transition:none}}.carousel-control-prev:hover,.carousel-control-prev:focus,.carousel-control-next:hover,.carousel-control-next:focus{color:#fff;text-decoration:none;outline:0;opacity:.9}.carousel-control-prev{left:0}.carousel-control-next{right:0}.carousel-control-prev-icon,.carousel-control-next-icon{display:inline-block;width:2rem;height:2rem;background-repeat:no-repeat;background-position:50%;background-size:100% 100%}.carousel-control-prev-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23fff'%3e%3cpath d='M11.354 1.646a.5.5 0 0 1 0 .708L5.707 8l5.647 5.646a.5.5 0 0 1-.708.708l-6-6a.5.5 0 0 1 0-.708l6-6a.5.5 0 0 1 .708 0z'/%3e%3c/svg%3e")}.carousel-control-next-icon{background-image:url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16' fill='%23fff'%3e%3cpath d='M4.646 1.646a.5.5 0 0 1 .708 0l6 6a.5.5 0 0 1 0 .708l-6 6a.5.5 0 0 1-.708-.708L10.293 8 4.646 2.354a.5.5 0 0 1 0-.708z'/%3e%3c/svg%3e")}.carousel-indicators{position:absolute;right:0;bottom:0;left:0;z-index:2;display:flex;display:-webkit-flex;justify-content:center;-webkit-justify-content:center;padding:0;margin-right:15%;margin-bottom:1rem;margin-left:15%;list-style:none}.carousel-indicators [data-bs-target]{box-sizing:content-box;flex:0 1 auto;-webkit-flex:0 1 auto;width:30px;height:3px;padding:0;margin-right:3px;margin-left:3px;text-indent:-999px;cursor:pointer;background-color:#fff;background-clip:padding-box;border:0;border-top:10px solid rgba(0,0,0,0);border-bottom:10px solid rgba(0,0,0,0);opacity:.5;transition:opacity .6s ease}@media(prefers-reduced-motion: reduce){.carousel-indicators [data-bs-target]{transition:none}}.carousel-indicators .active{opacity:1}.carousel-caption{position:absolute;right:15%;bottom:1.25rem;left:15%;padding-top:1.25rem;padding-bottom:1.25rem;color:#fff;text-align:center}.carousel-dark 
.carousel-control-prev-icon,.carousel-dark .carousel-control-next-icon{filter:invert(1) grayscale(100)}.carousel-dark .carousel-indicators [data-bs-target]{background-color:#000}.carousel-dark .carousel-caption{color:#000}@keyframes spinner-border{to{transform:rotate(360deg) /* rtl:ignore */}}.spinner-border{display:inline-block;width:2rem;height:2rem;vertical-align:-0.125em;border:.25em solid currentColor;border-right-color:rgba(0,0,0,0);border-radius:50%;animation:.75s linear infinite spinner-border}.spinner-border-sm{width:1rem;height:1rem;border-width:.2em}@keyframes spinner-grow{0%{transform:scale(0)}50%{opacity:1;transform:none}}.spinner-grow{display:inline-block;width:2rem;height:2rem;vertical-align:-0.125em;background-color:currentColor;border-radius:50%;opacity:0;animation:.75s linear infinite spinner-grow}.spinner-grow-sm{width:1rem;height:1rem}@media(prefers-reduced-motion: reduce){.spinner-border,.spinner-grow{animation-duration:1.5s;-webkit-animation-duration:1.5s;-moz-animation-duration:1.5s;-ms-animation-duration:1.5s;-o-animation-duration:1.5s}}.offcanvas{position:fixed;bottom:0;z-index:1045;display:flex;display:-webkit-flex;flex-direction:column;-webkit-flex-direction:column;max-width:100%;visibility:hidden;background-color:#fff;background-clip:padding-box;outline:0;transition:transform .3s ease-in-out}@media(prefers-reduced-motion: reduce){.offcanvas{transition:none}}.offcanvas-backdrop{position:fixed;top:0;left:0;z-index:1040;width:100vw;height:100vh;background-color:#000}.offcanvas-backdrop.fade{opacity:0}.offcanvas-backdrop.show{opacity:.5}.offcanvas-header{display:flex;display:-webkit-flex;align-items:center;-webkit-align-items:center;justify-content:space-between;-webkit-justify-content:space-between;padding:1rem 1rem}.offcanvas-header .btn-close{padding:.5rem .5rem;margin-top:-0.5rem;margin-right:-0.5rem;margin-bottom:-0.5rem}.offcanvas-title{margin-bottom:0;line-height:1.5}.offcanvas-body{flex-grow:1;-webkit-flex-grow:1;padding:1rem 
1rem;overflow-y:auto}.offcanvas-start{top:0;left:0;width:400px;border-right:1px solid rgba(0,0,0,.2);transform:translateX(-100%)}.offcanvas-end{top:0;right:0;width:400px;border-left:1px solid rgba(0,0,0,.2);transform:translateX(100%)}.offcanvas-top{top:0;right:0;left:0;height:30vh;max-height:100%;border-bottom:1px solid rgba(0,0,0,.2);transform:translateY(-100%)}.offcanvas-bottom{right:0;left:0;height:30vh;max-height:100%;border-top:1px solid rgba(0,0,0,.2);transform:translateY(100%)}.offcanvas.show{transform:none}.placeholder{display:inline-block;min-height:1em;vertical-align:middle;cursor:wait;background-color:currentColor;opacity:.5}.placeholder.btn::before{display:inline-block;content:""}.placeholder-xs{min-height:.6em}.placeholder-sm{min-height:.8em}.placeholder-lg{min-height:1.2em}.placeholder-glow .placeholder{animation:placeholder-glow 2s ease-in-out infinite}@keyframes placeholder-glow{50%{opacity:.2}}.placeholder-wave{mask-image:linear-gradient(130deg, #000 55%, rgba(0, 0, 0, 0.8) 75%, #000 95%);-webkit-mask-image:linear-gradient(130deg, #000 55%, rgba(0, 0, 0, 0.8) 75%, #000 95%);mask-size:200% 100%;-webkit-mask-size:200% 100%;animation:placeholder-wave 2s linear infinite}@keyframes placeholder-wave{100%{mask-position:-200% 0%;-webkit-mask-position:-200% 
0%}}.clearfix::after{display:block;clear:both;content:""}.link-default{color:#373a3c}.link-default:hover,.link-default:focus{color:#2c2e30}.link-primary{color:#2780e3}.link-primary:hover,.link-primary:focus{color:#1f66b6}.link-secondary{color:#373a3c}.link-secondary:hover,.link-secondary:focus{color:#2c2e30}.link-success{color:#3fb618}.link-success:hover,.link-success:focus{color:#329213}.link-info{color:#9954bb}.link-info:hover,.link-info:focus{color:#7a4396}.link-warning{color:#ff7518}.link-warning:hover,.link-warning:focus{color:#cc5e13}.link-danger{color:#ff0039}.link-danger:hover,.link-danger:focus{color:#cc002e}.link-light{color:#f8f9fa}.link-light:hover,.link-light:focus{color:#f9fafb}.link-dark{color:#373a3c}.link-dark:hover,.link-dark:focus{color:#2c2e30}.ratio{position:relative;width:100%}.ratio::before{display:block;padding-top:var(--bs-aspect-ratio);content:""}.ratio>*{position:absolute;top:0;left:0;width:100%;height:100%}.ratio-1x1{--bs-aspect-ratio: 100%}.ratio-4x3{--bs-aspect-ratio: 75%}.ratio-16x9{--bs-aspect-ratio: 56.25%}.ratio-21x9{--bs-aspect-ratio: 42.8571428571%}.fixed-top{position:fixed;top:0;right:0;left:0;z-index:1030}.fixed-bottom{position:fixed;right:0;bottom:0;left:0;z-index:1030}.sticky-top{position:sticky;top:0;z-index:1020}@media(min-width: 576px){.sticky-sm-top{position:sticky;top:0;z-index:1020}}@media(min-width: 768px){.sticky-md-top{position:sticky;top:0;z-index:1020}}@media(min-width: 992px){.sticky-lg-top{position:sticky;top:0;z-index:1020}}@media(min-width: 1200px){.sticky-xl-top{position:sticky;top:0;z-index:1020}}@media(min-width: 1400px){.sticky-xxl-top{position:sticky;top:0;z-index:1020}}.hstack{display:flex;display:-webkit-flex;flex-direction:row;-webkit-flex-direction:row;align-items:center;-webkit-align-items:center;align-self:stretch;-webkit-align-self:stretch}.vstack{display:flex;display:-webkit-flex;flex:1 1 auto;-webkit-flex:1 1 
auto;flex-direction:column;-webkit-flex-direction:column;align-self:stretch;-webkit-align-self:stretch}.visually-hidden,.visually-hidden-focusable:not(:focus):not(:focus-within){position:absolute !important;width:1px !important;height:1px !important;padding:0 !important;margin:-1px !important;overflow:hidden !important;clip:rect(0, 0, 0, 0) !important;white-space:nowrap !important;border:0 !important}.stretched-link::after{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;content:""}.text-truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.vr{display:inline-block;align-self:stretch;-webkit-align-self:stretch;width:1px;min-height:1em;background-color:currentColor;opacity:.25}.align-baseline{vertical-align:baseline !important}.align-top{vertical-align:top !important}.align-middle{vertical-align:middle !important}.align-bottom{vertical-align:bottom !important}.align-text-bottom{vertical-align:text-bottom !important}.align-text-top{vertical-align:text-top !important}.float-start{float:left !important}.float-end{float:right !important}.float-none{float:none !important}.opacity-0{opacity:0 !important}.opacity-25{opacity:.25 !important}.opacity-50{opacity:.5 !important}.opacity-75{opacity:.75 !important}.opacity-100{opacity:1 !important}.overflow-auto{overflow:auto !important}.overflow-hidden{overflow:hidden !important}.overflow-visible{overflow:visible !important}.overflow-scroll{overflow:scroll !important}.d-inline{display:inline !important}.d-inline-block{display:inline-block !important}.d-block{display:block !important}.d-grid{display:grid !important}.d-table{display:table !important}.d-table-row{display:table-row !important}.d-table-cell{display:table-cell !important}.d-flex{display:flex !important}.d-inline-flex{display:inline-flex !important}.d-none{display:none !important}.shadow{box-shadow:0 .5rem 1rem rgba(0,0,0,.15) !important}.shadow-sm{box-shadow:0 .125rem .25rem rgba(0,0,0,.075) !important}.shadow-lg{box-shadow:0 1rem 3rem 
rgba(0,0,0,.175) !important}.shadow-none{box-shadow:none !important}.position-static{position:static !important}.position-relative{position:relative !important}.position-absolute{position:absolute !important}.position-fixed{position:fixed !important}.position-sticky{position:sticky !important}.top-0{top:0 !important}.top-50{top:50% !important}.top-100{top:100% !important}.bottom-0{bottom:0 !important}.bottom-50{bottom:50% !important}.bottom-100{bottom:100% !important}.start-0{left:0 !important}.start-50{left:50% !important}.start-100{left:100% !important}.end-0{right:0 !important}.end-50{right:50% !important}.end-100{right:100% !important}.translate-middle{transform:translate(-50%, -50%) !important}.translate-middle-x{transform:translateX(-50%) !important}.translate-middle-y{transform:translateY(-50%) !important}.border{border:1px solid #dee2e6 !important}.border-0{border:0 !important}.border-top{border-top:1px solid #dee2e6 !important}.border-top-0{border-top:0 !important}.border-end{border-right:1px solid #dee2e6 !important}.border-end-0{border-right:0 !important}.border-bottom{border-bottom:1px solid #dee2e6 !important}.border-bottom-0{border-bottom:0 !important}.border-start{border-left:1px solid #dee2e6 !important}.border-start-0{border-left:0 !important}.border-default{border-color:#373a3c !important}.border-primary{border-color:#2780e3 !important}.border-secondary{border-color:#373a3c !important}.border-success{border-color:#3fb618 !important}.border-info{border-color:#9954bb !important}.border-warning{border-color:#ff7518 !important}.border-danger{border-color:#ff0039 !important}.border-light{border-color:#f8f9fa !important}.border-dark{border-color:#373a3c !important}.border-white{border-color:#fff !important}.border-1{border-width:1px !important}.border-2{border-width:2px !important}.border-3{border-width:3px !important}.border-4{border-width:4px !important}.border-5{border-width:5px !important}.w-25{width:25% !important}.w-50{width:50% 
!important}.w-75{width:75% !important}.w-100{width:100% !important}.w-auto{width:auto !important}.mw-100{max-width:100% !important}.vw-100{width:100vw !important}.min-vw-100{min-width:100vw !important}.h-25{height:25% !important}.h-50{height:50% !important}.h-75{height:75% !important}.h-100{height:100% !important}.h-auto{height:auto !important}.mh-100{max-height:100% !important}.vh-100{height:100vh !important}.min-vh-100{min-height:100vh !important}.flex-fill{flex:1 1 auto !important}.flex-row{flex-direction:row !important}.flex-column{flex-direction:column !important}.flex-row-reverse{flex-direction:row-reverse !important}.flex-column-reverse{flex-direction:column-reverse !important}.flex-grow-0{flex-grow:0 !important}.flex-grow-1{flex-grow:1 !important}.flex-shrink-0{flex-shrink:0 !important}.flex-shrink-1{flex-shrink:1 !important}.flex-wrap{flex-wrap:wrap !important}.flex-nowrap{flex-wrap:nowrap !important}.flex-wrap-reverse{flex-wrap:wrap-reverse !important}.gap-0{gap:0 !important}.gap-1{gap:.25rem !important}.gap-2{gap:.5rem !important}.gap-3{gap:1rem !important}.gap-4{gap:1.5rem !important}.gap-5{gap:3rem !important}.justify-content-start{justify-content:flex-start !important}.justify-content-end{justify-content:flex-end !important}.justify-content-center{justify-content:center !important}.justify-content-between{justify-content:space-between !important}.justify-content-around{justify-content:space-around !important}.justify-content-evenly{justify-content:space-evenly !important}.align-items-start{align-items:flex-start !important}.align-items-end{align-items:flex-end !important}.align-items-center{align-items:center !important}.align-items-baseline{align-items:baseline !important}.align-items-stretch{align-items:stretch !important}.align-content-start{align-content:flex-start !important}.align-content-end{align-content:flex-end !important}.align-content-center{align-content:center !important}.align-content-between{align-content:space-between 
!important}.align-content-around{align-content:space-around !important}.align-content-stretch{align-content:stretch !important}.align-self-auto{align-self:auto !important}.align-self-start{align-self:flex-start !important}.align-self-end{align-self:flex-end !important}.align-self-center{align-self:center !important}.align-self-baseline{align-self:baseline !important}.align-self-stretch{align-self:stretch !important}.order-first{order:-1 !important}.order-0{order:0 !important}.order-1{order:1 !important}.order-2{order:2 !important}.order-3{order:3 !important}.order-4{order:4 !important}.order-5{order:5 !important}.order-last{order:6 !important}.m-0{margin:0 !important}.m-1{margin:.25rem !important}.m-2{margin:.5rem !important}.m-3{margin:1rem !important}.m-4{margin:1.5rem !important}.m-5{margin:3rem !important}.m-auto{margin:auto !important}.mx-0{margin-right:0 !important;margin-left:0 !important}.mx-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-3{margin-right:1rem !important;margin-left:1rem !important}.mx-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-5{margin-right:3rem !important;margin-left:3rem !important}.mx-auto{margin-right:auto !important;margin-left:auto !important}.my-0{margin-top:0 !important;margin-bottom:0 !important}.my-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-0{margin-top:0 !important}.mt-1{margin-top:.25rem !important}.mt-2{margin-top:.5rem !important}.mt-3{margin-top:1rem !important}.mt-4{margin-top:1.5rem !important}.mt-5{margin-top:3rem !important}.mt-auto{margin-top:auto 
!important}.me-0{margin-right:0 !important}.me-1{margin-right:.25rem !important}.me-2{margin-right:.5rem !important}.me-3{margin-right:1rem !important}.me-4{margin-right:1.5rem !important}.me-5{margin-right:3rem !important}.me-auto{margin-right:auto !important}.mb-0{margin-bottom:0 !important}.mb-1{margin-bottom:.25rem !important}.mb-2{margin-bottom:.5rem !important}.mb-3{margin-bottom:1rem !important}.mb-4{margin-bottom:1.5rem !important}.mb-5{margin-bottom:3rem !important}.mb-auto{margin-bottom:auto !important}.ms-0{margin-left:0 !important}.ms-1{margin-left:.25rem !important}.ms-2{margin-left:.5rem !important}.ms-3{margin-left:1rem !important}.ms-4{margin-left:1.5rem !important}.ms-5{margin-left:3rem !important}.ms-auto{margin-left:auto !important}.p-0{padding:0 !important}.p-1{padding:.25rem !important}.p-2{padding:.5rem !important}.p-3{padding:1rem !important}.p-4{padding:1.5rem !important}.p-5{padding:3rem !important}.px-0{padding-right:0 !important;padding-left:0 !important}.px-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-3{padding-right:1rem !important;padding-left:1rem !important}.px-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-5{padding-right:3rem !important;padding-left:3rem !important}.py-0{padding-top:0 !important;padding-bottom:0 !important}.py-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-0{padding-top:0 !important}.pt-1{padding-top:.25rem !important}.pt-2{padding-top:.5rem !important}.pt-3{padding-top:1rem !important}.pt-4{padding-top:1.5rem !important}.pt-5{padding-top:3rem !important}.pe-0{padding-right:0 !important}.pe-1{padding-right:.25rem 
!important}.pe-2{padding-right:.5rem !important}.pe-3{padding-right:1rem !important}.pe-4{padding-right:1.5rem !important}.pe-5{padding-right:3rem !important}.pb-0{padding-bottom:0 !important}.pb-1{padding-bottom:.25rem !important}.pb-2{padding-bottom:.5rem !important}.pb-3{padding-bottom:1rem !important}.pb-4{padding-bottom:1.5rem !important}.pb-5{padding-bottom:3rem !important}.ps-0{padding-left:0 !important}.ps-1{padding-left:.25rem !important}.ps-2{padding-left:.5rem !important}.ps-3{padding-left:1rem !important}.ps-4{padding-left:1.5rem !important}.ps-5{padding-left:3rem !important}.font-monospace{font-family:var(--bs-font-monospace) !important}.fs-1{font-size:2.4em !important}.fs-2{font-size:calc(1.29rem + 0.48vw) !important}.fs-3{font-size:calc(1.27rem + 0.24vw) !important}.fs-4{font-size:1.25rem !important}.fs-5{font-size:1.1rem !important}.fs-6{font-size:1rem !important}.fst-italic{font-style:italic !important}.fst-normal{font-style:normal !important}.fw-light{font-weight:300 !important}.fw-lighter{font-weight:lighter !important}.fw-normal{font-weight:400 !important}.fw-bold{font-weight:700 !important}.fw-bolder{font-weight:bolder !important}.lh-1{line-height:1 !important}.lh-sm{line-height:1.25 !important}.lh-base{line-height:1.6 !important}.lh-lg{line-height:2 !important}.text-start{text-align:left !important}.text-end{text-align:right !important}.text-center{text-align:center !important}.text-decoration-none{text-decoration:none !important}.text-decoration-underline{text-decoration:underline !important}.text-decoration-line-through{text-decoration:line-through !important}.text-lowercase{text-transform:lowercase !important}.text-uppercase{text-transform:uppercase !important}.text-capitalize{text-transform:capitalize !important}.text-wrap{white-space:normal !important}.text-nowrap{white-space:nowrap !important}.text-break{word-wrap:break-word !important;word-break:break-word !important}.text-default{--bs-text-opacity: 1;color:rgba(var(--bs-default-rgb), 
var(--bs-text-opacity)) !important}.text-primary{--bs-text-opacity: 1;color:rgba(var(--bs-primary-rgb), var(--bs-text-opacity)) !important}.text-secondary{--bs-text-opacity: 1;color:rgba(var(--bs-secondary-rgb), var(--bs-text-opacity)) !important}.text-success{--bs-text-opacity: 1;color:rgba(var(--bs-success-rgb), var(--bs-text-opacity)) !important}.text-info{--bs-text-opacity: 1;color:rgba(var(--bs-info-rgb), var(--bs-text-opacity)) !important}.text-warning{--bs-text-opacity: 1;color:rgba(var(--bs-warning-rgb), var(--bs-text-opacity)) !important}.text-danger{--bs-text-opacity: 1;color:rgba(var(--bs-danger-rgb), var(--bs-text-opacity)) !important}.text-light{--bs-text-opacity: 1;color:rgba(var(--bs-light-rgb), var(--bs-text-opacity)) !important}.text-dark{--bs-text-opacity: 1;color:rgba(var(--bs-dark-rgb), var(--bs-text-opacity)) !important}.text-black{--bs-text-opacity: 1;color:rgba(var(--bs-black-rgb), var(--bs-text-opacity)) !important}.text-white{--bs-text-opacity: 1;color:rgba(var(--bs-white-rgb), var(--bs-text-opacity)) !important}.text-body{--bs-text-opacity: 1;color:rgba(var(--bs-body-color-rgb), var(--bs-text-opacity)) !important}.text-muted{--bs-text-opacity: 1;color:#6c757d !important}.text-black-50{--bs-text-opacity: 1;color:rgba(0,0,0,.5) !important}.text-white-50{--bs-text-opacity: 1;color:rgba(255,255,255,.5) !important}.text-reset{--bs-text-opacity: 1;color:inherit !important}.text-opacity-25{--bs-text-opacity: 0.25}.text-opacity-50{--bs-text-opacity: 0.5}.text-opacity-75{--bs-text-opacity: 0.75}.text-opacity-100{--bs-text-opacity: 1}.bg-default{--bs-bg-opacity: 1;background-color:rgba(var(--bs-default-rgb), var(--bs-bg-opacity)) !important}.bg-primary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-primary-rgb), var(--bs-bg-opacity)) !important}.bg-secondary{--bs-bg-opacity: 1;background-color:rgba(var(--bs-secondary-rgb), var(--bs-bg-opacity)) !important}.bg-success{--bs-bg-opacity: 1;background-color:rgba(var(--bs-success-rgb), 
var(--bs-bg-opacity)) !important}.bg-info{--bs-bg-opacity: 1;background-color:rgba(var(--bs-info-rgb), var(--bs-bg-opacity)) !important}.bg-warning{--bs-bg-opacity: 1;background-color:rgba(var(--bs-warning-rgb), var(--bs-bg-opacity)) !important}.bg-danger{--bs-bg-opacity: 1;background-color:rgba(var(--bs-danger-rgb), var(--bs-bg-opacity)) !important}.bg-light{--bs-bg-opacity: 1;background-color:rgba(var(--bs-light-rgb), var(--bs-bg-opacity)) !important}.bg-dark{--bs-bg-opacity: 1;background-color:rgba(var(--bs-dark-rgb), var(--bs-bg-opacity)) !important}.bg-black{--bs-bg-opacity: 1;background-color:rgba(var(--bs-black-rgb), var(--bs-bg-opacity)) !important}.bg-white{--bs-bg-opacity: 1;background-color:rgba(var(--bs-white-rgb), var(--bs-bg-opacity)) !important}.bg-body{--bs-bg-opacity: 1;background-color:rgba(var(--bs-body-bg-rgb), var(--bs-bg-opacity)) !important}.bg-transparent{--bs-bg-opacity: 1;background-color:rgba(0,0,0,0) !important}.bg-opacity-10{--bs-bg-opacity: 0.1}.bg-opacity-25{--bs-bg-opacity: 0.25}.bg-opacity-50{--bs-bg-opacity: 0.5}.bg-opacity-75{--bs-bg-opacity: 0.75}.bg-opacity-100{--bs-bg-opacity: 1}.bg-gradient{background-image:var(--bs-gradient) !important}.user-select-all{user-select:all !important}.user-select-auto{user-select:auto !important}.user-select-none{user-select:none !important}.pe-none{pointer-events:none !important}.pe-auto{pointer-events:auto !important}.rounded{border-radius:.25rem !important}.rounded-0{border-radius:0 !important}.rounded-1{border-radius:.2em !important}.rounded-2{border-radius:.25rem !important}.rounded-3{border-radius:.3rem !important}.rounded-circle{border-radius:50% !important}.rounded-pill{border-radius:50rem !important}.rounded-top{border-top-left-radius:.25rem !important;border-top-right-radius:.25rem !important}.rounded-end{border-top-right-radius:.25rem !important;border-bottom-right-radius:.25rem !important}.rounded-bottom{border-bottom-right-radius:.25rem !important;border-bottom-left-radius:.25rem 
!important}.rounded-start{border-bottom-left-radius:.25rem !important;border-top-left-radius:.25rem !important}.visible{visibility:visible !important}.invisible{visibility:hidden !important}@media(min-width: 576px){.float-sm-start{float:left !important}.float-sm-end{float:right !important}.float-sm-none{float:none !important}.d-sm-inline{display:inline !important}.d-sm-inline-block{display:inline-block !important}.d-sm-block{display:block !important}.d-sm-grid{display:grid !important}.d-sm-table{display:table !important}.d-sm-table-row{display:table-row !important}.d-sm-table-cell{display:table-cell !important}.d-sm-flex{display:flex !important}.d-sm-inline-flex{display:inline-flex !important}.d-sm-none{display:none !important}.flex-sm-fill{flex:1 1 auto !important}.flex-sm-row{flex-direction:row !important}.flex-sm-column{flex-direction:column !important}.flex-sm-row-reverse{flex-direction:row-reverse !important}.flex-sm-column-reverse{flex-direction:column-reverse !important}.flex-sm-grow-0{flex-grow:0 !important}.flex-sm-grow-1{flex-grow:1 !important}.flex-sm-shrink-0{flex-shrink:0 !important}.flex-sm-shrink-1{flex-shrink:1 !important}.flex-sm-wrap{flex-wrap:wrap !important}.flex-sm-nowrap{flex-wrap:nowrap !important}.flex-sm-wrap-reverse{flex-wrap:wrap-reverse !important}.gap-sm-0{gap:0 !important}.gap-sm-1{gap:.25rem !important}.gap-sm-2{gap:.5rem !important}.gap-sm-3{gap:1rem !important}.gap-sm-4{gap:1.5rem !important}.gap-sm-5{gap:3rem !important}.justify-content-sm-start{justify-content:flex-start !important}.justify-content-sm-end{justify-content:flex-end !important}.justify-content-sm-center{justify-content:center !important}.justify-content-sm-between{justify-content:space-between !important}.justify-content-sm-around{justify-content:space-around !important}.justify-content-sm-evenly{justify-content:space-evenly !important}.align-items-sm-start{align-items:flex-start !important}.align-items-sm-end{align-items:flex-end 
!important}.align-items-sm-center{align-items:center !important}.align-items-sm-baseline{align-items:baseline !important}.align-items-sm-stretch{align-items:stretch !important}.align-content-sm-start{align-content:flex-start !important}.align-content-sm-end{align-content:flex-end !important}.align-content-sm-center{align-content:center !important}.align-content-sm-between{align-content:space-between !important}.align-content-sm-around{align-content:space-around !important}.align-content-sm-stretch{align-content:stretch !important}.align-self-sm-auto{align-self:auto !important}.align-self-sm-start{align-self:flex-start !important}.align-self-sm-end{align-self:flex-end !important}.align-self-sm-center{align-self:center !important}.align-self-sm-baseline{align-self:baseline !important}.align-self-sm-stretch{align-self:stretch !important}.order-sm-first{order:-1 !important}.order-sm-0{order:0 !important}.order-sm-1{order:1 !important}.order-sm-2{order:2 !important}.order-sm-3{order:3 !important}.order-sm-4{order:4 !important}.order-sm-5{order:5 !important}.order-sm-last{order:6 !important}.m-sm-0{margin:0 !important}.m-sm-1{margin:.25rem !important}.m-sm-2{margin:.5rem !important}.m-sm-3{margin:1rem !important}.m-sm-4{margin:1.5rem !important}.m-sm-5{margin:3rem !important}.m-sm-auto{margin:auto !important}.mx-sm-0{margin-right:0 !important;margin-left:0 !important}.mx-sm-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-sm-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-sm-3{margin-right:1rem !important;margin-left:1rem !important}.mx-sm-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-sm-5{margin-right:3rem !important;margin-left:3rem !important}.mx-sm-auto{margin-right:auto !important;margin-left:auto !important}.my-sm-0{margin-top:0 !important;margin-bottom:0 !important}.my-sm-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-sm-2{margin-top:.5rem !important;margin-bottom:.5rem 
!important}.my-sm-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-sm-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-sm-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-sm-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-sm-0{margin-top:0 !important}.mt-sm-1{margin-top:.25rem !important}.mt-sm-2{margin-top:.5rem !important}.mt-sm-3{margin-top:1rem !important}.mt-sm-4{margin-top:1.5rem !important}.mt-sm-5{margin-top:3rem !important}.mt-sm-auto{margin-top:auto !important}.me-sm-0{margin-right:0 !important}.me-sm-1{margin-right:.25rem !important}.me-sm-2{margin-right:.5rem !important}.me-sm-3{margin-right:1rem !important}.me-sm-4{margin-right:1.5rem !important}.me-sm-5{margin-right:3rem !important}.me-sm-auto{margin-right:auto !important}.mb-sm-0{margin-bottom:0 !important}.mb-sm-1{margin-bottom:.25rem !important}.mb-sm-2{margin-bottom:.5rem !important}.mb-sm-3{margin-bottom:1rem !important}.mb-sm-4{margin-bottom:1.5rem !important}.mb-sm-5{margin-bottom:3rem !important}.mb-sm-auto{margin-bottom:auto !important}.ms-sm-0{margin-left:0 !important}.ms-sm-1{margin-left:.25rem !important}.ms-sm-2{margin-left:.5rem !important}.ms-sm-3{margin-left:1rem !important}.ms-sm-4{margin-left:1.5rem !important}.ms-sm-5{margin-left:3rem !important}.ms-sm-auto{margin-left:auto !important}.p-sm-0{padding:0 !important}.p-sm-1{padding:.25rem !important}.p-sm-2{padding:.5rem !important}.p-sm-3{padding:1rem !important}.p-sm-4{padding:1.5rem !important}.p-sm-5{padding:3rem !important}.px-sm-0{padding-right:0 !important;padding-left:0 !important}.px-sm-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-sm-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-sm-3{padding-right:1rem !important;padding-left:1rem !important}.px-sm-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-sm-5{padding-right:3rem !important;padding-left:3rem !important}.py-sm-0{padding-top:0 
!important;padding-bottom:0 !important}.py-sm-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-sm-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-sm-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-sm-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-sm-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-sm-0{padding-top:0 !important}.pt-sm-1{padding-top:.25rem !important}.pt-sm-2{padding-top:.5rem !important}.pt-sm-3{padding-top:1rem !important}.pt-sm-4{padding-top:1.5rem !important}.pt-sm-5{padding-top:3rem !important}.pe-sm-0{padding-right:0 !important}.pe-sm-1{padding-right:.25rem !important}.pe-sm-2{padding-right:.5rem !important}.pe-sm-3{padding-right:1rem !important}.pe-sm-4{padding-right:1.5rem !important}.pe-sm-5{padding-right:3rem !important}.pb-sm-0{padding-bottom:0 !important}.pb-sm-1{padding-bottom:.25rem !important}.pb-sm-2{padding-bottom:.5rem !important}.pb-sm-3{padding-bottom:1rem !important}.pb-sm-4{padding-bottom:1.5rem !important}.pb-sm-5{padding-bottom:3rem !important}.ps-sm-0{padding-left:0 !important}.ps-sm-1{padding-left:.25rem !important}.ps-sm-2{padding-left:.5rem !important}.ps-sm-3{padding-left:1rem !important}.ps-sm-4{padding-left:1.5rem !important}.ps-sm-5{padding-left:3rem !important}.text-sm-start{text-align:left !important}.text-sm-end{text-align:right !important}.text-sm-center{text-align:center !important}}@media(min-width: 768px){.float-md-start{float:left !important}.float-md-end{float:right !important}.float-md-none{float:none !important}.d-md-inline{display:inline !important}.d-md-inline-block{display:inline-block !important}.d-md-block{display:block !important}.d-md-grid{display:grid !important}.d-md-table{display:table !important}.d-md-table-row{display:table-row !important}.d-md-table-cell{display:table-cell !important}.d-md-flex{display:flex !important}.d-md-inline-flex{display:inline-flex !important}.d-md-none{display:none 
!important}.flex-md-fill{flex:1 1 auto !important}.flex-md-row{flex-direction:row !important}.flex-md-column{flex-direction:column !important}.flex-md-row-reverse{flex-direction:row-reverse !important}.flex-md-column-reverse{flex-direction:column-reverse !important}.flex-md-grow-0{flex-grow:0 !important}.flex-md-grow-1{flex-grow:1 !important}.flex-md-shrink-0{flex-shrink:0 !important}.flex-md-shrink-1{flex-shrink:1 !important}.flex-md-wrap{flex-wrap:wrap !important}.flex-md-nowrap{flex-wrap:nowrap !important}.flex-md-wrap-reverse{flex-wrap:wrap-reverse !important}.gap-md-0{gap:0 !important}.gap-md-1{gap:.25rem !important}.gap-md-2{gap:.5rem !important}.gap-md-3{gap:1rem !important}.gap-md-4{gap:1.5rem !important}.gap-md-5{gap:3rem !important}.justify-content-md-start{justify-content:flex-start !important}.justify-content-md-end{justify-content:flex-end !important}.justify-content-md-center{justify-content:center !important}.justify-content-md-between{justify-content:space-between !important}.justify-content-md-around{justify-content:space-around !important}.justify-content-md-evenly{justify-content:space-evenly !important}.align-items-md-start{align-items:flex-start !important}.align-items-md-end{align-items:flex-end !important}.align-items-md-center{align-items:center !important}.align-items-md-baseline{align-items:baseline !important}.align-items-md-stretch{align-items:stretch !important}.align-content-md-start{align-content:flex-start !important}.align-content-md-end{align-content:flex-end !important}.align-content-md-center{align-content:center !important}.align-content-md-between{align-content:space-between !important}.align-content-md-around{align-content:space-around !important}.align-content-md-stretch{align-content:stretch !important}.align-self-md-auto{align-self:auto !important}.align-self-md-start{align-self:flex-start !important}.align-self-md-end{align-self:flex-end !important}.align-self-md-center{align-self:center 
!important}.align-self-md-baseline{align-self:baseline !important}.align-self-md-stretch{align-self:stretch !important}.order-md-first{order:-1 !important}.order-md-0{order:0 !important}.order-md-1{order:1 !important}.order-md-2{order:2 !important}.order-md-3{order:3 !important}.order-md-4{order:4 !important}.order-md-5{order:5 !important}.order-md-last{order:6 !important}.m-md-0{margin:0 !important}.m-md-1{margin:.25rem !important}.m-md-2{margin:.5rem !important}.m-md-3{margin:1rem !important}.m-md-4{margin:1.5rem !important}.m-md-5{margin:3rem !important}.m-md-auto{margin:auto !important}.mx-md-0{margin-right:0 !important;margin-left:0 !important}.mx-md-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-md-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-md-3{margin-right:1rem !important;margin-left:1rem !important}.mx-md-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-md-5{margin-right:3rem !important;margin-left:3rem !important}.mx-md-auto{margin-right:auto !important;margin-left:auto !important}.my-md-0{margin-top:0 !important;margin-bottom:0 !important}.my-md-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-md-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-md-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-md-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-md-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-md-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-md-0{margin-top:0 !important}.mt-md-1{margin-top:.25rem !important}.mt-md-2{margin-top:.5rem !important}.mt-md-3{margin-top:1rem !important}.mt-md-4{margin-top:1.5rem !important}.mt-md-5{margin-top:3rem !important}.mt-md-auto{margin-top:auto !important}.me-md-0{margin-right:0 !important}.me-md-1{margin-right:.25rem !important}.me-md-2{margin-right:.5rem !important}.me-md-3{margin-right:1rem !important}.me-md-4{margin-right:1.5rem 
!important}.me-md-5{margin-right:3rem !important}.me-md-auto{margin-right:auto !important}.mb-md-0{margin-bottom:0 !important}.mb-md-1{margin-bottom:.25rem !important}.mb-md-2{margin-bottom:.5rem !important}.mb-md-3{margin-bottom:1rem !important}.mb-md-4{margin-bottom:1.5rem !important}.mb-md-5{margin-bottom:3rem !important}.mb-md-auto{margin-bottom:auto !important}.ms-md-0{margin-left:0 !important}.ms-md-1{margin-left:.25rem !important}.ms-md-2{margin-left:.5rem !important}.ms-md-3{margin-left:1rem !important}.ms-md-4{margin-left:1.5rem !important}.ms-md-5{margin-left:3rem !important}.ms-md-auto{margin-left:auto !important}.p-md-0{padding:0 !important}.p-md-1{padding:.25rem !important}.p-md-2{padding:.5rem !important}.p-md-3{padding:1rem !important}.p-md-4{padding:1.5rem !important}.p-md-5{padding:3rem !important}.px-md-0{padding-right:0 !important;padding-left:0 !important}.px-md-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-md-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-md-3{padding-right:1rem !important;padding-left:1rem !important}.px-md-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-md-5{padding-right:3rem !important;padding-left:3rem !important}.py-md-0{padding-top:0 !important;padding-bottom:0 !important}.py-md-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-md-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-md-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-md-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-md-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-md-0{padding-top:0 !important}.pt-md-1{padding-top:.25rem !important}.pt-md-2{padding-top:.5rem !important}.pt-md-3{padding-top:1rem !important}.pt-md-4{padding-top:1.5rem !important}.pt-md-5{padding-top:3rem !important}.pe-md-0{padding-right:0 !important}.pe-md-1{padding-right:.25rem !important}.pe-md-2{padding-right:.5rem 
!important}.pe-md-3{padding-right:1rem !important}.pe-md-4{padding-right:1.5rem !important}.pe-md-5{padding-right:3rem !important}.pb-md-0{padding-bottom:0 !important}.pb-md-1{padding-bottom:.25rem !important}.pb-md-2{padding-bottom:.5rem !important}.pb-md-3{padding-bottom:1rem !important}.pb-md-4{padding-bottom:1.5rem !important}.pb-md-5{padding-bottom:3rem !important}.ps-md-0{padding-left:0 !important}.ps-md-1{padding-left:.25rem !important}.ps-md-2{padding-left:.5rem !important}.ps-md-3{padding-left:1rem !important}.ps-md-4{padding-left:1.5rem !important}.ps-md-5{padding-left:3rem !important}.text-md-start{text-align:left !important}.text-md-end{text-align:right !important}.text-md-center{text-align:center !important}}@media(min-width: 992px){.float-lg-start{float:left !important}.float-lg-end{float:right !important}.float-lg-none{float:none !important}.d-lg-inline{display:inline !important}.d-lg-inline-block{display:inline-block !important}.d-lg-block{display:block !important}.d-lg-grid{display:grid !important}.d-lg-table{display:table !important}.d-lg-table-row{display:table-row !important}.d-lg-table-cell{display:table-cell !important}.d-lg-flex{display:flex !important}.d-lg-inline-flex{display:inline-flex !important}.d-lg-none{display:none !important}.flex-lg-fill{flex:1 1 auto !important}.flex-lg-row{flex-direction:row !important}.flex-lg-column{flex-direction:column !important}.flex-lg-row-reverse{flex-direction:row-reverse !important}.flex-lg-column-reverse{flex-direction:column-reverse !important}.flex-lg-grow-0{flex-grow:0 !important}.flex-lg-grow-1{flex-grow:1 !important}.flex-lg-shrink-0{flex-shrink:0 !important}.flex-lg-shrink-1{flex-shrink:1 !important}.flex-lg-wrap{flex-wrap:wrap !important}.flex-lg-nowrap{flex-wrap:nowrap !important}.flex-lg-wrap-reverse{flex-wrap:wrap-reverse !important}.gap-lg-0{gap:0 !important}.gap-lg-1{gap:.25rem !important}.gap-lg-2{gap:.5rem !important}.gap-lg-3{gap:1rem !important}.gap-lg-4{gap:1.5rem 
!important}.gap-lg-5{gap:3rem !important}.justify-content-lg-start{justify-content:flex-start !important}.justify-content-lg-end{justify-content:flex-end !important}.justify-content-lg-center{justify-content:center !important}.justify-content-lg-between{justify-content:space-between !important}.justify-content-lg-around{justify-content:space-around !important}.justify-content-lg-evenly{justify-content:space-evenly !important}.align-items-lg-start{align-items:flex-start !important}.align-items-lg-end{align-items:flex-end !important}.align-items-lg-center{align-items:center !important}.align-items-lg-baseline{align-items:baseline !important}.align-items-lg-stretch{align-items:stretch !important}.align-content-lg-start{align-content:flex-start !important}.align-content-lg-end{align-content:flex-end !important}.align-content-lg-center{align-content:center !important}.align-content-lg-between{align-content:space-between !important}.align-content-lg-around{align-content:space-around !important}.align-content-lg-stretch{align-content:stretch !important}.align-self-lg-auto{align-self:auto !important}.align-self-lg-start{align-self:flex-start !important}.align-self-lg-end{align-self:flex-end !important}.align-self-lg-center{align-self:center !important}.align-self-lg-baseline{align-self:baseline !important}.align-self-lg-stretch{align-self:stretch !important}.order-lg-first{order:-1 !important}.order-lg-0{order:0 !important}.order-lg-1{order:1 !important}.order-lg-2{order:2 !important}.order-lg-3{order:3 !important}.order-lg-4{order:4 !important}.order-lg-5{order:5 !important}.order-lg-last{order:6 !important}.m-lg-0{margin:0 !important}.m-lg-1{margin:.25rem !important}.m-lg-2{margin:.5rem !important}.m-lg-3{margin:1rem !important}.m-lg-4{margin:1.5rem !important}.m-lg-5{margin:3rem !important}.m-lg-auto{margin:auto !important}.mx-lg-0{margin-right:0 !important;margin-left:0 !important}.mx-lg-1{margin-right:.25rem !important;margin-left:.25rem 
!important}.mx-lg-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-lg-3{margin-right:1rem !important;margin-left:1rem !important}.mx-lg-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-lg-5{margin-right:3rem !important;margin-left:3rem !important}.mx-lg-auto{margin-right:auto !important;margin-left:auto !important}.my-lg-0{margin-top:0 !important;margin-bottom:0 !important}.my-lg-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-lg-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-lg-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-lg-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-lg-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-lg-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-lg-0{margin-top:0 !important}.mt-lg-1{margin-top:.25rem !important}.mt-lg-2{margin-top:.5rem !important}.mt-lg-3{margin-top:1rem !important}.mt-lg-4{margin-top:1.5rem !important}.mt-lg-5{margin-top:3rem !important}.mt-lg-auto{margin-top:auto !important}.me-lg-0{margin-right:0 !important}.me-lg-1{margin-right:.25rem !important}.me-lg-2{margin-right:.5rem !important}.me-lg-3{margin-right:1rem !important}.me-lg-4{margin-right:1.5rem !important}.me-lg-5{margin-right:3rem !important}.me-lg-auto{margin-right:auto !important}.mb-lg-0{margin-bottom:0 !important}.mb-lg-1{margin-bottom:.25rem !important}.mb-lg-2{margin-bottom:.5rem !important}.mb-lg-3{margin-bottom:1rem !important}.mb-lg-4{margin-bottom:1.5rem !important}.mb-lg-5{margin-bottom:3rem !important}.mb-lg-auto{margin-bottom:auto !important}.ms-lg-0{margin-left:0 !important}.ms-lg-1{margin-left:.25rem !important}.ms-lg-2{margin-left:.5rem !important}.ms-lg-3{margin-left:1rem !important}.ms-lg-4{margin-left:1.5rem !important}.ms-lg-5{margin-left:3rem !important}.ms-lg-auto{margin-left:auto !important}.p-lg-0{padding:0 !important}.p-lg-1{padding:.25rem !important}.p-lg-2{padding:.5rem 
!important}.p-lg-3{padding:1rem !important}.p-lg-4{padding:1.5rem !important}.p-lg-5{padding:3rem !important}.px-lg-0{padding-right:0 !important;padding-left:0 !important}.px-lg-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-lg-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-lg-3{padding-right:1rem !important;padding-left:1rem !important}.px-lg-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-lg-5{padding-right:3rem !important;padding-left:3rem !important}.py-lg-0{padding-top:0 !important;padding-bottom:0 !important}.py-lg-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-lg-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-lg-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-lg-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-lg-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-lg-0{padding-top:0 !important}.pt-lg-1{padding-top:.25rem !important}.pt-lg-2{padding-top:.5rem !important}.pt-lg-3{padding-top:1rem !important}.pt-lg-4{padding-top:1.5rem !important}.pt-lg-5{padding-top:3rem !important}.pe-lg-0{padding-right:0 !important}.pe-lg-1{padding-right:.25rem !important}.pe-lg-2{padding-right:.5rem !important}.pe-lg-3{padding-right:1rem !important}.pe-lg-4{padding-right:1.5rem !important}.pe-lg-5{padding-right:3rem !important}.pb-lg-0{padding-bottom:0 !important}.pb-lg-1{padding-bottom:.25rem !important}.pb-lg-2{padding-bottom:.5rem !important}.pb-lg-3{padding-bottom:1rem !important}.pb-lg-4{padding-bottom:1.5rem !important}.pb-lg-5{padding-bottom:3rem !important}.ps-lg-0{padding-left:0 !important}.ps-lg-1{padding-left:.25rem !important}.ps-lg-2{padding-left:.5rem !important}.ps-lg-3{padding-left:1rem !important}.ps-lg-4{padding-left:1.5rem !important}.ps-lg-5{padding-left:3rem !important}.text-lg-start{text-align:left !important}.text-lg-end{text-align:right !important}.text-lg-center{text-align:center 
!important}}@media(min-width: 1200px){.float-xl-start{float:left !important}.float-xl-end{float:right !important}.float-xl-none{float:none !important}.d-xl-inline{display:inline !important}.d-xl-inline-block{display:inline-block !important}.d-xl-block{display:block !important}.d-xl-grid{display:grid !important}.d-xl-table{display:table !important}.d-xl-table-row{display:table-row !important}.d-xl-table-cell{display:table-cell !important}.d-xl-flex{display:flex !important}.d-xl-inline-flex{display:inline-flex !important}.d-xl-none{display:none !important}.flex-xl-fill{flex:1 1 auto !important}.flex-xl-row{flex-direction:row !important}.flex-xl-column{flex-direction:column !important}.flex-xl-row-reverse{flex-direction:row-reverse !important}.flex-xl-column-reverse{flex-direction:column-reverse !important}.flex-xl-grow-0{flex-grow:0 !important}.flex-xl-grow-1{flex-grow:1 !important}.flex-xl-shrink-0{flex-shrink:0 !important}.flex-xl-shrink-1{flex-shrink:1 !important}.flex-xl-wrap{flex-wrap:wrap !important}.flex-xl-nowrap{flex-wrap:nowrap !important}.flex-xl-wrap-reverse{flex-wrap:wrap-reverse !important}.gap-xl-0{gap:0 !important}.gap-xl-1{gap:.25rem !important}.gap-xl-2{gap:.5rem !important}.gap-xl-3{gap:1rem !important}.gap-xl-4{gap:1.5rem !important}.gap-xl-5{gap:3rem !important}.justify-content-xl-start{justify-content:flex-start !important}.justify-content-xl-end{justify-content:flex-end !important}.justify-content-xl-center{justify-content:center !important}.justify-content-xl-between{justify-content:space-between !important}.justify-content-xl-around{justify-content:space-around !important}.justify-content-xl-evenly{justify-content:space-evenly !important}.align-items-xl-start{align-items:flex-start !important}.align-items-xl-end{align-items:flex-end !important}.align-items-xl-center{align-items:center !important}.align-items-xl-baseline{align-items:baseline !important}.align-items-xl-stretch{align-items:stretch 
!important}.align-content-xl-start{align-content:flex-start !important}.align-content-xl-end{align-content:flex-end !important}.align-content-xl-center{align-content:center !important}.align-content-xl-between{align-content:space-between !important}.align-content-xl-around{align-content:space-around !important}.align-content-xl-stretch{align-content:stretch !important}.align-self-xl-auto{align-self:auto !important}.align-self-xl-start{align-self:flex-start !important}.align-self-xl-end{align-self:flex-end !important}.align-self-xl-center{align-self:center !important}.align-self-xl-baseline{align-self:baseline !important}.align-self-xl-stretch{align-self:stretch !important}.order-xl-first{order:-1 !important}.order-xl-0{order:0 !important}.order-xl-1{order:1 !important}.order-xl-2{order:2 !important}.order-xl-3{order:3 !important}.order-xl-4{order:4 !important}.order-xl-5{order:5 !important}.order-xl-last{order:6 !important}.m-xl-0{margin:0 !important}.m-xl-1{margin:.25rem !important}.m-xl-2{margin:.5rem !important}.m-xl-3{margin:1rem !important}.m-xl-4{margin:1.5rem !important}.m-xl-5{margin:3rem !important}.m-xl-auto{margin:auto !important}.mx-xl-0{margin-right:0 !important;margin-left:0 !important}.mx-xl-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-xl-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-xl-3{margin-right:1rem !important;margin-left:1rem !important}.mx-xl-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-xl-5{margin-right:3rem !important;margin-left:3rem !important}.mx-xl-auto{margin-right:auto !important;margin-left:auto !important}.my-xl-0{margin-top:0 !important;margin-bottom:0 !important}.my-xl-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-xl-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-xl-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-xl-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-xl-5{margin-top:3rem 
!important;margin-bottom:3rem !important}.my-xl-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-xl-0{margin-top:0 !important}.mt-xl-1{margin-top:.25rem !important}.mt-xl-2{margin-top:.5rem !important}.mt-xl-3{margin-top:1rem !important}.mt-xl-4{margin-top:1.5rem !important}.mt-xl-5{margin-top:3rem !important}.mt-xl-auto{margin-top:auto !important}.me-xl-0{margin-right:0 !important}.me-xl-1{margin-right:.25rem !important}.me-xl-2{margin-right:.5rem !important}.me-xl-3{margin-right:1rem !important}.me-xl-4{margin-right:1.5rem !important}.me-xl-5{margin-right:3rem !important}.me-xl-auto{margin-right:auto !important}.mb-xl-0{margin-bottom:0 !important}.mb-xl-1{margin-bottom:.25rem !important}.mb-xl-2{margin-bottom:.5rem !important}.mb-xl-3{margin-bottom:1rem !important}.mb-xl-4{margin-bottom:1.5rem !important}.mb-xl-5{margin-bottom:3rem !important}.mb-xl-auto{margin-bottom:auto !important}.ms-xl-0{margin-left:0 !important}.ms-xl-1{margin-left:.25rem !important}.ms-xl-2{margin-left:.5rem !important}.ms-xl-3{margin-left:1rem !important}.ms-xl-4{margin-left:1.5rem !important}.ms-xl-5{margin-left:3rem !important}.ms-xl-auto{margin-left:auto !important}.p-xl-0{padding:0 !important}.p-xl-1{padding:.25rem !important}.p-xl-2{padding:.5rem !important}.p-xl-3{padding:1rem !important}.p-xl-4{padding:1.5rem !important}.p-xl-5{padding:3rem !important}.px-xl-0{padding-right:0 !important;padding-left:0 !important}.px-xl-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-xl-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-xl-3{padding-right:1rem !important;padding-left:1rem !important}.px-xl-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-xl-5{padding-right:3rem !important;padding-left:3rem !important}.py-xl-0{padding-top:0 !important;padding-bottom:0 !important}.py-xl-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-xl-2{padding-top:.5rem !important;padding-bottom:.5rem 
!important}.py-xl-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-xl-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-xl-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-xl-0{padding-top:0 !important}.pt-xl-1{padding-top:.25rem !important}.pt-xl-2{padding-top:.5rem !important}.pt-xl-3{padding-top:1rem !important}.pt-xl-4{padding-top:1.5rem !important}.pt-xl-5{padding-top:3rem !important}.pe-xl-0{padding-right:0 !important}.pe-xl-1{padding-right:.25rem !important}.pe-xl-2{padding-right:.5rem !important}.pe-xl-3{padding-right:1rem !important}.pe-xl-4{padding-right:1.5rem !important}.pe-xl-5{padding-right:3rem !important}.pb-xl-0{padding-bottom:0 !important}.pb-xl-1{padding-bottom:.25rem !important}.pb-xl-2{padding-bottom:.5rem !important}.pb-xl-3{padding-bottom:1rem !important}.pb-xl-4{padding-bottom:1.5rem !important}.pb-xl-5{padding-bottom:3rem !important}.ps-xl-0{padding-left:0 !important}.ps-xl-1{padding-left:.25rem !important}.ps-xl-2{padding-left:.5rem !important}.ps-xl-3{padding-left:1rem !important}.ps-xl-4{padding-left:1.5rem !important}.ps-xl-5{padding-left:3rem !important}.text-xl-start{text-align:left !important}.text-xl-end{text-align:right !important}.text-xl-center{text-align:center !important}}@media(min-width: 1400px){.float-xxl-start{float:left !important}.float-xxl-end{float:right !important}.float-xxl-none{float:none !important}.d-xxl-inline{display:inline !important}.d-xxl-inline-block{display:inline-block !important}.d-xxl-block{display:block !important}.d-xxl-grid{display:grid !important}.d-xxl-table{display:table !important}.d-xxl-table-row{display:table-row !important}.d-xxl-table-cell{display:table-cell !important}.d-xxl-flex{display:flex !important}.d-xxl-inline-flex{display:inline-flex !important}.d-xxl-none{display:none !important}.flex-xxl-fill{flex:1 1 auto !important}.flex-xxl-row{flex-direction:row !important}.flex-xxl-column{flex-direction:column 
!important}.flex-xxl-row-reverse{flex-direction:row-reverse !important}.flex-xxl-column-reverse{flex-direction:column-reverse !important}.flex-xxl-grow-0{flex-grow:0 !important}.flex-xxl-grow-1{flex-grow:1 !important}.flex-xxl-shrink-0{flex-shrink:0 !important}.flex-xxl-shrink-1{flex-shrink:1 !important}.flex-xxl-wrap{flex-wrap:wrap !important}.flex-xxl-nowrap{flex-wrap:nowrap !important}.flex-xxl-wrap-reverse{flex-wrap:wrap-reverse !important}.gap-xxl-0{gap:0 !important}.gap-xxl-1{gap:.25rem !important}.gap-xxl-2{gap:.5rem !important}.gap-xxl-3{gap:1rem !important}.gap-xxl-4{gap:1.5rem !important}.gap-xxl-5{gap:3rem !important}.justify-content-xxl-start{justify-content:flex-start !important}.justify-content-xxl-end{justify-content:flex-end !important}.justify-content-xxl-center{justify-content:center !important}.justify-content-xxl-between{justify-content:space-between !important}.justify-content-xxl-around{justify-content:space-around !important}.justify-content-xxl-evenly{justify-content:space-evenly !important}.align-items-xxl-start{align-items:flex-start !important}.align-items-xxl-end{align-items:flex-end !important}.align-items-xxl-center{align-items:center !important}.align-items-xxl-baseline{align-items:baseline !important}.align-items-xxl-stretch{align-items:stretch !important}.align-content-xxl-start{align-content:flex-start !important}.align-content-xxl-end{align-content:flex-end !important}.align-content-xxl-center{align-content:center !important}.align-content-xxl-between{align-content:space-between !important}.align-content-xxl-around{align-content:space-around !important}.align-content-xxl-stretch{align-content:stretch !important}.align-self-xxl-auto{align-self:auto !important}.align-self-xxl-start{align-self:flex-start !important}.align-self-xxl-end{align-self:flex-end !important}.align-self-xxl-center{align-self:center !important}.align-self-xxl-baseline{align-self:baseline !important}.align-self-xxl-stretch{align-self:stretch 
!important}.order-xxl-first{order:-1 !important}.order-xxl-0{order:0 !important}.order-xxl-1{order:1 !important}.order-xxl-2{order:2 !important}.order-xxl-3{order:3 !important}.order-xxl-4{order:4 !important}.order-xxl-5{order:5 !important}.order-xxl-last{order:6 !important}.m-xxl-0{margin:0 !important}.m-xxl-1{margin:.25rem !important}.m-xxl-2{margin:.5rem !important}.m-xxl-3{margin:1rem !important}.m-xxl-4{margin:1.5rem !important}.m-xxl-5{margin:3rem !important}.m-xxl-auto{margin:auto !important}.mx-xxl-0{margin-right:0 !important;margin-left:0 !important}.mx-xxl-1{margin-right:.25rem !important;margin-left:.25rem !important}.mx-xxl-2{margin-right:.5rem !important;margin-left:.5rem !important}.mx-xxl-3{margin-right:1rem !important;margin-left:1rem !important}.mx-xxl-4{margin-right:1.5rem !important;margin-left:1.5rem !important}.mx-xxl-5{margin-right:3rem !important;margin-left:3rem !important}.mx-xxl-auto{margin-right:auto !important;margin-left:auto !important}.my-xxl-0{margin-top:0 !important;margin-bottom:0 !important}.my-xxl-1{margin-top:.25rem !important;margin-bottom:.25rem !important}.my-xxl-2{margin-top:.5rem !important;margin-bottom:.5rem !important}.my-xxl-3{margin-top:1rem !important;margin-bottom:1rem !important}.my-xxl-4{margin-top:1.5rem !important;margin-bottom:1.5rem !important}.my-xxl-5{margin-top:3rem !important;margin-bottom:3rem !important}.my-xxl-auto{margin-top:auto !important;margin-bottom:auto !important}.mt-xxl-0{margin-top:0 !important}.mt-xxl-1{margin-top:.25rem !important}.mt-xxl-2{margin-top:.5rem !important}.mt-xxl-3{margin-top:1rem !important}.mt-xxl-4{margin-top:1.5rem !important}.mt-xxl-5{margin-top:3rem !important}.mt-xxl-auto{margin-top:auto !important}.me-xxl-0{margin-right:0 !important}.me-xxl-1{margin-right:.25rem !important}.me-xxl-2{margin-right:.5rem !important}.me-xxl-3{margin-right:1rem !important}.me-xxl-4{margin-right:1.5rem !important}.me-xxl-5{margin-right:3rem !important}.me-xxl-auto{margin-right:auto 
!important}.mb-xxl-0{margin-bottom:0 !important}.mb-xxl-1{margin-bottom:.25rem !important}.mb-xxl-2{margin-bottom:.5rem !important}.mb-xxl-3{margin-bottom:1rem !important}.mb-xxl-4{margin-bottom:1.5rem !important}.mb-xxl-5{margin-bottom:3rem !important}.mb-xxl-auto{margin-bottom:auto !important}.ms-xxl-0{margin-left:0 !important}.ms-xxl-1{margin-left:.25rem !important}.ms-xxl-2{margin-left:.5rem !important}.ms-xxl-3{margin-left:1rem !important}.ms-xxl-4{margin-left:1.5rem !important}.ms-xxl-5{margin-left:3rem !important}.ms-xxl-auto{margin-left:auto !important}.p-xxl-0{padding:0 !important}.p-xxl-1{padding:.25rem !important}.p-xxl-2{padding:.5rem !important}.p-xxl-3{padding:1rem !important}.p-xxl-4{padding:1.5rem !important}.p-xxl-5{padding:3rem !important}.px-xxl-0{padding-right:0 !important;padding-left:0 !important}.px-xxl-1{padding-right:.25rem !important;padding-left:.25rem !important}.px-xxl-2{padding-right:.5rem !important;padding-left:.5rem !important}.px-xxl-3{padding-right:1rem !important;padding-left:1rem !important}.px-xxl-4{padding-right:1.5rem !important;padding-left:1.5rem !important}.px-xxl-5{padding-right:3rem !important;padding-left:3rem !important}.py-xxl-0{padding-top:0 !important;padding-bottom:0 !important}.py-xxl-1{padding-top:.25rem !important;padding-bottom:.25rem !important}.py-xxl-2{padding-top:.5rem !important;padding-bottom:.5rem !important}.py-xxl-3{padding-top:1rem !important;padding-bottom:1rem !important}.py-xxl-4{padding-top:1.5rem !important;padding-bottom:1.5rem !important}.py-xxl-5{padding-top:3rem !important;padding-bottom:3rem !important}.pt-xxl-0{padding-top:0 !important}.pt-xxl-1{padding-top:.25rem !important}.pt-xxl-2{padding-top:.5rem !important}.pt-xxl-3{padding-top:1rem !important}.pt-xxl-4{padding-top:1.5rem !important}.pt-xxl-5{padding-top:3rem !important}.pe-xxl-0{padding-right:0 !important}.pe-xxl-1{padding-right:.25rem !important}.pe-xxl-2{padding-right:.5rem !important}.pe-xxl-3{padding-right:1rem 
!important}.pe-xxl-4{padding-right:1.5rem !important}.pe-xxl-5{padding-right:3rem !important}.pb-xxl-0{padding-bottom:0 !important}.pb-xxl-1{padding-bottom:.25rem !important}.pb-xxl-2{padding-bottom:.5rem !important}.pb-xxl-3{padding-bottom:1rem !important}.pb-xxl-4{padding-bottom:1.5rem !important}.pb-xxl-5{padding-bottom:3rem !important}.ps-xxl-0{padding-left:0 !important}.ps-xxl-1{padding-left:.25rem !important}.ps-xxl-2{padding-left:.5rem !important}.ps-xxl-3{padding-left:1rem !important}.ps-xxl-4{padding-left:1.5rem !important}.ps-xxl-5{padding-left:3rem !important}.text-xxl-start{text-align:left !important}.text-xxl-end{text-align:right !important}.text-xxl-center{text-align:center !important}}.bg-default{color:#fff}.bg-primary{color:#fff}.bg-secondary{color:#fff}.bg-success{color:#fff}.bg-info{color:#fff}.bg-warning{color:#fff}.bg-danger{color:#fff}.bg-light{color:#000}.bg-dark{color:#fff}@media(min-width: 1200px){.fs-2{font-size:1.65rem !important}.fs-3{font-size:1.45rem !important}}@media print{.d-print-inline{display:inline !important}.d-print-inline-block{display:inline-block !important}.d-print-block{display:block !important}.d-print-grid{display:grid !important}.d-print-table{display:table !important}.d-print-table-row{display:table-row !important}.d-print-table-cell{display:table-cell !important}.d-print-flex{display:flex !important}.d-print-inline-flex{display:inline-flex !important}.d-print-none{display:none !important}}.quarto-container{min-height:calc(100vh - 132px)}footer.footer .nav-footer,#quarto-header>nav{padding-left:1em;padding-right:1em}nav[role=doc-toc]{padding-left:.5em}#quarto-content>*{padding-top:14px}@media(max-width: 991.98px){#quarto-content>*{padding-top:0}#quarto-content .subtitle{padding-top:14px}#quarto-content section:first-of-type h2:first-of-type,#quarto-content section:first-of-type .h2:first-of-type{margin-top:1rem}}.headroom-target,header.headroom{will-change:transform;transition:position 200ms linear;transition:all 200ms 
linear}header.headroom--pinned{transform:translateY(0%)}header.headroom--unpinned{transform:translateY(-100%)}.navbar-container{width:100%}.navbar-brand{overflow:hidden;text-overflow:ellipsis}.navbar-brand-container{max-width:calc(100% - 115px);min-width:0;display:flex;align-items:center}@media(min-width: 992px){.navbar-brand-container{margin-right:1em}}.navbar-brand.navbar-brand-logo{margin-right:4px;display:inline-flex}.navbar-toggler{flex-basis:content;flex-shrink:0}.navbar .navbar-toggler{order:-1;margin-right:.5em}.navbar-logo{max-height:24px;width:auto;padding-right:4px}nav .nav-item:not(.compact){padding-top:1px}nav .nav-link i,nav .dropdown-item i{padding-right:1px}.navbar-expand-lg .navbar-nav .nav-link{padding-left:.6rem;padding-right:.6rem}nav .nav-item.compact .nav-link{padding-left:.5rem;padding-right:.5rem;font-size:1.1rem}.navbar .quarto-navbar-tools div.dropdown{display:inline-block}.navbar .quarto-navbar-tools .quarto-navigation-tool{color:#595959}.navbar .quarto-navbar-tools .quarto-navigation-tool:hover{color:#aa1d4f}@media(max-width: 991.98px){.navbar .quarto-navbar-tools{margin-top:.25em;padding-top:.75em;display:block;color:solid #d9d9d9 1px;text-align:center;vertical-align:middle;margin-right:auto}}.navbar-nav .dropdown-menu{min-width:220px;font-size:.9rem}.navbar .navbar-nav .nav-link.dropdown-toggle::after{opacity:.75;vertical-align:.175em}.navbar ul.dropdown-menu{padding-top:0;padding-bottom:0}.navbar .dropdown-header{text-transform:uppercase;font-size:.8rem;padding:0 .5rem}.navbar .dropdown-item{padding:.4rem .5rem}.navbar .dropdown-item>i.bi{margin-left:.1rem;margin-right:.25em}.sidebar #quarto-search{margin-top:-1px}.sidebar #quarto-search svg.aa-SubmitIcon{width:16px;height:16px}.sidebar-navigation a{color:inherit}.sidebar-title{margin-top:.25rem;padding-bottom:.5rem;font-size:1.3rem;line-height:1.6rem;visibility:visible}.sidebar-title>a{font-size:inherit;text-decoration:none}.sidebar-title 
.sidebar-tools-main{margin-top:-6px}@media(max-width: 991.98px){#quarto-sidebar div.sidebar-header{padding-top:.2em}}.sidebar-header-stacked .sidebar-title{margin-top:.6rem}.sidebar-logo{max-width:90%;padding-bottom:.5rem}.sidebar-logo-link{text-decoration:none}.sidebar-navigation li a{text-decoration:none}.sidebar-navigation .quarto-navigation-tool{opacity:.7;font-size:.875rem}#quarto-sidebar>nav>.sidebar-tools-main{margin-left:14px}.sidebar-tools-main{display:inline-flex;margin-left:0px;order:2}.sidebar-tools-main:not(.tools-wide){vertical-align:middle}.sidebar-navigation .quarto-navigation-tool.dropdown-toggle::after{display:none}.sidebar.sidebar-navigation>*{padding-top:1em}.sidebar-item{margin-bottom:.2em}.sidebar-section{margin-top:.2em;padding-left:.5em;padding-bottom:.2em}.sidebar-item .sidebar-item-container{display:flex;justify-content:space-between}.sidebar-item-toggle:hover{cursor:pointer}.sidebar-item .sidebar-item-toggle .bi{font-size:.7rem;text-align:center}.sidebar-item .sidebar-item-toggle .bi-chevron-right::before{transition:transform 200ms ease}.sidebar-item .sidebar-item-toggle[aria-expanded=false] .bi-chevron-right::before{transform:none}.sidebar-item .sidebar-item-toggle[aria-expanded=true] .bi-chevron-right::before{transform:rotate(90deg)}.sidebar-navigation .sidebar-divider{margin-left:0;margin-right:0;margin-top:.5rem;margin-bottom:.5rem}@media(max-width: 991.98px){.quarto-secondary-nav{display:block}.quarto-secondary-nav button.quarto-search-button{padding-right:0em;padding-left:2em}.quarto-secondary-nav button.quarto-btn-toggle{margin-left:-0.75rem;margin-right:.15rem}.quarto-secondary-nav nav.quarto-page-breadcrumbs{display:flex;align-items:center;padding-right:1em;margin-left:-0.25em}.quarto-secondary-nav nav.quarto-page-breadcrumbs a{text-decoration:none}.quarto-secondary-nav nav.quarto-page-breadcrumbs ol.breadcrumb{margin-bottom:0}}@media(min-width: 992px){.quarto-secondary-nav{display:none}}.quarto-secondary-nav 
.quarto-btn-toggle{color:#595959}.quarto-secondary-nav[aria-expanded=false] .quarto-btn-toggle .bi-chevron-right::before{transform:none}.quarto-secondary-nav[aria-expanded=true] .quarto-btn-toggle .bi-chevron-right::before{transform:rotate(90deg)}.quarto-secondary-nav .quarto-btn-toggle .bi-chevron-right::before{transition:transform 200ms ease}.quarto-secondary-nav{cursor:pointer}.quarto-secondary-nav-title{margin-top:.3em;color:#595959;padding-top:4px}.quarto-secondary-nav nav.quarto-page-breadcrumbs{color:#595959}.quarto-secondary-nav nav.quarto-page-breadcrumbs a{color:#595959}.quarto-secondary-nav nav.quarto-page-breadcrumbs a:hover{color:rgba(170,29,79,.8)}.quarto-secondary-nav nav.quarto-page-breadcrumbs .breadcrumb-item::before{color:#8c8c8c}div.sidebar-item-container{color:#595959}div.sidebar-item-container:hover,div.sidebar-item-container:focus{color:rgba(170,29,79,.8)}div.sidebar-item-container.disabled{color:rgba(89,89,89,.75)}div.sidebar-item-container .active,div.sidebar-item-container .show>.nav-link,div.sidebar-item-container .sidebar-link>code{color:#aa1d4f}div.sidebar.sidebar-navigation.rollup.quarto-sidebar-toggle-contents,nav.sidebar.sidebar-navigation:not(.rollup){background-color:#fff}@media(max-width: 991.98px){.sidebar-navigation .sidebar-item a,.nav-page .nav-page-text,.sidebar-navigation{font-size:1rem}.sidebar-navigation ul.sidebar-section.depth1 .sidebar-section-item{font-size:1.1rem}.sidebar-logo{display:none}.sidebar.sidebar-navigation{position:static;border-bottom:1px solid #dee2e6}.sidebar.sidebar-navigation.collapsing{position:fixed;z-index:1000}.sidebar.sidebar-navigation.show{position:fixed;z-index:1000}.sidebar.sidebar-navigation{min-height:100%}nav.quarto-secondary-nav{background-color:#fff;border-bottom:1px solid #dee2e6}.sidebar .sidebar-footer{visibility:visible;padding-top:1rem;position:inherit}.sidebar-tools-collapse{display:block}}#quarto-sidebar{transition:width .15s 
ease-in}#quarto-sidebar>*{padding-right:1em}@media(max-width: 991.98px){#quarto-sidebar .sidebar-menu-container{white-space:nowrap;min-width:225px}#quarto-sidebar.show{transition:width .15s ease-out}}@media(min-width: 992px){#quarto-sidebar{display:flex;flex-direction:column}.nav-page .nav-page-text,.sidebar-navigation .sidebar-section .sidebar-item{font-size:.875rem}.sidebar-navigation .sidebar-item{font-size:.925rem}.sidebar.sidebar-navigation{display:block;position:sticky}.sidebar-search{width:100%}.sidebar .sidebar-footer{visibility:visible}}@media(max-width: 991.98px){#quarto-sidebar-glass{position:fixed;top:0;bottom:0;left:0;right:0;background-color:rgba(255,255,255,0);transition:background-color .15s ease-in;z-index:-1}#quarto-sidebar-glass.collapsing{z-index:1000}#quarto-sidebar-glass.show{transition:background-color .15s ease-out;background-color:rgba(102,102,102,.4);z-index:1000}}.sidebar .sidebar-footer{padding:.5rem 1rem;align-self:flex-end;color:#6c757d;width:100%}.quarto-page-breadcrumbs .breadcrumb-item+.breadcrumb-item,.quarto-page-breadcrumbs .breadcrumb-item{padding-right:.33em;padding-left:0}.quarto-page-breadcrumbs .breadcrumb-item::before{padding-right:.33em}.quarto-sidebar-footer{font-size:.875em}.sidebar-section .bi-chevron-right{vertical-align:middle}.sidebar-section .bi-chevron-right::before{font-size:.9em}.notransition{-webkit-transition:none !important;-moz-transition:none !important;-o-transition:none !important;transition:none !important}.btn:focus:not(:focus-visible){box-shadow:none}.page-navigation{display:flex;justify-content:space-between}.nav-page{padding-bottom:.75em}.nav-page .bi{font-size:1.8rem;vertical-align:middle}.nav-page .nav-page-text{padding-left:.25em;padding-right:.25em}.nav-page a{color:#6c757d;text-decoration:none;display:flex;align-items:center}.nav-page a:hover{color:#a21b4b}.toc-actions{display:flex}.toc-actions p{margin-block-start:0;margin-block-end:0}.toc-actions 
a{text-decoration:none;color:inherit;font-weight:400}.toc-actions a:hover{color:#a21b4b}.toc-actions .action-links{margin-left:4px}.sidebar nav[role=doc-toc] .toc-actions .bi{margin-left:-4px;font-size:.7rem;color:#6c757d}.sidebar nav[role=doc-toc] .toc-actions .bi:before{padding-top:3px}#quarto-margin-sidebar .toc-actions .bi:before{margin-top:.3rem;font-size:.7rem;color:#6c757d;vertical-align:top}.sidebar nav[role=doc-toc] .toc-actions>div:first-of-type{margin-top:-3px}#quarto-margin-sidebar .toc-actions p,.sidebar nav[role=doc-toc] .toc-actions p{font-size:.875rem}.nav-footer .toc-actions{padding-bottom:.5em;padding-top:.5em}.nav-footer .toc-actions :first-child{margin-left:auto}.nav-footer .toc-actions :last-child{margin-right:auto}.nav-footer .toc-actions .action-links{display:flex}.nav-footer .toc-actions .action-links p{padding-right:1.5em}.nav-footer .toc-actions .action-links p:last-of-type{padding-right:0}.nav-footer{display:flex;flex-direction:row;flex-wrap:wrap;justify-content:space-between;align-items:baseline;text-align:center;padding-top:.5rem;padding-bottom:.5rem;background-color:rgba(202,34,94,.2)}body.nav-fixed{padding-top:64px}.nav-footer-contents{color:#6c757d;margin-top:.25rem}.nav-footer{min-height:3.5em;color:#ca225e}.nav-footer a{color:#ca225e}.nav-footer .nav-footer-left{font-size:.825em}.nav-footer .nav-footer-center{font-size:.825em}.nav-footer .nav-footer-right{font-size:.825em}.nav-footer-left .footer-items,.nav-footer-center .footer-items,.nav-footer-right .footer-items{display:inline-flex;padding-top:.3em;padding-bottom:.3em;margin-bottom:0em}.nav-footer-left .footer-items .nav-link,.nav-footer-center .footer-items .nav-link,.nav-footer-right .footer-items .nav-link{padding-left:.6em;padding-right:.6em}.nav-footer-left{flex:1 1 0px;text-align:left}.nav-footer-right{flex:1 1 0px;text-align:right}.nav-footer-center{flex:1 1 0px;min-height:3em;text-align:center}.nav-footer-center .footer-items{justify-content:center}@media(max-width: 
767.98px){.nav-footer-center{margin-top:3em}}.navbar .quarto-reader-toggle.reader .quarto-reader-toggle-btn{background-color:#595959;border-radius:3px}.quarto-reader-toggle.reader.quarto-navigation-tool .quarto-reader-toggle-btn{background-color:#595959;border-radius:3px}.quarto-reader-toggle .quarto-reader-toggle-btn{display:inline-flex;padding-left:.2em;padding-right:.2em;margin-left:-0.2em;margin-right:-0.2em;text-align:center}.navbar .quarto-reader-toggle:not(.reader) .bi::before{background-image:url('data:image/svg+xml,')}.navbar .quarto-reader-toggle.reader .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-reader-toggle:not(.reader) .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-reader-toggle.reader .bi::before{background-image:url('data:image/svg+xml,')}#quarto-back-to-top{display:none;position:fixed;bottom:50px;background-color:#fff;border-radius:.25rem;box-shadow:0 .2rem .5rem #6c757d,0 0 .05rem #6c757d;color:#6c757d;text-decoration:none;font-size:.9em;text-align:center;left:50%;padding:.4rem .8rem;transform:translate(-50%, 0)}.aa-DetachedOverlay ul.aa-List,#quarto-search-results ul.aa-List{list-style:none;padding-left:0}.aa-DetachedOverlay .aa-Panel,#quarto-search-results .aa-Panel{background-color:#fff;position:absolute;z-index:2000}#quarto-search-results .aa-Panel{max-width:400px}#quarto-search input{font-size:.925rem}@media(min-width: 992px){.navbar #quarto-search{margin-left:.25rem;order:999}}@media(max-width: 991.98px){#quarto-sidebar .sidebar-search{display:none}}#quarto-sidebar .sidebar-search .aa-Autocomplete{width:100%}.navbar .aa-Autocomplete .aa-Form{width:180px}.navbar #quarto-search.type-overlay .aa-Autocomplete{width:40px}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form{background-color:inherit;border:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form:focus-within{box-shadow:none;outline:none}.navbar #quarto-search.type-overlay 
.aa-Autocomplete .aa-Form .aa-InputWrapper{display:none}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-InputWrapper:focus-within{display:inherit}.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-Label svg,.navbar #quarto-search.type-overlay .aa-Autocomplete .aa-Form .aa-LoadingIndicator svg{width:26px;height:26px;color:#595959;opacity:1}.navbar #quarto-search.type-overlay .aa-Autocomplete svg.aa-SubmitIcon{width:26px;height:26px;color:#595959;opacity:1}.aa-Autocomplete .aa-Form,.aa-DetachedFormContainer .aa-Form{align-items:center;background-color:#fff;border:1px solid #ced4da;border-radius:.25rem;color:#373a3c;display:flex;line-height:1em;margin:0;position:relative;width:100%}.aa-Autocomplete .aa-Form:focus-within,.aa-DetachedFormContainer .aa-Form:focus-within{box-shadow:rgba(39,128,227,.6) 0 0 0 1px;outline:currentColor none medium}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix{align-items:center;display:flex;flex-shrink:0;order:1}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-Label,.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-Label,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator{cursor:initial;flex-shrink:0;padding:0;text-align:left}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-Label svg,.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-Label svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator svg{color:#373a3c;opacity:.5}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-SubmitButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-SubmitButton{appearance:none;background:none;border:0;margin:0}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator,.aa-DetachedFormContainer .aa-Form 
.aa-InputWrapperPrefix .aa-LoadingIndicator{align-items:center;display:flex;justify-content:center}.aa-Autocomplete .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperPrefix .aa-LoadingIndicator[hidden]{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapper,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper{order:3;position:relative;width:100%}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input{appearance:none;background:none;border:0;color:#373a3c;font:inherit;height:calc(1.5em + .1rem + 2px);padding:0;width:100%}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::placeholder,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::placeholder{color:#373a3c;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input:focus{border-color:none;box-shadow:none;outline:none}.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-decoration,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-cancel-button,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-button,.aa-Autocomplete .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-decoration,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-decoration,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-cancel-button,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-button,.aa-DetachedFormContainer .aa-Form .aa-InputWrapper .aa-Input::-webkit-search-results-decoration{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix{align-items:center;display:flex;order:4}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix 
.aa-ClearButton{align-items:center;background:none;border:0;color:#373a3c;opacity:.8;cursor:pointer;display:flex;margin:0;width:calc(1.5em + .1rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:hover,.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:hover,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton:focus{color:#373a3c;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton[hidden]{display:none}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-ClearButton svg,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-ClearButton svg{width:calc(1.5em + 0.75rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton{border:none;align-items:center;background:none;color:#373a3c;opacity:.4;font-size:.7rem;cursor:pointer;display:none;margin:0;width:calc(1em + .1rem + 2px)}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:hover,.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:focus,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:hover,.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton:focus{color:#373a3c;opacity:.8}.aa-Autocomplete .aa-Form .aa-InputWrapperSuffix .aa-CopyButton[hidden],.aa-DetachedFormContainer .aa-Form .aa-InputWrapperSuffix .aa-CopyButton[hidden]{display:none}.aa-PanelLayout:empty{display:none}.quarto-search-no-results.no-query{display:none}.aa-Source:has(.no-query){display:none}#quarto-search-results .aa-Panel{border:solid #ced4da 1px}#quarto-search-results .aa-SourceNoResults{width:398px}.aa-DetachedOverlay .aa-Panel,#quarto-search-results .aa-Panel{max-height:65vh;overflow-y:auto;font-size:.925rem}.aa-DetachedOverlay 
.aa-SourceNoResults,#quarto-search-results .aa-SourceNoResults{height:60px;display:flex;justify-content:center;align-items:center}.aa-DetachedOverlay .search-error,#quarto-search-results .search-error{padding-top:10px;padding-left:20px;padding-right:20px;cursor:default}.aa-DetachedOverlay .search-error .search-error-title,#quarto-search-results .search-error .search-error-title{font-size:1.1rem;margin-bottom:.5rem}.aa-DetachedOverlay .search-error .search-error-title .search-error-icon,#quarto-search-results .search-error .search-error-title .search-error-icon{margin-right:8px}.aa-DetachedOverlay .search-error .search-error-text,#quarto-search-results .search-error .search-error-text{font-weight:300}.aa-DetachedOverlay .search-result-text,#quarto-search-results .search-result-text{font-weight:300;overflow:hidden;text-overflow:ellipsis;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;line-height:1.2rem;max-height:2.4rem}.aa-DetachedOverlay .aa-SourceHeader .search-result-header,#quarto-search-results .aa-SourceHeader .search-result-header{font-size:.875rem;background-color:#f2f2f2;padding-left:14px;padding-bottom:4px;padding-top:4px}.aa-DetachedOverlay .aa-SourceHeader .search-result-header-no-results,#quarto-search-results .aa-SourceHeader .search-result-header-no-results{display:none}.aa-DetachedOverlay .aa-SourceFooter .algolia-search-logo,#quarto-search-results .aa-SourceFooter .algolia-search-logo{width:110px;opacity:.85;margin:8px;float:right}.aa-DetachedOverlay .search-result-section,#quarto-search-results .search-result-section{font-size:.925em}.aa-DetachedOverlay a.search-result-link,#quarto-search-results a.search-result-link{color:inherit;text-decoration:none}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item,#quarto-search-results li.aa-Item[aria-selected=true] .search-item{background-color:#2780e3}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item.search-result-more,.aa-DetachedOverlay 
li.aa-Item[aria-selected=true] .search-item .search-result-section,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-text,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-title-container,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-result-text-container,#quarto-search-results li.aa-Item[aria-selected=true] .search-item.search-result-more,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-section,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-text,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-title-container,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-result-text-container{color:#fff;background-color:#2780e3}.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item mark.search-match,.aa-DetachedOverlay li.aa-Item[aria-selected=true] .search-item .search-match.mark,#quarto-search-results li.aa-Item[aria-selected=true] .search-item mark.search-match,#quarto-search-results li.aa-Item[aria-selected=true] .search-item .search-match.mark{color:#fff;background-color:#4b95e8}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item,#quarto-search-results li.aa-Item[aria-selected=false] .search-item{background-color:#fff}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item.search-result-more,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-section,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-text,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-title-container,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-result-text-container,#quarto-search-results li.aa-Item[aria-selected=false] .search-item.search-result-more,#quarto-search-results li.aa-Item[aria-selected=false] .search-item 
.search-result-section,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-text,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-title-container,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-result-text-container{color:#373a3c}.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item mark.search-match,.aa-DetachedOverlay li.aa-Item[aria-selected=false] .search-item .search-match.mark,#quarto-search-results li.aa-Item[aria-selected=false] .search-item mark.search-match,#quarto-search-results li.aa-Item[aria-selected=false] .search-item .search-match.mark{color:inherit;background-color:#e5effc}.aa-DetachedOverlay .aa-Item .search-result-doc:not(.document-selectable) .search-result-title-container,#quarto-search-results .aa-Item .search-result-doc:not(.document-selectable) .search-result-title-container{background-color:#fff;color:#373a3c}.aa-DetachedOverlay .aa-Item .search-result-doc:not(.document-selectable) .search-result-text-container,#quarto-search-results .aa-Item .search-result-doc:not(.document-selectable) .search-result-text-container{padding-top:0px}.aa-DetachedOverlay li.aa-Item .search-result-doc.document-selectable .search-result-text-container,#quarto-search-results li.aa-Item .search-result-doc.document-selectable .search-result-text-container{margin-top:-4px}.aa-DetachedOverlay .aa-Item,#quarto-search-results .aa-Item{cursor:pointer}.aa-DetachedOverlay .aa-Item .search-item,#quarto-search-results .aa-Item .search-item{border-left:none;border-right:none;border-top:none;background-color:#fff;border-color:#ced4da;color:#373a3c}.aa-DetachedOverlay .aa-Item .search-item p,#quarto-search-results .aa-Item .search-item p{margin-top:0;margin-bottom:0}.aa-DetachedOverlay .aa-Item .search-item i.bi,#quarto-search-results .aa-Item .search-item i.bi{padding-left:8px;padding-right:8px;font-size:1.3em}.aa-DetachedOverlay .aa-Item .search-item 
.search-result-title,#quarto-search-results .aa-Item .search-item .search-result-title{margin-top:.3em;margin-bottom:.1rem}.aa-DetachedOverlay .aa-Item .search-result-title-container,#quarto-search-results .aa-Item .search-result-title-container{font-size:1em;display:flex;padding:6px 4px 6px 4px}.aa-DetachedOverlay .aa-Item .search-result-text-container,#quarto-search-results .aa-Item .search-result-text-container{padding-bottom:8px;padding-right:8px;margin-left:44px}.aa-DetachedOverlay .aa-Item .search-result-doc-section,.aa-DetachedOverlay .aa-Item .search-result-more,#quarto-search-results .aa-Item .search-result-doc-section,#quarto-search-results .aa-Item .search-result-more{padding-top:8px;padding-bottom:8px;padding-left:44px}.aa-DetachedOverlay .aa-Item .search-result-more,#quarto-search-results .aa-Item .search-result-more{font-size:.8em;font-weight:400}.aa-DetachedOverlay .aa-Item .search-result-doc,#quarto-search-results .aa-Item .search-result-doc{border-top:1px solid #ced4da}.aa-DetachedSearchButton{background:none;border:none}.aa-DetachedSearchButton .aa-DetachedSearchButtonPlaceholder{display:none}.navbar .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon{color:#595959}.sidebar-tools-collapse #quarto-search,.sidebar-tools-main #quarto-search{display:inline}.sidebar-tools-collapse #quarto-search .aa-Autocomplete,.sidebar-tools-main #quarto-search .aa-Autocomplete{display:inline}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton{padding-left:4px;padding-right:4px}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon{color:#595959}.sidebar-tools-collapse #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon .aa-SubmitIcon,.sidebar-tools-main #quarto-search .aa-DetachedSearchButton .aa-DetachedSearchButtonIcon 
.aa-SubmitIcon{margin-top:-3px}.aa-DetachedContainer{background:rgba(255,255,255,.65);width:90%;bottom:0;box-shadow:rgba(206,212,218,.6) 0 0 0 1px;outline:currentColor none medium;display:flex;flex-direction:column;left:0;margin:0;overflow:hidden;padding:0;position:fixed;right:0;top:0;z-index:1101}.aa-DetachedContainer::after{height:32px}.aa-DetachedContainer .aa-SourceHeader{margin:var(--aa-spacing-half) 0 var(--aa-spacing-half) 2px}.aa-DetachedContainer .aa-Panel{background-color:#fff;border-radius:0;box-shadow:none;flex-grow:1;margin:0;padding:0;position:relative}.aa-DetachedContainer .aa-PanelLayout{bottom:0;box-shadow:none;left:0;margin:0;max-height:none;overflow-y:auto;position:absolute;right:0;top:0;width:100%}.aa-DetachedFormContainer{background-color:#fff;border-bottom:1px solid #ced4da;display:flex;flex-direction:row;justify-content:space-between;margin:0;padding:.5em}.aa-DetachedCancelButton{background:none;font-size:.8em;border:0;border-radius:3px;color:#373a3c;cursor:pointer;margin:0 0 0 .5em;padding:0 .5em}.aa-DetachedCancelButton:hover,.aa-DetachedCancelButton:focus{box-shadow:rgba(39,128,227,.6) 0 0 0 1px;outline:currentColor none medium}.aa-DetachedContainer--modal{bottom:inherit;height:auto;margin:0 auto;position:absolute;top:100px;border-radius:6px;max-width:850px}@media(max-width: 575.98px){.aa-DetachedContainer--modal{width:100%;top:0px;border-radius:0px;border:none}}.aa-DetachedContainer--modal .aa-PanelLayout{max-height:var(--aa-detached-modal-max-height);padding-bottom:var(--aa-spacing-half);position:static}.aa-Detached{height:100vh;overflow:hidden}.aa-DetachedOverlay{background-color:rgba(55,58,60,.4);position:fixed;left:0;right:0;top:0;margin:0;padding:0;height:100vh;z-index:1100}.quarto-listing{padding-bottom:1em}.listing-pagination{padding-top:.5em}ul.pagination{float:right;padding-left:8px;padding-top:.5em}ul.pagination li{padding-right:.75em}ul.pagination li.disabled a,ul.pagination li.active 
a{color:#373a3c;text-decoration:none}ul.pagination li:last-of-type{padding-right:0}.listing-actions-group{display:flex}.quarto-listing-filter{margin-bottom:1em;width:200px;margin-left:auto}.quarto-listing-sort{margin-bottom:1em;margin-right:auto;width:auto}.quarto-listing-sort .input-group-text{font-size:.8em}.input-group-text{border-right:none}.quarto-listing-sort select.form-select{font-size:.8em}.listing-no-matching{text-align:center;padding-top:2em;padding-bottom:3em;font-size:1em}#quarto-margin-sidebar .quarto-listing-category{padding-top:0;font-size:1rem}#quarto-margin-sidebar .quarto-listing-category-title{cursor:pointer;font-weight:600;font-size:1rem}.quarto-listing-category .category{cursor:pointer}.quarto-listing-category .category.active{font-weight:600}.quarto-listing-category.category-cloud{display:flex;flex-wrap:wrap;align-items:baseline}.quarto-listing-category.category-cloud .category{padding-right:5px}.quarto-listing-category.category-cloud .category-cloud-1{font-size:.75em}.quarto-listing-category.category-cloud .category-cloud-2{font-size:.95em}.quarto-listing-category.category-cloud .category-cloud-3{font-size:1.15em}.quarto-listing-category.category-cloud .category-cloud-4{font-size:1.35em}.quarto-listing-category.category-cloud .category-cloud-5{font-size:1.55em}.quarto-listing-category.category-cloud .category-cloud-6{font-size:1.75em}.quarto-listing-category.category-cloud .category-cloud-7{font-size:1.95em}.quarto-listing-category.category-cloud .category-cloud-8{font-size:2.15em}.quarto-listing-category.category-cloud .category-cloud-9{font-size:2.35em}.quarto-listing-category.category-cloud .category-cloud-10{font-size:2.55em}.quarto-listing-cols-1{grid-template-columns:repeat(1, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-1{grid-template-columns:repeat(1, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-1{grid-template-columns:minmax(0, 
1fr);gap:1.5em}}.quarto-listing-cols-2{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-2{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-2{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-3{grid-template-columns:repeat(3, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-3{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-3{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-4{grid-template-columns:repeat(4, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-4{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-4{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-5{grid-template-columns:repeat(5, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-5{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-5{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-6{grid-template-columns:repeat(6, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-6{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-6{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-7{grid-template-columns:repeat(7, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-7{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-7{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-8{grid-template-columns:repeat(8, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-8{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 
575.98px){.quarto-listing-cols-8{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-9{grid-template-columns:repeat(9, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-9{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-9{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-10{grid-template-columns:repeat(10, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-10{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-10{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-11{grid-template-columns:repeat(11, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-11{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-11{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-cols-12{grid-template-columns:repeat(12, minmax(0, 1fr));gap:1.5em}@media(max-width: 767.98px){.quarto-listing-cols-12{grid-template-columns:repeat(2, minmax(0, 1fr));gap:1.5em}}@media(max-width: 575.98px){.quarto-listing-cols-12{grid-template-columns:minmax(0, 1fr);gap:1.5em}}.quarto-listing-grid{gap:1.5em}.quarto-grid-item.borderless{border:none}.quarto-grid-item.borderless .listing-categories .listing-category:last-of-type,.quarto-grid-item.borderless .listing-categories .listing-category:first-of-type{padding-left:0}.quarto-grid-item.borderless .listing-categories .listing-category{border:0}.quarto-grid-link{text-decoration:none;color:inherit}.quarto-grid-link:hover{text-decoration:none;color:inherit}.quarto-grid-item h5.title,.quarto-grid-item .title.h5{margin-top:0;margin-bottom:0}.quarto-grid-item .card-footer{display:flex;justify-content:space-between;font-size:.8em}.quarto-grid-item .card-footer p{margin-bottom:0}.quarto-grid-item p.card-img-top{margin-bottom:0}.quarto-grid-item 
p.card-img-top>img{object-fit:cover}.quarto-grid-item .card-other-values{margin-top:.5em;font-size:.8em}.quarto-grid-item .card-other-values tr{margin-bottom:.5em}.quarto-grid-item .card-other-values tr>td:first-of-type{font-weight:600;padding-right:1em;padding-left:1em;vertical-align:top}.quarto-grid-item div.post-contents{display:flex;flex-direction:column;text-decoration:none;height:100%}.quarto-grid-item .listing-item-img-placeholder{background-color:#adb5bd;flex-shrink:0}.quarto-grid-item .card-attribution{padding-top:1em;display:flex;gap:1em;text-transform:uppercase;color:#6c757d;font-weight:500;flex-grow:10;align-items:flex-end}.quarto-grid-item .description{padding-bottom:1em}.quarto-grid-item .card-attribution .date{align-self:flex-end}.quarto-grid-item .card-attribution.justify{justify-content:space-between}.quarto-grid-item .card-attribution.start{justify-content:flex-start}.quarto-grid-item .card-attribution.end{justify-content:flex-end}.quarto-grid-item .card-title{margin-bottom:.1em}.quarto-grid-item .card-subtitle{padding-top:.25em}.quarto-grid-item .card-text{font-size:.9em}.quarto-grid-item .listing-reading-time{padding-bottom:.25em}.quarto-grid-item .card-text-small{font-size:.8em}.quarto-grid-item .card-subtitle.subtitle{font-size:.9em;font-weight:600;padding-bottom:.5em}.quarto-grid-item .listing-categories{display:flex;flex-wrap:wrap;padding-bottom:5px}.quarto-grid-item .listing-categories .listing-category{color:#6c757d;border:solid 1px #dee2e6;border-radius:.25rem;text-transform:uppercase;font-size:.65em;padding-left:.5em;padding-right:.5em;padding-top:.15em;padding-bottom:.15em;cursor:pointer;margin-right:4px;margin-bottom:4px}.quarto-grid-item.card-right{text-align:right}.quarto-grid-item.card-right .listing-categories{justify-content:flex-end}.quarto-grid-item.card-left{text-align:left}.quarto-grid-item.card-center{text-align:center}.quarto-grid-item.card-center .listing-description{text-align:justify}.quarto-grid-item.card-center 
.listing-categories{justify-content:center}table.quarto-listing-table td.image{padding:0px}table.quarto-listing-table td.image img{width:100%;max-width:50px;object-fit:contain}table.quarto-listing-table a{text-decoration:none}table.quarto-listing-table th a{color:inherit}table.quarto-listing-table th a.asc:after{margin-bottom:-2px;margin-left:5px;display:inline-block;height:1rem;width:1rem;background-repeat:no-repeat;background-size:1rem 1rem;background-image:url('data:image/svg+xml,');content:""}table.quarto-listing-table th a.desc:after{margin-bottom:-2px;margin-left:5px;display:inline-block;height:1rem;width:1rem;background-repeat:no-repeat;background-size:1rem 1rem;background-image:url('data:image/svg+xml,');content:""}table.quarto-listing-table.table-hover td{cursor:pointer}.quarto-post.image-left{flex-direction:row}.quarto-post.image-right{flex-direction:row-reverse}@media(max-width: 767.98px){.quarto-post.image-right,.quarto-post.image-left{gap:0em;flex-direction:column}.quarto-post .metadata{padding-bottom:1em;order:2}.quarto-post .body{order:1}.quarto-post .thumbnail{order:3}}.list.quarto-listing-default div:last-of-type{border-bottom:none}@media(min-width: 992px){.quarto-listing-container-default{margin-right:2em}}div.quarto-post{display:flex;gap:2em;margin-bottom:1.5em;border-bottom:1px solid #dee2e6}@media(max-width: 767.98px){div.quarto-post{padding-bottom:1em}}div.quarto-post .metadata{flex-basis:20%;flex-grow:0;margin-top:.2em;flex-shrink:10}div.quarto-post .thumbnail{flex-basis:30%;flex-grow:0;flex-shrink:0}div.quarto-post .thumbnail img{margin-top:.4em;width:100%;object-fit:cover}div.quarto-post .body{flex-basis:45%;flex-grow:1;flex-shrink:0}div.quarto-post .body h3.listing-title,div.quarto-post .body .listing-title.h3{margin-top:0px;margin-bottom:0px;border-bottom:none}div.quarto-post .body .listing-subtitle{font-size:.875em;margin-bottom:.5em;margin-top:.2em}div.quarto-post .body .description{font-size:.9em}div.quarto-post 
a{color:#373a3c;display:flex;flex-direction:column;text-decoration:none}div.quarto-post a div.description{flex-shrink:0}div.quarto-post .metadata{display:flex;flex-direction:column;font-size:.8em;font-family:var(--bs-font-sans-serif);flex-basis:33%}div.quarto-post .listing-categories{display:flex;flex-wrap:wrap;padding-bottom:5px}div.quarto-post .listing-categories .listing-category{color:#6c757d;border:solid 1px #dee2e6;border-radius:.25rem;text-transform:uppercase;font-size:.65em;padding-left:.5em;padding-right:.5em;padding-top:.15em;padding-bottom:.15em;cursor:pointer;margin-right:4px;margin-bottom:4px}div.quarto-post .listing-description{margin-bottom:.5em}div.quarto-about-jolla{display:flex !important;flex-direction:column;align-items:center;margin-top:10%;padding-bottom:1em}div.quarto-about-jolla .about-image{object-fit:cover;margin-left:auto;margin-right:auto;margin-bottom:1.5em}div.quarto-about-jolla img.round{border-radius:50%}div.quarto-about-jolla img.rounded{border-radius:10px}div.quarto-about-jolla .quarto-title h1.title,div.quarto-about-jolla .quarto-title .title.h1{text-align:center}div.quarto-about-jolla .quarto-title .description{text-align:center}div.quarto-about-jolla h2,div.quarto-about-jolla .h2{border-bottom:none}div.quarto-about-jolla .about-sep{width:60%}div.quarto-about-jolla main{text-align:center}div.quarto-about-jolla .about-links{display:flex}@media(min-width: 992px){div.quarto-about-jolla .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-jolla .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-jolla .about-link{color:#686d71;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-jolla .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-jolla .about-link{font-size:1.1em;padding:.5em 
.5em;text-align:center;border-radius:6px}}div.quarto-about-jolla .about-link:hover{color:#ca225e}div.quarto-about-jolla .about-link i.bi{margin-right:.15em}div.quarto-about-solana{display:flex !important;flex-direction:column;padding-top:3em !important;padding-bottom:1em}div.quarto-about-solana .about-entity{display:flex !important;align-items:start;justify-content:space-between}@media(min-width: 992px){div.quarto-about-solana .about-entity{flex-direction:row}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity{flex-direction:column-reverse;align-items:center;text-align:center}}div.quarto-about-solana .about-entity .entity-contents{display:flex;flex-direction:column}@media(max-width: 767.98px){div.quarto-about-solana .about-entity .entity-contents{width:100%}}div.quarto-about-solana .about-entity .about-image{object-fit:cover}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-image{margin-bottom:1.5em}}div.quarto-about-solana .about-entity img.round{border-radius:50%}div.quarto-about-solana .about-entity img.rounded{border-radius:10px}div.quarto-about-solana .about-entity .about-links{display:flex;justify-content:left;padding-bottom:1.2em}@media(min-width: 992px){div.quarto-about-solana .about-entity .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-solana .about-entity .about-link{color:#686d71;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-solana .about-entity .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-solana .about-entity .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-solana .about-entity .about-link:hover{color:#ca225e}div.quarto-about-solana .about-entity .about-link 
i.bi{margin-right:.15em}div.quarto-about-solana .about-contents{padding-right:1.5em;flex-basis:0;flex-grow:1}div.quarto-about-solana .about-contents main.content{margin-top:0}div.quarto-about-solana .about-contents h2,div.quarto-about-solana .about-contents .h2{border-bottom:none}div.quarto-about-trestles{display:flex !important;flex-direction:row;padding-top:3em !important;padding-bottom:1em}@media(max-width: 991.98px){div.quarto-about-trestles{flex-direction:column;padding-top:0em !important}}div.quarto-about-trestles .about-entity{display:flex !important;flex-direction:column;align-items:center;text-align:center;padding-right:1em}@media(min-width: 992px){div.quarto-about-trestles .about-entity{flex:0 0 42%}}div.quarto-about-trestles .about-entity .about-image{object-fit:cover;margin-bottom:1.5em}div.quarto-about-trestles .about-entity img.round{border-radius:50%}div.quarto-about-trestles .about-entity img.rounded{border-radius:10px}div.quarto-about-trestles .about-entity .about-links{display:flex;justify-content:center}@media(min-width: 992px){div.quarto-about-trestles .about-entity .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-trestles .about-entity .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-trestles .about-entity .about-link{color:#686d71;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-trestles .about-entity .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-trestles .about-entity .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-trestles .about-entity .about-link:hover{color:#ca225e}div.quarto-about-trestles .about-entity .about-link i.bi{margin-right:.15em}div.quarto-about-trestles .about-contents{flex-basis:0;flex-grow:1}div.quarto-about-trestles .about-contents h2,div.quarto-about-trestles 
.about-contents .h2{border-bottom:none}@media(min-width: 992px){div.quarto-about-trestles .about-contents{border-left:solid 1px #dee2e6;padding-left:1.5em}}div.quarto-about-trestles .about-contents main.content{margin-top:0}div.quarto-about-marquee{padding-bottom:1em}div.quarto-about-marquee .about-contents{display:flex;flex-direction:column}div.quarto-about-marquee .about-image{max-height:550px;margin-bottom:1.5em;object-fit:cover}div.quarto-about-marquee img.round{border-radius:50%}div.quarto-about-marquee img.rounded{border-radius:10px}div.quarto-about-marquee h2,div.quarto-about-marquee .h2{border-bottom:none}div.quarto-about-marquee .about-links{display:flex;justify-content:center;padding-top:1.5em}@media(min-width: 992px){div.quarto-about-marquee .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-marquee .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-marquee .about-link{color:#686d71;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-marquee .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-marquee .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-marquee .about-link:hover{color:#ca225e}div.quarto-about-marquee .about-link i.bi{margin-right:.15em}@media(min-width: 992px){div.quarto-about-marquee .about-link{border:none}}div.quarto-about-broadside{display:flex;flex-direction:column;padding-bottom:1em}div.quarto-about-broadside .about-main{display:flex !important;padding-top:0 !important}@media(min-width: 992px){div.quarto-about-broadside .about-main{flex-direction:row;align-items:flex-start}}@media(max-width: 991.98px){div.quarto-about-broadside .about-main{flex-direction:column}}@media(max-width: 991.98px){div.quarto-about-broadside .about-main 
.about-entity{flex-shrink:0;width:100%;height:450px;margin-bottom:1.5em;background-size:cover;background-repeat:no-repeat}}@media(min-width: 992px){div.quarto-about-broadside .about-main .about-entity{flex:0 10 50%;margin-right:1.5em;width:100%;height:100%;background-size:100%;background-repeat:no-repeat}}div.quarto-about-broadside .about-main .about-contents{padding-top:14px;flex:0 0 50%}div.quarto-about-broadside h2,div.quarto-about-broadside .h2{border-bottom:none}div.quarto-about-broadside .about-sep{margin-top:1.5em;width:60%;align-self:center}div.quarto-about-broadside .about-links{display:flex;justify-content:center;column-gap:20px;padding-top:1.5em}@media(min-width: 992px){div.quarto-about-broadside .about-links{flex-direction:row;column-gap:.8em;row-gap:15px;flex-wrap:wrap}}@media(max-width: 991.98px){div.quarto-about-broadside .about-links{flex-direction:column;row-gap:1em;width:100%;padding-bottom:1.5em}}div.quarto-about-broadside .about-link{color:#686d71;text-decoration:none;border:solid 1px}@media(min-width: 992px){div.quarto-about-broadside .about-link{font-size:.8em;padding:.25em .5em;border-radius:4px}}@media(max-width: 991.98px){div.quarto-about-broadside .about-link{font-size:1.1em;padding:.5em .5em;text-align:center;border-radius:6px}}div.quarto-about-broadside .about-link:hover{color:#ca225e}div.quarto-about-broadside .about-link i.bi{margin-right:.15em}@media(min-width: 992px){div.quarto-about-broadside .about-link{border:none}}.tippy-box[data-theme~=quarto]{background-color:#fff;border:solid 1px 
#dee2e6;border-radius:.25rem;color:#373a3c;font-size:.875rem}.tippy-box[data-theme~=quarto]>.tippy-backdrop{background-color:#fff}.tippy-box[data-theme~=quarto]>.tippy-arrow:after,.tippy-box[data-theme~=quarto]>.tippy-svg-arrow:after{content:"";position:absolute;z-index:-1}.tippy-box[data-theme~=quarto]>.tippy-arrow:after{border-color:rgba(0,0,0,0);border-style:solid}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-6px}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-6px}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-6px}.tippy-box[data-placement^=left]>.tippy-arrow:before{right:-6px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-arrow:before{border-top-color:#fff}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-arrow:after{border-top-color:#dee2e6;border-width:7px 7px 0;top:17px;left:1px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-svg-arrow>svg{top:16px}.tippy-box[data-theme~=quarto][data-placement^=top]>.tippy-svg-arrow:after{top:17px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-arrow:before{border-bottom-color:#fff;bottom:16px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-arrow:after{border-bottom-color:#dee2e6;border-width:0 7px 7px;bottom:17px;left:1px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-svg-arrow>svg{bottom:15px}.tippy-box[data-theme~=quarto][data-placement^=bottom]>.tippy-svg-arrow:after{bottom:17px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-arrow:before{border-left-color:#fff}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-arrow:after{border-left-color:#dee2e6;border-width:7px 0 7px 
7px;left:17px;top:1px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-svg-arrow>svg{left:11px}.tippy-box[data-theme~=quarto][data-placement^=left]>.tippy-svg-arrow:after{left:12px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-arrow:before{border-right-color:#fff;right:16px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-arrow:after{border-width:7px 7px 7px 0;right:17px;top:1px;border-right-color:#dee2e6}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-svg-arrow>svg{right:11px}.tippy-box[data-theme~=quarto][data-placement^=right]>.tippy-svg-arrow:after{right:12px}.tippy-box[data-theme~=quarto]>.tippy-svg-arrow{fill:#373a3c}.tippy-box[data-theme~=quarto]>.tippy-svg-arrow:after{background-image:url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iNiIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMCA2czEuNzk2LS4wMTMgNC42Ny0zLjYxNUM1Ljg1MS45IDYuOTMuMDA2IDggMGMxLjA3LS4wMDYgMi4xNDguODg3IDMuMzQzIDIuMzg1QzE0LjIzMyA2LjAwNSAxNiA2IDE2IDZIMHoiIGZpbGw9InJnYmEoMCwgOCwgMTYsIDAuMikiLz48L3N2Zz4=);background-size:16px 6px;width:16px;height:6px}.top-right{position:absolute;top:1em;right:1em}.hidden{display:none !important}.zindex-bottom{z-index:-1 !important}.quarto-layout-panel{margin-bottom:1em}.quarto-layout-panel>figure{width:100%}.quarto-layout-panel>figure>figcaption,.quarto-layout-panel>.panel-caption{margin-top:10pt}.quarto-layout-panel>.table-caption{margin-top:0px}.table-caption p{margin-bottom:.5em}.quarto-layout-row{display:flex;flex-direction:row;align-items:flex-start}.quarto-layout-valign-top{align-items:flex-start}.quarto-layout-valign-bottom{align-items:flex-end}.quarto-layout-valign-center{align-items:center}.quarto-layout-cell{position:relative;margin-right:20px}.quarto-layout-cell:last-child{margin-right:0}.quarto-layout-cell figure,.quarto-layout-cell>p{margin:.2em}.quarto-layout-cell img{max-width:100%}.quarto-layout-cell .html-widget{width:100% !important}.quarto-layout-cell div figure 
p{margin:0}.quarto-layout-cell figure{display:inline-block;margin-inline-start:0;margin-inline-end:0}.quarto-layout-cell table{display:inline-table}.quarto-layout-cell-subref figcaption,figure .quarto-layout-row figure figcaption{text-align:center;font-style:italic}.quarto-figure{position:relative;margin-bottom:1em}.quarto-figure>figure{width:100%;margin-bottom:0}.quarto-figure-left>figure>p,.quarto-figure-left>figure>div{text-align:left}.quarto-figure-center>figure>p,.quarto-figure-center>figure>div{text-align:center}.quarto-figure-right>figure>p,.quarto-figure-right>figure>div{text-align:right}figure>p:empty{display:none}figure>p:first-child{margin-top:0;margin-bottom:0}figure>figcaption{margin-top:.5em}div[id^=tbl-]{position:relative}.quarto-figure>.anchorjs-link{position:absolute;top:.6em;right:.5em}div[id^=tbl-]>.anchorjs-link{position:absolute;top:.7em;right:.3em}.quarto-figure:hover>.anchorjs-link,div[id^=tbl-]:hover>.anchorjs-link,h2:hover>.anchorjs-link,.h2:hover>.anchorjs-link,h3:hover>.anchorjs-link,.h3:hover>.anchorjs-link,h4:hover>.anchorjs-link,.h4:hover>.anchorjs-link,h5:hover>.anchorjs-link,.h5:hover>.anchorjs-link,h6:hover>.anchorjs-link,.h6:hover>.anchorjs-link,.reveal-anchorjs-link>.anchorjs-link{opacity:1}#title-block-header{margin-block-end:1rem;position:relative;margin-top:-1px}#title-block-header .abstract{margin-block-start:1rem}#title-block-header .abstract .abstract-title{font-weight:600}#title-block-header a{text-decoration:none}#title-block-header .author,#title-block-header .date,#title-block-header .doi{margin-block-end:.2rem}#title-block-header .quarto-title-block>div{display:flex}#title-block-header .quarto-title-block>div>h1,#title-block-header .quarto-title-block>div>.h1{flex-grow:1}#title-block-header .quarto-title-block>div>button{flex-shrink:0;height:2.25rem;margin-top:0}@media(min-width: 992px){#title-block-header 
.quarto-title-block>div>button{margin-top:5px}}tr.header>th>p:last-of-type{margin-bottom:0px}table,.table{caption-side:top;margin-bottom:1.5rem}caption,.table-caption{padding-top:.5rem;padding-bottom:.5rem;text-align:center}.utterances{max-width:none;margin-left:-8px}iframe{margin-bottom:1em}details{margin-bottom:1em}details[show]{margin-bottom:0}details>summary{color:#6c757d}details>summary>p:only-child{display:inline}pre.sourceCode,code.sourceCode{position:relative}p code:not(.sourceCode){white-space:pre-wrap}code{white-space:pre}@media print{code{white-space:pre-wrap}}pre>code{display:block}pre>code.sourceCode{white-space:pre}pre>code.sourceCode>span>a:first-child::before{text-decoration:none}pre.code-overflow-wrap>code.sourceCode{white-space:pre-wrap}pre.code-overflow-scroll>code.sourceCode{white-space:pre}code a:any-link{color:inherit;text-decoration:none}code a:hover{color:inherit;text-decoration:underline}ul.task-list{padding-left:1em}[data-tippy-root]{display:inline-block}.tippy-content .footnote-back{display:none}.quarto-embedded-source-code{display:none}.quarto-unresolved-ref{font-weight:600}.quarto-cover-image{max-width:35%;float:right;margin-left:30px}.cell-output-display .widget-subarea{margin-bottom:1em}.cell-output-display:not(.no-overflow-x),.knitsql-table:not(.no-overflow-x){overflow-x:auto}.panel-input{margin-bottom:1em}.panel-input>div,.panel-input>div>div{display:inline-block;vertical-align:top;padding-right:12px}.panel-input>p:last-child{margin-bottom:0}.layout-sidebar{margin-bottom:1em}.layout-sidebar .tab-content{border:none}.tab-content>.page-columns.active{display:grid}div.sourceCode>iframe{width:100%;height:300px;margin-bottom:-0.5em}div.ansi-escaped-output{font-family:monospace;display:block}/*! 
+* +* ansi colors from IPython notebook's +* +*/.ansi-black-fg{color:#3e424d}.ansi-black-bg{background-color:#3e424d}.ansi-black-intense-fg{color:#282c36}.ansi-black-intense-bg{background-color:#282c36}.ansi-red-fg{color:#e75c58}.ansi-red-bg{background-color:#e75c58}.ansi-red-intense-fg{color:#b22b31}.ansi-red-intense-bg{background-color:#b22b31}.ansi-green-fg{color:#00a250}.ansi-green-bg{background-color:#00a250}.ansi-green-intense-fg{color:#007427}.ansi-green-intense-bg{background-color:#007427}.ansi-yellow-fg{color:#ddb62b}.ansi-yellow-bg{background-color:#ddb62b}.ansi-yellow-intense-fg{color:#b27d12}.ansi-yellow-intense-bg{background-color:#b27d12}.ansi-blue-fg{color:#208ffb}.ansi-blue-bg{background-color:#208ffb}.ansi-blue-intense-fg{color:#0065ca}.ansi-blue-intense-bg{background-color:#0065ca}.ansi-magenta-fg{color:#d160c4}.ansi-magenta-bg{background-color:#d160c4}.ansi-magenta-intense-fg{color:#a03196}.ansi-magenta-intense-bg{background-color:#a03196}.ansi-cyan-fg{color:#60c6c8}.ansi-cyan-bg{background-color:#60c6c8}.ansi-cyan-intense-fg{color:#258f8f}.ansi-cyan-intense-bg{background-color:#258f8f}.ansi-white-fg{color:#c5c1b4}.ansi-white-bg{background-color:#c5c1b4}.ansi-white-intense-fg{color:#a1a6b2}.ansi-white-intense-bg{background-color:#a1a6b2}.ansi-default-inverse-fg{color:#fff}.ansi-default-inverse-bg{background-color:#000}.ansi-bold{font-weight:bold}.ansi-underline{text-decoration:underline}:root{--quarto-body-bg: #fff;--quarto-body-color: #373a3c;--quarto-text-muted: #6c757d;--quarto-border-color: #dee2e6;--quarto-border-width: 1px;--quarto-border-radius: 0.25rem}table.gt_table{color:var(--quarto-body-color);font-size:1em;width:100%;background-color:rgba(0,0,0,0);border-top-width:inherit;border-bottom-width:inherit;border-color:var(--quarto-border-color)}table.gt_table 
th.gt_column_spanner_outer{color:var(--quarto-body-color);background-color:rgba(0,0,0,0);border-top-width:inherit;border-bottom-width:inherit;border-color:var(--quarto-border-color)}table.gt_table th.gt_col_heading{color:var(--quarto-body-color);font-weight:bold;background-color:rgba(0,0,0,0)}table.gt_table thead.gt_col_headings{border-bottom:1px solid currentColor;border-top-width:inherit;border-top-color:var(--quarto-border-color)}table.gt_table thead.gt_col_headings:not(:first-child){border-top-width:1px;border-top-color:var(--quarto-border-color)}table.gt_table td.gt_row{border-bottom-width:1px;border-bottom-color:var(--quarto-border-color);border-top-width:0px}table.gt_table tbody.gt_table_body{border-top-width:1px;border-bottom-width:1px;border-bottom-color:var(--quarto-border-color);border-top-color:currentColor}div.columns{display:initial;gap:initial}div.column{display:inline-block;overflow-x:initial;vertical-align:top;width:50%}.code-annotation-tip-content{word-wrap:break-word}.code-annotation-container-hidden{display:none !important}dl.code-annotation-container-grid{display:grid;grid-template-columns:min-content auto}dl.code-annotation-container-grid dt{grid-column:1}dl.code-annotation-container-grid dd{grid-column:2}pre.sourceCode.code-annotation-code{padding-right:0}code.sourceCode .code-annotation-anchor{z-index:100;position:absolute;right:.5em;left:inherit;background-color:rgba(0,0,0,0)}:root{--mermaid-bg-color: #fff;--mermaid-edge-color: #373a3c;--mermaid-node-fg-color: #373a3c;--mermaid-fg-color: #373a3c;--mermaid-fg-color--lighter: #4f5457;--mermaid-fg-color--lightest: #686d71;--mermaid-font-family: Lato, sans-serif;--mermaid-label-bg-color: #fff;--mermaid-label-fg-color: #2780e3;--mermaid-node-bg-color: rgba(39, 128, 227, 0.1);--mermaid-node-fg-color: #373a3c}@media print{:root{font-size:11pt}#quarto-sidebar,#TOC,.nav-page{display:none}.page-columns 
.content{grid-column-start:page-start}.fixed-top{position:relative}.panel-caption,.figure-caption,figcaption{color:#666}}.code-copy-button{position:absolute;top:0;right:0;border:0;margin-top:5px;margin-right:5px;background-color:rgba(0,0,0,0);z-index:3}.code-copy-button:focus{outline:none}.code-copy-button-tooltip{font-size:.75em}pre.sourceCode:hover>.code-copy-button>.bi::before{display:inline-block;height:1rem;width:1rem;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:1rem 1rem}pre.sourceCode:hover>.code-copy-button-checked>.bi::before{background-image:url('data:image/svg+xml,')}pre.sourceCode:hover>.code-copy-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}pre.sourceCode:hover>.code-copy-button-checked:hover>.bi::before{background-image:url('data:image/svg+xml,')}main ol ol,main ul ul,main ol ul,main ul ol{margin-bottom:1em}ul>li:not(:has(>p))>ul,ol>li:not(:has(>p))>ul,ul>li:not(:has(>p))>ol,ol>li:not(:has(>p))>ol{margin-bottom:0}ul>li:not(:has(>p))>ul>li:has(>p),ol>li:not(:has(>p))>ul>li:has(>p),ul>li:not(:has(>p))>ol>li:has(>p),ol>li:not(:has(>p))>ol>li:has(>p){margin-top:1rem}body{margin:0}main.page-columns>header>h1.title,main.page-columns>header>.title.h1{margin-bottom:0}@media(min-width: 992px){body .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc( 890px - 3em )) [body-content-end] 1.5em [body-end] 35px [body-end-outset] minmax(75px, 145px) [page-end-inset] 35px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.fullcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc( 890px - 3em )) 
[body-content-end] 1.5em [body-end] 35px [body-end-outset] 35px [page-end-inset page-end] 5fr [screen-end-inset] 1.5em}body.slimcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset] 35px [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(500px, calc( 890px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(0px, 200px) [page-end-inset] 35px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.listing:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc( 890px - 3em )) [body-content-end] 3em [body-end] 50px [body-end-outset] minmax(0px, 250px) [page-end-inset] minmax(50px, 100px) [page-end] 1fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 175px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(0px, 200px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 175px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(0px, 200px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating 
.page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] minmax(25px, 50px) [page-start-inset] minmax(50px, 150px) [body-start-outset] minmax(25px, 50px) [body-start] 1.5em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end] minmax(25px, 50px) [body-end-outset] minmax(50px, 150px) [page-end-inset] minmax(25px, 50px) [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc( 1040px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(50px, 100px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc( 1040px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 50px [page-start-inset] minmax(50px, 150px) [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(450px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 
50px [body-end-outset] minmax(0px, 200px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start] minmax(50px, 100px) [page-start-inset] 50px [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(500px, calc( 1040px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(0px, 200px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 50px [page-start-inset] minmax(50px, 150px) [body-start-outset] 50px [body-start] 1.5em [body-content-start] minmax(450px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(50px, 150px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] minmax(25px, 50px) [page-start-inset] minmax(50px, 150px) [body-start-outset] minmax(25px, 50px) [body-start] 1.5em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end] minmax(25px, 50px) [body-end-outset] minmax(50px, 150px) [page-end-inset] minmax(25px, 50px) [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}}@media(max-width: 991.98px){body .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end] 35px [body-end-outset] minmax(75px, 145px) [page-end-inset] 35px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.fullcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 
1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.slimcontent:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end] 35px [body-end-outset] minmax(75px, 145px) [page-end-inset] 35px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.listing:not(.floating):not(.docked) .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset] 5fr [body-start] 1.5em [body-content-start] minmax(500px, calc( 1290px - 3em )) [body-content-end body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 145px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start] 35px [page-start-inset] minmax(0px, 145px) [body-start-outset] 35px [body-start] 1.5em [body-content-start] minmax(450px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 
1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1.5em [body-content-start] minmax(500px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(75px, 150px) [page-end-inset] 25px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(25px, 50px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc( 1040px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc( 840px - 3em )) [body-content-end] 1.5em [body-end body-end-outset page-end-inset page-end] 4fr [screen-end-inset] 1.5em [screen-end]}body.docked.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(25px, 50px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.docked.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(500px, calc( 790px - 
3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(25px, 50px) [page-end-inset] 50px [page-end] 5fr [screen-end-inset] 1.5em [screen-end]}body.floating.slimcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 35px [body-end-outset] minmax(75px, 145px) [page-end-inset] 35px [page-end] 4fr [screen-end-inset] 1.5em [screen-end]}body.floating.listing .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset] 5fr [page-start page-start-inset body-start-outset body-start] 1em [body-content-start] minmax(500px, calc( 790px - 3em )) [body-content-end] 1.5em [body-end] 50px [body-end-outset] minmax(75px, 150px) [page-end-inset] 25px [page-end] 4fr [screen-end-inset] 1.5em [screen-end]}}@media(max-width: 767.98px){body .page-columns,body.fullcontent:not(.floating):not(.docked) .page-columns,body.slimcontent:not(.floating):not(.docked) .page-columns,body.docked .page-columns,body.docked.slimcontent .page-columns,body.docked.fullcontent .page-columns,body.floating .page-columns,body.floating.slimcontent .page-columns,body.floating.fullcontent .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) .page-columns.toc-left{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}body:not(.floating):not(.docked) 
.page-columns.toc-left .page-columns{display:grid;gap:0;grid-template-columns:[screen-start] 1.5em [screen-start-inset page-start page-start-inset body-start-outset body-start body-content-start] minmax(0px, 1fr) [body-content-end body-end body-end-outset page-end-inset page-end screen-end-inset] 1.5em [screen-end]}nav[role=doc-toc]{display:none}}body,.page-row-navigation{grid-template-rows:[page-top] max-content [contents-top] max-content [contents-bottom] max-content [page-bottom]}.page-rows-contents{grid-template-rows:[content-top] minmax(max-content, 1fr) [content-bottom] minmax(60px, max-content) [page-bottom]}.page-full{grid-column:screen-start/screen-end !important}.page-columns>*{grid-column:body-content-start/body-content-end}.page-columns.column-page>*{grid-column:page-start/page-end}.page-columns.column-page-left>*{grid-column:page-start/body-content-end}.page-columns.column-page-right>*{grid-column:body-content-start/page-end}.page-rows{grid-auto-rows:auto}.header{grid-column:screen-start/screen-end;grid-row:page-top/contents-top}#quarto-content{padding:0;grid-column:screen-start/screen-end;grid-row:contents-top/contents-bottom}body.floating .sidebar.sidebar-navigation{grid-column:page-start/body-start;grid-row:content-top/page-bottom}body.docked .sidebar.sidebar-navigation{grid-column:screen-start/body-start;grid-row:content-top/page-bottom}.sidebar.toc-left{grid-column:page-start/body-start;grid-row:content-top/page-bottom}.sidebar.margin-sidebar{grid-column:body-end/page-end;grid-row:content-top/page-bottom}.page-columns .content{grid-column:body-content-start/body-content-end;grid-row:content-top/content-bottom;align-content:flex-start}.page-columns .page-navigation{grid-column:body-content-start/body-content-end;grid-row:content-bottom/page-bottom}.page-columns .footer{grid-column:screen-start/screen-end;grid-row:contents-bottom/page-bottom}.page-columns .column-body{grid-column:body-content-start/body-content-end}.page-columns 
.column-body-fullbleed{grid-column:body-start/body-end}.page-columns .column-body-outset{grid-column:body-start-outset/body-end-outset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-body-outset table{background:#fff}.page-columns .column-body-outset-left{grid-column:body-start-outset/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-body-outset-left table{background:#fff}.page-columns .column-body-outset-right{grid-column:body-content-start/body-end-outset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-body-outset-right table{background:#fff}.page-columns .column-page{grid-column:page-start/page-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-page table{background:#fff}.page-columns .column-page-inset{grid-column:page-start-inset/page-end-inset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-page-inset table{background:#fff}.page-columns .column-page-inset-left{grid-column:page-start-inset/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-page-inset-left table{background:#fff}.page-columns .column-page-inset-right{grid-column:body-content-start/page-end-inset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-page-inset-right figcaption table{background:#fff}.page-columns .column-page-left{grid-column:page-start/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-page-left table{background:#fff}.page-columns .column-page-right{grid-column:body-content-start/page-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-page-right figcaption table{background:#fff}#quarto-content.page-columns #quarto-margin-sidebar,#quarto-content.page-columns #quarto-sidebar{z-index:1}@media(max-width: 991.98px){#quarto-content.page-columns #quarto-margin-sidebar.collapse,#quarto-content.page-columns #quarto-sidebar.collapse,#quarto-content.page-columns 
#quarto-margin-sidebar.collapsing,#quarto-content.page-columns #quarto-sidebar.collapsing{z-index:1055}}#quarto-content.page-columns main.column-page,#quarto-content.page-columns main.column-page-right,#quarto-content.page-columns main.column-page-left{z-index:0}.page-columns .column-screen-inset{grid-column:screen-start-inset/screen-end-inset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-inset table{background:#fff}.page-columns .column-screen-inset-left{grid-column:screen-start-inset/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-inset-left table{background:#fff}.page-columns .column-screen-inset-right{grid-column:body-content-start/screen-end-inset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-inset-right table{background:#fff}.page-columns .column-screen{grid-column:screen-start/screen-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen table{background:#fff}.page-columns .column-screen-left{grid-column:screen-start/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-left table{background:#fff}.page-columns .column-screen-right{grid-column:body-content-start/screen-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-right table{background:#fff}.page-columns .column-screen-inset-shaded{grid-column:screen-start/screen-end;padding:1em;background:#f8f9fa;z-index:998;transform:translate3d(0, 0, 0);margin-bottom:1em}.zindex-content{z-index:998;transform:translate3d(0, 0, 0)}.zindex-modal{z-index:1055;transform:translate3d(0, 0, 0)}.zindex-over-content{z-index:999;transform:translate3d(0, 0, 0)}img.img-fluid.column-screen,img.img-fluid.column-screen-inset-shaded,img.img-fluid.column-screen-inset,img.img-fluid.column-screen-inset-left,img.img-fluid.column-screen-inset-right,img.img-fluid.column-screen-left,img.img-fluid.column-screen-right{width:100%}@media(min-width: 
992px){.margin-caption,div.aside,aside,.column-margin{grid-column:body-end/page-end !important;z-index:998}.column-sidebar{grid-column:page-start/body-start !important;z-index:998}.column-leftmargin{grid-column:screen-start-inset/body-start !important;z-index:998}.no-row-height{height:1em;overflow:visible}}@media(max-width: 991.98px){.margin-caption,div.aside,aside,.column-margin{grid-column:body-end/page-end !important;z-index:998}.no-row-height{height:1em;overflow:visible}.page-columns.page-full{overflow:visible}.page-columns.toc-left .margin-caption,.page-columns.toc-left div.aside,.page-columns.toc-left aside,.page-columns.toc-left .column-margin{grid-column:body-content-start/body-content-end !important;z-index:998;transform:translate3d(0, 0, 0)}.page-columns.toc-left .no-row-height{height:initial;overflow:initial}}@media(max-width: 767.98px){.margin-caption,div.aside,aside,.column-margin{grid-column:body-content-start/body-content-end !important;z-index:998;transform:translate3d(0, 0, 0)}.no-row-height{height:initial;overflow:initial}#quarto-margin-sidebar{display:none}#quarto-sidebar-toc-left{display:none}.hidden-sm{display:none}}.panel-grid{display:grid;grid-template-rows:repeat(1, 1fr);grid-template-columns:repeat(24, 1fr);gap:1em}.panel-grid .g-col-1{grid-column:auto/span 1}.panel-grid .g-col-2{grid-column:auto/span 2}.panel-grid .g-col-3{grid-column:auto/span 3}.panel-grid .g-col-4{grid-column:auto/span 4}.panel-grid .g-col-5{grid-column:auto/span 5}.panel-grid .g-col-6{grid-column:auto/span 6}.panel-grid .g-col-7{grid-column:auto/span 7}.panel-grid .g-col-8{grid-column:auto/span 8}.panel-grid .g-col-9{grid-column:auto/span 9}.panel-grid .g-col-10{grid-column:auto/span 10}.panel-grid .g-col-11{grid-column:auto/span 11}.panel-grid .g-col-12{grid-column:auto/span 12}.panel-grid .g-col-13{grid-column:auto/span 13}.panel-grid .g-col-14{grid-column:auto/span 14}.panel-grid .g-col-15{grid-column:auto/span 15}.panel-grid .g-col-16{grid-column:auto/span 
16}.panel-grid .g-col-17{grid-column:auto/span 17}.panel-grid .g-col-18{grid-column:auto/span 18}.panel-grid .g-col-19{grid-column:auto/span 19}.panel-grid .g-col-20{grid-column:auto/span 20}.panel-grid .g-col-21{grid-column:auto/span 21}.panel-grid .g-col-22{grid-column:auto/span 22}.panel-grid .g-col-23{grid-column:auto/span 23}.panel-grid .g-col-24{grid-column:auto/span 24}.panel-grid .g-start-1{grid-column-start:1}.panel-grid .g-start-2{grid-column-start:2}.panel-grid .g-start-3{grid-column-start:3}.panel-grid .g-start-4{grid-column-start:4}.panel-grid .g-start-5{grid-column-start:5}.panel-grid .g-start-6{grid-column-start:6}.panel-grid .g-start-7{grid-column-start:7}.panel-grid .g-start-8{grid-column-start:8}.panel-grid .g-start-9{grid-column-start:9}.panel-grid .g-start-10{grid-column-start:10}.panel-grid .g-start-11{grid-column-start:11}.panel-grid .g-start-12{grid-column-start:12}.panel-grid .g-start-13{grid-column-start:13}.panel-grid .g-start-14{grid-column-start:14}.panel-grid .g-start-15{grid-column-start:15}.panel-grid .g-start-16{grid-column-start:16}.panel-grid .g-start-17{grid-column-start:17}.panel-grid .g-start-18{grid-column-start:18}.panel-grid .g-start-19{grid-column-start:19}.panel-grid .g-start-20{grid-column-start:20}.panel-grid .g-start-21{grid-column-start:21}.panel-grid .g-start-22{grid-column-start:22}.panel-grid .g-start-23{grid-column-start:23}@media(min-width: 576px){.panel-grid .g-col-sm-1{grid-column:auto/span 1}.panel-grid .g-col-sm-2{grid-column:auto/span 2}.panel-grid .g-col-sm-3{grid-column:auto/span 3}.panel-grid .g-col-sm-4{grid-column:auto/span 4}.panel-grid .g-col-sm-5{grid-column:auto/span 5}.panel-grid .g-col-sm-6{grid-column:auto/span 6}.panel-grid .g-col-sm-7{grid-column:auto/span 7}.panel-grid .g-col-sm-8{grid-column:auto/span 8}.panel-grid .g-col-sm-9{grid-column:auto/span 9}.panel-grid .g-col-sm-10{grid-column:auto/span 10}.panel-grid .g-col-sm-11{grid-column:auto/span 11}.panel-grid .g-col-sm-12{grid-column:auto/span 
12}.panel-grid .g-col-sm-13{grid-column:auto/span 13}.panel-grid .g-col-sm-14{grid-column:auto/span 14}.panel-grid .g-col-sm-15{grid-column:auto/span 15}.panel-grid .g-col-sm-16{grid-column:auto/span 16}.panel-grid .g-col-sm-17{grid-column:auto/span 17}.panel-grid .g-col-sm-18{grid-column:auto/span 18}.panel-grid .g-col-sm-19{grid-column:auto/span 19}.panel-grid .g-col-sm-20{grid-column:auto/span 20}.panel-grid .g-col-sm-21{grid-column:auto/span 21}.panel-grid .g-col-sm-22{grid-column:auto/span 22}.panel-grid .g-col-sm-23{grid-column:auto/span 23}.panel-grid .g-col-sm-24{grid-column:auto/span 24}.panel-grid .g-start-sm-1{grid-column-start:1}.panel-grid .g-start-sm-2{grid-column-start:2}.panel-grid .g-start-sm-3{grid-column-start:3}.panel-grid .g-start-sm-4{grid-column-start:4}.panel-grid .g-start-sm-5{grid-column-start:5}.panel-grid .g-start-sm-6{grid-column-start:6}.panel-grid .g-start-sm-7{grid-column-start:7}.panel-grid .g-start-sm-8{grid-column-start:8}.panel-grid .g-start-sm-9{grid-column-start:9}.panel-grid .g-start-sm-10{grid-column-start:10}.panel-grid .g-start-sm-11{grid-column-start:11}.panel-grid .g-start-sm-12{grid-column-start:12}.panel-grid .g-start-sm-13{grid-column-start:13}.panel-grid .g-start-sm-14{grid-column-start:14}.panel-grid .g-start-sm-15{grid-column-start:15}.panel-grid .g-start-sm-16{grid-column-start:16}.panel-grid .g-start-sm-17{grid-column-start:17}.panel-grid .g-start-sm-18{grid-column-start:18}.panel-grid .g-start-sm-19{grid-column-start:19}.panel-grid .g-start-sm-20{grid-column-start:20}.panel-grid .g-start-sm-21{grid-column-start:21}.panel-grid .g-start-sm-22{grid-column-start:22}.panel-grid .g-start-sm-23{grid-column-start:23}}@media(min-width: 768px){.panel-grid .g-col-md-1{grid-column:auto/span 1}.panel-grid .g-col-md-2{grid-column:auto/span 2}.panel-grid .g-col-md-3{grid-column:auto/span 3}.panel-grid .g-col-md-4{grid-column:auto/span 4}.panel-grid .g-col-md-5{grid-column:auto/span 5}.panel-grid 
.g-col-md-6{grid-column:auto/span 6}.panel-grid .g-col-md-7{grid-column:auto/span 7}.panel-grid .g-col-md-8{grid-column:auto/span 8}.panel-grid .g-col-md-9{grid-column:auto/span 9}.panel-grid .g-col-md-10{grid-column:auto/span 10}.panel-grid .g-col-md-11{grid-column:auto/span 11}.panel-grid .g-col-md-12{grid-column:auto/span 12}.panel-grid .g-col-md-13{grid-column:auto/span 13}.panel-grid .g-col-md-14{grid-column:auto/span 14}.panel-grid .g-col-md-15{grid-column:auto/span 15}.panel-grid .g-col-md-16{grid-column:auto/span 16}.panel-grid .g-col-md-17{grid-column:auto/span 17}.panel-grid .g-col-md-18{grid-column:auto/span 18}.panel-grid .g-col-md-19{grid-column:auto/span 19}.panel-grid .g-col-md-20{grid-column:auto/span 20}.panel-grid .g-col-md-21{grid-column:auto/span 21}.panel-grid .g-col-md-22{grid-column:auto/span 22}.panel-grid .g-col-md-23{grid-column:auto/span 23}.panel-grid .g-col-md-24{grid-column:auto/span 24}.panel-grid .g-start-md-1{grid-column-start:1}.panel-grid .g-start-md-2{grid-column-start:2}.panel-grid .g-start-md-3{grid-column-start:3}.panel-grid .g-start-md-4{grid-column-start:4}.panel-grid .g-start-md-5{grid-column-start:5}.panel-grid .g-start-md-6{grid-column-start:6}.panel-grid .g-start-md-7{grid-column-start:7}.panel-grid .g-start-md-8{grid-column-start:8}.panel-grid .g-start-md-9{grid-column-start:9}.panel-grid .g-start-md-10{grid-column-start:10}.panel-grid .g-start-md-11{grid-column-start:11}.panel-grid .g-start-md-12{grid-column-start:12}.panel-grid .g-start-md-13{grid-column-start:13}.panel-grid .g-start-md-14{grid-column-start:14}.panel-grid .g-start-md-15{grid-column-start:15}.panel-grid .g-start-md-16{grid-column-start:16}.panel-grid .g-start-md-17{grid-column-start:17}.panel-grid .g-start-md-18{grid-column-start:18}.panel-grid .g-start-md-19{grid-column-start:19}.panel-grid .g-start-md-20{grid-column-start:20}.panel-grid .g-start-md-21{grid-column-start:21}.panel-grid .g-start-md-22{grid-column-start:22}.panel-grid 
.g-start-md-23{grid-column-start:23}}@media(min-width: 992px){.panel-grid .g-col-lg-1{grid-column:auto/span 1}.panel-grid .g-col-lg-2{grid-column:auto/span 2}.panel-grid .g-col-lg-3{grid-column:auto/span 3}.panel-grid .g-col-lg-4{grid-column:auto/span 4}.panel-grid .g-col-lg-5{grid-column:auto/span 5}.panel-grid .g-col-lg-6{grid-column:auto/span 6}.panel-grid .g-col-lg-7{grid-column:auto/span 7}.panel-grid .g-col-lg-8{grid-column:auto/span 8}.panel-grid .g-col-lg-9{grid-column:auto/span 9}.panel-grid .g-col-lg-10{grid-column:auto/span 10}.panel-grid .g-col-lg-11{grid-column:auto/span 11}.panel-grid .g-col-lg-12{grid-column:auto/span 12}.panel-grid .g-col-lg-13{grid-column:auto/span 13}.panel-grid .g-col-lg-14{grid-column:auto/span 14}.panel-grid .g-col-lg-15{grid-column:auto/span 15}.panel-grid .g-col-lg-16{grid-column:auto/span 16}.panel-grid .g-col-lg-17{grid-column:auto/span 17}.panel-grid .g-col-lg-18{grid-column:auto/span 18}.panel-grid .g-col-lg-19{grid-column:auto/span 19}.panel-grid .g-col-lg-20{grid-column:auto/span 20}.panel-grid .g-col-lg-21{grid-column:auto/span 21}.panel-grid .g-col-lg-22{grid-column:auto/span 22}.panel-grid .g-col-lg-23{grid-column:auto/span 23}.panel-grid .g-col-lg-24{grid-column:auto/span 24}.panel-grid .g-start-lg-1{grid-column-start:1}.panel-grid .g-start-lg-2{grid-column-start:2}.panel-grid .g-start-lg-3{grid-column-start:3}.panel-grid .g-start-lg-4{grid-column-start:4}.panel-grid .g-start-lg-5{grid-column-start:5}.panel-grid .g-start-lg-6{grid-column-start:6}.panel-grid .g-start-lg-7{grid-column-start:7}.panel-grid .g-start-lg-8{grid-column-start:8}.panel-grid .g-start-lg-9{grid-column-start:9}.panel-grid .g-start-lg-10{grid-column-start:10}.panel-grid .g-start-lg-11{grid-column-start:11}.panel-grid .g-start-lg-12{grid-column-start:12}.panel-grid .g-start-lg-13{grid-column-start:13}.panel-grid .g-start-lg-14{grid-column-start:14}.panel-grid .g-start-lg-15{grid-column-start:15}.panel-grid 
.g-start-lg-16{grid-column-start:16}.panel-grid .g-start-lg-17{grid-column-start:17}.panel-grid .g-start-lg-18{grid-column-start:18}.panel-grid .g-start-lg-19{grid-column-start:19}.panel-grid .g-start-lg-20{grid-column-start:20}.panel-grid .g-start-lg-21{grid-column-start:21}.panel-grid .g-start-lg-22{grid-column-start:22}.panel-grid .g-start-lg-23{grid-column-start:23}}@media(min-width: 1200px){.panel-grid .g-col-xl-1{grid-column:auto/span 1}.panel-grid .g-col-xl-2{grid-column:auto/span 2}.panel-grid .g-col-xl-3{grid-column:auto/span 3}.panel-grid .g-col-xl-4{grid-column:auto/span 4}.panel-grid .g-col-xl-5{grid-column:auto/span 5}.panel-grid .g-col-xl-6{grid-column:auto/span 6}.panel-grid .g-col-xl-7{grid-column:auto/span 7}.panel-grid .g-col-xl-8{grid-column:auto/span 8}.panel-grid .g-col-xl-9{grid-column:auto/span 9}.panel-grid .g-col-xl-10{grid-column:auto/span 10}.panel-grid .g-col-xl-11{grid-column:auto/span 11}.panel-grid .g-col-xl-12{grid-column:auto/span 12}.panel-grid .g-col-xl-13{grid-column:auto/span 13}.panel-grid .g-col-xl-14{grid-column:auto/span 14}.panel-grid .g-col-xl-15{grid-column:auto/span 15}.panel-grid .g-col-xl-16{grid-column:auto/span 16}.panel-grid .g-col-xl-17{grid-column:auto/span 17}.panel-grid .g-col-xl-18{grid-column:auto/span 18}.panel-grid .g-col-xl-19{grid-column:auto/span 19}.panel-grid .g-col-xl-20{grid-column:auto/span 20}.panel-grid .g-col-xl-21{grid-column:auto/span 21}.panel-grid .g-col-xl-22{grid-column:auto/span 22}.panel-grid .g-col-xl-23{grid-column:auto/span 23}.panel-grid .g-col-xl-24{grid-column:auto/span 24}.panel-grid .g-start-xl-1{grid-column-start:1}.panel-grid .g-start-xl-2{grid-column-start:2}.panel-grid .g-start-xl-3{grid-column-start:3}.panel-grid .g-start-xl-4{grid-column-start:4}.panel-grid .g-start-xl-5{grid-column-start:5}.panel-grid .g-start-xl-6{grid-column-start:6}.panel-grid .g-start-xl-7{grid-column-start:7}.panel-grid .g-start-xl-8{grid-column-start:8}.panel-grid 
.g-start-xl-9{grid-column-start:9}.panel-grid .g-start-xl-10{grid-column-start:10}.panel-grid .g-start-xl-11{grid-column-start:11}.panel-grid .g-start-xl-12{grid-column-start:12}.panel-grid .g-start-xl-13{grid-column-start:13}.panel-grid .g-start-xl-14{grid-column-start:14}.panel-grid .g-start-xl-15{grid-column-start:15}.panel-grid .g-start-xl-16{grid-column-start:16}.panel-grid .g-start-xl-17{grid-column-start:17}.panel-grid .g-start-xl-18{grid-column-start:18}.panel-grid .g-start-xl-19{grid-column-start:19}.panel-grid .g-start-xl-20{grid-column-start:20}.panel-grid .g-start-xl-21{grid-column-start:21}.panel-grid .g-start-xl-22{grid-column-start:22}.panel-grid .g-start-xl-23{grid-column-start:23}}@media(min-width: 1400px){.panel-grid .g-col-xxl-1{grid-column:auto/span 1}.panel-grid .g-col-xxl-2{grid-column:auto/span 2}.panel-grid .g-col-xxl-3{grid-column:auto/span 3}.panel-grid .g-col-xxl-4{grid-column:auto/span 4}.panel-grid .g-col-xxl-5{grid-column:auto/span 5}.panel-grid .g-col-xxl-6{grid-column:auto/span 6}.panel-grid .g-col-xxl-7{grid-column:auto/span 7}.panel-grid .g-col-xxl-8{grid-column:auto/span 8}.panel-grid .g-col-xxl-9{grid-column:auto/span 9}.panel-grid .g-col-xxl-10{grid-column:auto/span 10}.panel-grid .g-col-xxl-11{grid-column:auto/span 11}.panel-grid .g-col-xxl-12{grid-column:auto/span 12}.panel-grid .g-col-xxl-13{grid-column:auto/span 13}.panel-grid .g-col-xxl-14{grid-column:auto/span 14}.panel-grid .g-col-xxl-15{grid-column:auto/span 15}.panel-grid .g-col-xxl-16{grid-column:auto/span 16}.panel-grid .g-col-xxl-17{grid-column:auto/span 17}.panel-grid .g-col-xxl-18{grid-column:auto/span 18}.panel-grid .g-col-xxl-19{grid-column:auto/span 19}.panel-grid .g-col-xxl-20{grid-column:auto/span 20}.panel-grid .g-col-xxl-21{grid-column:auto/span 21}.panel-grid .g-col-xxl-22{grid-column:auto/span 22}.panel-grid .g-col-xxl-23{grid-column:auto/span 23}.panel-grid .g-col-xxl-24{grid-column:auto/span 24}.panel-grid .g-start-xxl-1{grid-column-start:1}.panel-grid 
.g-start-xxl-2{grid-column-start:2}.panel-grid .g-start-xxl-3{grid-column-start:3}.panel-grid .g-start-xxl-4{grid-column-start:4}.panel-grid .g-start-xxl-5{grid-column-start:5}.panel-grid .g-start-xxl-6{grid-column-start:6}.panel-grid .g-start-xxl-7{grid-column-start:7}.panel-grid .g-start-xxl-8{grid-column-start:8}.panel-grid .g-start-xxl-9{grid-column-start:9}.panel-grid .g-start-xxl-10{grid-column-start:10}.panel-grid .g-start-xxl-11{grid-column-start:11}.panel-grid .g-start-xxl-12{grid-column-start:12}.panel-grid .g-start-xxl-13{grid-column-start:13}.panel-grid .g-start-xxl-14{grid-column-start:14}.panel-grid .g-start-xxl-15{grid-column-start:15}.panel-grid .g-start-xxl-16{grid-column-start:16}.panel-grid .g-start-xxl-17{grid-column-start:17}.panel-grid .g-start-xxl-18{grid-column-start:18}.panel-grid .g-start-xxl-19{grid-column-start:19}.panel-grid .g-start-xxl-20{grid-column-start:20}.panel-grid .g-start-xxl-21{grid-column-start:21}.panel-grid .g-start-xxl-22{grid-column-start:22}.panel-grid .g-start-xxl-23{grid-column-start:23}}main{margin-top:1em;margin-bottom:1em}h1,.h1,h2,.h2{opacity:.9;margin-top:2rem;margin-bottom:1rem;font-weight:600}h1.title,.title.h1{margin-top:0}h2,.h2{border-bottom:1px solid #dee2e6;padding-bottom:.5rem}h3,.h3{font-weight:600}h3,.h3,h4,.h4{opacity:.9;margin-top:1.5rem}h5,.h5,h6,.h6{opacity:.9}.header-section-number{color:#747a7f}.nav-link.active .header-section-number{color:inherit}mark,.mark{padding:0em}.panel-caption,caption,.figure-caption{font-size:.9rem}.panel-caption,.figure-caption,figcaption{color:#747a7f}.table-caption,caption{color:#373a3c}.quarto-layout-cell[data-ref-parent] caption{color:#747a7f}.column-margin figcaption,.margin-caption,div.aside,aside,.column-margin{color:#747a7f;font-size:.825rem}.panel-caption.margin-caption{text-align:inherit}.column-margin.column-container 
p{margin-bottom:0}.column-margin.column-container>*:not(.collapse){padding-top:.5em;padding-bottom:.5em;display:block}.column-margin.column-container>*.collapse:not(.show){display:none}@media(min-width: 768px){.column-margin.column-container .callout-margin-content:first-child{margin-top:4.5em}.column-margin.column-container .callout-margin-content-simple:first-child{margin-top:3.5em}}.margin-caption>*{padding-top:.5em;padding-bottom:.5em}@media(max-width: 767.98px){.quarto-layout-row{flex-direction:column}}.nav-tabs .nav-item{margin-top:1px;cursor:pointer}.tab-content{margin-top:0px;border-left:#dee2e6 1px solid;border-right:#dee2e6 1px solid;border-bottom:#dee2e6 1px solid;margin-left:0;padding:1em;margin-bottom:1em}@media(max-width: 767.98px){.layout-sidebar{margin-left:0;margin-right:0}}.panel-sidebar,.panel-sidebar .form-control,.panel-input,.panel-input .form-control,.selectize-dropdown{font-size:.9rem}.panel-sidebar .form-control,.panel-input .form-control{padding-top:.1rem}.tab-pane div.sourceCode{margin-top:0px}.tab-pane>p{padding-top:1em}.tab-content>.tab-pane:not(.active){display:none !important}div.sourceCode{background-color:rgba(233,236,239,.65);border:1px solid rgba(233,236,239,.65);border-radius:.25rem}pre.sourceCode{background-color:rgba(0,0,0,0)}pre.sourceCode{border:none;font-size:.875em;overflow:visible !important;padding:.4em}.callout pre.sourceCode{padding-left:0}div.sourceCode{overflow-y:hidden}.callout div.sourceCode{margin-left:initial}.blockquote{font-size:inherit;padding-left:1rem;padding-right:1.5rem;color:#747a7f}.blockquote h1:first-child,.blockquote .h1:first-child,.blockquote h2:first-child,.blockquote .h2:first-child,.blockquote h3:first-child,.blockquote .h3:first-child,.blockquote h4:first-child,.blockquote .h4:first-child,.blockquote h5:first-child,.blockquote .h5:first-child{margin-top:0}pre{background-color:initial;padding:initial;border:initial}p code:not(.sourceCode),li code:not(.sourceCode),td 
code:not(.sourceCode){background-color:#f7f7f7;padding:.2em}nav p code:not(.sourceCode),nav li code:not(.sourceCode),nav td code:not(.sourceCode){background-color:rgba(0,0,0,0);padding:0}td code:not(.sourceCode){white-space:pre-wrap}#quarto-embedded-source-code-modal>.modal-dialog{max-width:1000px;padding-left:1.75rem;padding-right:1.75rem}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-body{padding:0}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-body div.sourceCode{margin:0;padding:.2rem .2rem;border-radius:0px;border:none}#quarto-embedded-source-code-modal>.modal-dialog>.modal-content>.modal-header{padding:.7rem}.code-tools-button{font-size:1rem;padding:.15rem .15rem;margin-left:5px;color:#6c757d;background-color:rgba(0,0,0,0);transition:initial;cursor:pointer}.code-tools-button>.bi::before{display:inline-block;height:1rem;width:1rem;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:1rem 1rem}.code-tools-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}#quarto-embedded-source-code-modal .code-copy-button>.bi::before{background-image:url('data:image/svg+xml,')}#quarto-embedded-source-code-modal .code-copy-button-checked>.bi::before{background-image:url('data:image/svg+xml,')}.sidebar{will-change:top;transition:top 200ms linear;position:sticky;overflow-y:auto;padding-top:1.2em;max-height:100vh}.sidebar.toc-left,.sidebar.margin-sidebar{top:0px;padding-top:1em}.sidebar.toc-left>*,.sidebar.margin-sidebar>*{padding-top:.5em}.sidebar.quarto-banner-title-block-sidebar>*{padding-top:1.65em}figure 
.quarto-notebook-link{margin-top:.5em}.quarto-notebook-link{font-size:.75em;color:#6c757d;margin-bottom:1em;text-decoration:none;display:block}.quarto-notebook-link:hover{text-decoration:underline;color:#ca225e}.quarto-notebook-link::before{display:inline-block;height:.75rem;width:.75rem;margin-bottom:0em;margin-right:.25em;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:.75rem .75rem}.quarto-alternate-notebooks i.bi,.quarto-alternate-formats i.bi{margin-right:.4em}.quarto-notebook .cell-container{display:flex}.quarto-notebook .cell-container .cell{flex-grow:4}.quarto-notebook .cell-container .cell-decorator{padding-top:1.5em;padding-right:1em;text-align:right}.quarto-notebook h2,.quarto-notebook .h2{border-bottom:none}.sidebar .quarto-alternate-formats a,.sidebar .quarto-alternate-notebooks a{text-decoration:none}.sidebar .quarto-alternate-formats a:hover,.sidebar .quarto-alternate-notebooks a:hover{color:#ca225e}.sidebar .quarto-alternate-notebooks h2,.sidebar .quarto-alternate-notebooks .h2,.sidebar .quarto-alternate-formats h2,.sidebar .quarto-alternate-formats .h2,.sidebar nav[role=doc-toc]>h2,.sidebar nav[role=doc-toc]>.h2{font-size:.875rem;font-weight:400;margin-bottom:.5rem;margin-top:.3rem;font-family:inherit;border-bottom:0;padding-bottom:0;padding-top:0px}.sidebar .quarto-alternate-notebooks h2,.sidebar .quarto-alternate-notebooks .h2,.sidebar .quarto-alternate-formats h2,.sidebar .quarto-alternate-formats .h2{margin-top:1rem}.sidebar nav[role=doc-toc]>ul a{border-left:1px solid #e9ecef;padding-left:.6rem}.sidebar .quarto-alternate-notebooks h2>ul a,.sidebar .quarto-alternate-notebooks .h2>ul a,.sidebar .quarto-alternate-formats h2>ul a,.sidebar .quarto-alternate-formats .h2>ul a{border-left:none;padding-left:.6rem}.sidebar .quarto-alternate-notebooks ul a:empty,.sidebar .quarto-alternate-formats ul a:empty,.sidebar nav[role=doc-toc]>ul a:empty{display:none}.sidebar 
.quarto-alternate-notebooks ul,.sidebar .quarto-alternate-formats ul,.sidebar nav[role=doc-toc] ul{padding-left:0;list-style:none;font-size:.875rem;font-weight:300}.sidebar .quarto-alternate-notebooks ul li a,.sidebar .quarto-alternate-formats ul li a,.sidebar nav[role=doc-toc]>ul li a{line-height:1.1rem;padding-bottom:.2rem;padding-top:.2rem;color:inherit}.sidebar nav[role=doc-toc] ul>li>ul>li>a{padding-left:1.2em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>a{padding-left:2.4em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>a{padding-left:3.6em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>ul>li>a{padding-left:4.8em}.sidebar nav[role=doc-toc] ul>li>ul>li>ul>li>ul>li>ul>li>ul>li>a{padding-left:6em}.sidebar nav[role=doc-toc] ul>li>a.active,.sidebar nav[role=doc-toc] ul>li>ul>li>a.active{border-left:1px solid #ca225e;color:#ca225e !important}.sidebar nav[role=doc-toc] ul>li>a:hover,.sidebar nav[role=doc-toc] ul>li>ul>li>a:hover{color:#ca225e !important}kbd,.kbd{color:#373a3c;background-color:#f8f9fa;border:1px solid;border-radius:5px;border-color:#dee2e6}div.hanging-indent{margin-left:1em;text-indent:-1em}.citation a,.footnote-ref{text-decoration:none}.footnotes ol{padding-left:1em}.tippy-content>*{margin-bottom:.7em}.tippy-content>*:last-child{margin-bottom:0}.table a{word-break:break-word}.table>thead{border-top-width:1px;border-top-color:#dee2e6;border-bottom:1px solid #b6babc}.callout{margin-top:1.25rem;margin-bottom:1.25rem;border-radius:.25rem;overflow-wrap:break-word}.callout .callout-title-container{overflow-wrap:anywhere}.callout.callout-style-simple{padding:.4em .7em;border-left:5px solid;border-right:1px solid #dee2e6;border-top:1px solid #dee2e6;border-bottom:1px solid #dee2e6}.callout.callout-style-default{border-left:5px solid;border-right:1px solid #dee2e6;border-top:1px solid #dee2e6;border-bottom:1px solid #dee2e6}.callout .callout-body-container{flex-grow:1}.callout.callout-style-simple 
.callout-body{font-size:.9rem;font-weight:400}.callout.callout-style-default .callout-body{font-size:.9rem;font-weight:400}.callout.callout-titled .callout-body{margin-top:.2em}.callout:not(.no-icon).callout-titled.callout-style-simple .callout-body{padding-left:1.6em}.callout.callout-titled>.callout-header{padding-top:.2em;margin-bottom:-0.2em}.callout.callout-style-simple>div.callout-header{border-bottom:none;font-size:.9rem;font-weight:600;opacity:75%}.callout.callout-style-default>div.callout-header{border-bottom:none;font-weight:600;opacity:85%;font-size:.9rem;padding-left:.5em;padding-right:.5em}.callout.callout-style-default div.callout-body{padding-left:.5em;padding-right:.5em}.callout.callout-style-default div.callout-body>:first-child{margin-top:.5em}.callout>div.callout-header[data-bs-toggle=collapse]{cursor:pointer}.callout.callout-style-default .callout-header[aria-expanded=false],.callout.callout-style-default .callout-header[aria-expanded=true]{padding-top:0px;margin-bottom:0px;align-items:center}.callout.callout-titled .callout-body>:last-child:not(.sourceCode),.callout.callout-titled .callout-body>div>:last-child:not(.sourceCode){margin-bottom:.5rem}.callout:not(.callout-titled) .callout-body>:first-child,.callout:not(.callout-titled) .callout-body>div>:first-child{margin-top:.25rem}.callout:not(.callout-titled) .callout-body>:last-child,.callout:not(.callout-titled) .callout-body>div>:last-child{margin-bottom:.2rem}.callout.callout-style-simple .callout-icon::before,.callout.callout-style-simple .callout-toggle::before{height:1rem;width:1rem;display:inline-block;content:"";background-repeat:no-repeat;background-size:1rem 1rem}.callout.callout-style-default .callout-icon::before,.callout.callout-style-default .callout-toggle::before{height:.9rem;width:.9rem;display:inline-block;content:"";background-repeat:no-repeat;background-size:.9rem .9rem}.callout.callout-style-default .callout-toggle::before{margin-top:5px}.callout .callout-btn-toggle 
.callout-toggle::before{transition:transform .2s linear}.callout .callout-header[aria-expanded=false] .callout-toggle::before{transform:rotate(-90deg)}.callout .callout-header[aria-expanded=true] .callout-toggle::before{transform:none}.callout.callout-style-simple:not(.no-icon) div.callout-icon-container{padding-top:.2em;padding-right:.55em}.callout.callout-style-default:not(.no-icon) div.callout-icon-container{padding-top:.1em;padding-right:.35em}.callout.callout-style-default:not(.no-icon) div.callout-title-container{margin-top:-1px}.callout.callout-style-default.callout-caution:not(.no-icon) div.callout-icon-container{padding-top:.3em;padding-right:.35em}.callout>.callout-body>.callout-icon-container>.no-icon,.callout>.callout-header>.callout-icon-container>.no-icon{display:none}div.callout.callout{border-left-color:#6c757d}div.callout.callout-style-default>.callout-header{background-color:#6c757d}div.callout-note.callout{border-left-color:#2780e3}div.callout-note.callout-style-default>.callout-header{background-color:#e9f2fc}div.callout-note:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-note.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-note .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-tip.callout{border-left-color:#3fb618}div.callout-tip.callout-style-default>.callout-header{background-color:#ecf8e8}div.callout-tip:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-tip.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-tip .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-warning.callout{border-left-color:#ff7518}div.callout-warning.callout-style-default>.callout-header{background-color:#fff1e8}div.callout-warning:not(.callout-titled) 
.callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-warning.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-warning .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-caution.callout{border-left-color:#f0ad4e}div.callout-caution.callout-style-default>.callout-header{background-color:#fef7ed}div.callout-caution:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-caution.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-caution .callout-toggle::before{background-image:url('data:image/svg+xml,')}div.callout-important.callout{border-left-color:#ff0039}div.callout-important.callout-style-default>.callout-header{background-color:#ffe6eb}div.callout-important:not(.callout-titled) .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-important.callout-titled .callout-icon::before{background-image:url('data:image/svg+xml,');}div.callout-important .callout-toggle::before{background-image:url('data:image/svg+xml,')}.quarto-toggle-container{display:flex;align-items:center}.quarto-reader-toggle .bi::before,.quarto-color-scheme-toggle .bi::before{display:inline-block;height:1rem;width:1rem;content:"";background-repeat:no-repeat;background-size:1rem 1rem}.sidebar-navigation{padding-left:20px}.navbar .quarto-color-scheme-toggle:not(.alternate) .bi::before{background-image:url('data:image/svg+xml,')}.navbar .quarto-color-scheme-toggle.alternate .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-color-scheme-toggle:not(.alternate) .bi::before{background-image:url('data:image/svg+xml,')}.sidebar-navigation .quarto-color-scheme-toggle.alternate 
.bi::before{background-image:url('data:image/svg+xml,')}.quarto-sidebar-toggle{border-color:#dee2e6;border-bottom-left-radius:.25rem;border-bottom-right-radius:.25rem;border-style:solid;border-width:1px;overflow:hidden;border-top-width:0px;padding-top:0px !important}.quarto-sidebar-toggle-title{cursor:pointer;padding-bottom:2px;margin-left:.25em;text-align:center;font-weight:400;font-size:.775em}#quarto-content .quarto-sidebar-toggle{background:#fafafa}#quarto-content .quarto-sidebar-toggle-title{color:#373a3c}.quarto-sidebar-toggle-icon{color:#dee2e6;margin-right:.5em;float:right;transition:transform .2s ease}.quarto-sidebar-toggle-icon::before{padding-top:5px}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-icon{transform:rotate(-180deg)}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-title{border-bottom:solid #dee2e6 1px}.quarto-sidebar-toggle-contents{background-color:#fff;padding-right:10px;padding-left:10px;margin-top:0px !important;transition:max-height .5s ease}.quarto-sidebar-toggle.expanded .quarto-sidebar-toggle-contents{padding-top:1em;padding-bottom:10px}.quarto-sidebar-toggle:not(.expanded) .quarto-sidebar-toggle-contents{padding-top:0px !important;padding-bottom:0px}nav[role=doc-toc]{z-index:1020}#quarto-sidebar>*,nav[role=doc-toc]>*{transition:opacity .1s ease,border .1s ease}#quarto-sidebar.slow>*,nav[role=doc-toc].slow>*{transition:opacity .4s ease,border .4s ease}.quarto-color-scheme-toggle:not(.alternate).top-right .bi::before{background-image:url('data:image/svg+xml,')}.quarto-color-scheme-toggle.alternate.top-right .bi::before{background-image:url('data:image/svg+xml,')}#quarto-appendix.default{border-top:1px solid #dee2e6}#quarto-appendix.default{background-color:#fff;padding-top:1.5em;margin-top:2em;z-index:998}#quarto-appendix.default .quarto-appendix-heading{margin-top:0;line-height:1.4em;font-weight:600;opacity:.9;border-bottom:none;margin-bottom:0}#quarto-appendix.default .footnotes ol,#quarto-appendix.default .footnotes ol 
li>p:last-of-type,#quarto-appendix.default .quarto-appendix-contents>p:last-of-type{margin-bottom:0}#quarto-appendix.default .quarto-appendix-secondary-label{margin-bottom:.4em}#quarto-appendix.default .quarto-appendix-bibtex{font-size:.7em;padding:1em;border:solid 1px #dee2e6;margin-bottom:1em}#quarto-appendix.default .quarto-appendix-bibtex code.sourceCode{white-space:pre-wrap}#quarto-appendix.default .quarto-appendix-citeas{font-size:.9em;padding:1em;border:solid 1px #dee2e6;margin-bottom:1em}#quarto-appendix.default .quarto-appendix-heading{font-size:1em !important}#quarto-appendix.default *[role=doc-endnotes]>ol,#quarto-appendix.default .quarto-appendix-contents>*:not(h2):not(.h2){font-size:.9em}#quarto-appendix.default section{padding-bottom:1.5em}#quarto-appendix.default section *[role=doc-endnotes],#quarto-appendix.default section>*:not(a){opacity:.9;word-wrap:break-word}.btn.btn-quarto,div.cell-output-display .btn-quarto{color:#cbcccc;background-color:#373a3c;border-color:#373a3c}.btn.btn-quarto:hover,div.cell-output-display .btn-quarto:hover{color:#cbcccc;background-color:#555859;border-color:#4b4e50}.btn-check:focus+.btn.btn-quarto,.btn.btn-quarto:focus,.btn-check:focus+div.cell-output-display .btn-quarto,div.cell-output-display .btn-quarto:focus{color:#cbcccc;background-color:#555859;border-color:#4b4e50;box-shadow:0 0 0 .25rem rgba(77,80,82,.5)}.btn-check:checked+.btn.btn-quarto,.btn-check:active+.btn.btn-quarto,.btn.btn-quarto:active,.btn.btn-quarto.active,.show>.btn.btn-quarto.dropdown-toggle,.btn-check:checked+div.cell-output-display .btn-quarto,.btn-check:active+div.cell-output-display .btn-quarto,div.cell-output-display .btn-quarto:active,div.cell-output-display .btn-quarto.active,.show>div.cell-output-display 
.btn-quarto.dropdown-toggle{color:#fff;background-color:#5f6163;border-color:#4b4e50}.btn-check:checked+.btn.btn-quarto:focus,.btn-check:active+.btn.btn-quarto:focus,.btn.btn-quarto:active:focus,.btn.btn-quarto.active:focus,.show>.btn.btn-quarto.dropdown-toggle:focus,.btn-check:checked+div.cell-output-display .btn-quarto:focus,.btn-check:active+div.cell-output-display .btn-quarto:focus,div.cell-output-display .btn-quarto:active:focus,div.cell-output-display .btn-quarto.active:focus,.show>div.cell-output-display .btn-quarto.dropdown-toggle:focus{box-shadow:0 0 0 .25rem rgba(77,80,82,.5)}.btn.btn-quarto:disabled,.btn.btn-quarto.disabled,div.cell-output-display .btn-quarto:disabled,div.cell-output-display .btn-quarto.disabled{color:#fff;background-color:#373a3c;border-color:#373a3c}nav.quarto-secondary-nav.color-navbar{background-color:#fff;color:#595959}nav.quarto-secondary-nav.color-navbar h1,nav.quarto-secondary-nav.color-navbar .h1,nav.quarto-secondary-nav.color-navbar .quarto-btn-toggle{color:#595959}@media(max-width: 991.98px){body.nav-sidebar .quarto-title-banner{margin-bottom:0;padding-bottom:0}body.nav-sidebar #title-block-header{margin-block-end:0}}p.subtitle{margin-top:.25em;margin-bottom:.5em}code a:any-link{color:inherit;text-decoration-color:#6c757d}/*! 
light */div.observablehq table thead tr th{background-color:var(--bs-body-bg)}input,button,select,optgroup,textarea{background-color:var(--bs-body-bg)}.code-annotated .code-copy-button{margin-right:1.25em;margin-top:0;padding-bottom:0;padding-top:3px}.code-annotation-gutter-bg{background-color:#fff}.code-annotation-gutter{background-color:rgba(233,236,239,.65)}.code-annotation-gutter,.code-annotation-gutter-bg{height:100%;width:calc(20px + .5em);position:absolute;top:0;right:0}dl.code-annotation-container-grid dt{margin-right:1em;margin-top:.25rem}dl.code-annotation-container-grid dt{font-family:var(--bs-font-monospace);color:#4f5457;border:solid #4f5457 1px;border-radius:50%;height:22px;width:22px;line-height:22px;font-size:11px;text-align:center;vertical-align:middle;text-decoration:none}dl.code-annotation-container-grid dt[data-target-cell]{cursor:pointer}dl.code-annotation-container-grid dt[data-target-cell].code-annotation-active{color:#fff;border:solid #aaa 1px;background-color:#aaa}pre.code-annotation-code{padding-top:0;padding-bottom:0}pre.code-annotation-code code{z-index:3}#code-annotation-line-highlight-gutter{width:100%;border-top:solid rgba(170,170,170,.2666666667) 1px;border-bottom:solid rgba(170,170,170,.2666666667) 1px;z-index:2;background-color:rgba(170,170,170,.1333333333)}#code-annotation-line-highlight{margin-left:-4em;width:calc(100% + 4em);border-top:solid rgba(170,170,170,.2666666667) 1px;border-bottom:solid rgba(170,170,170,.2666666667) 1px;z-index:2;background-color:rgba(170,170,170,.1333333333)}code.sourceCode .code-annotation-anchor.code-annotation-active{background-color:var(--quarto-hl-normal-color, #aaaaaa);border:solid var(--quarto-hl-normal-color, #aaaaaa) 1px;color:#e9ecef;font-weight:bolder}code.sourceCode .code-annotation-anchor{font-family:var(--bs-font-monospace);color:var(--quarto-hl-co-color);border:solid var(--quarto-hl-co-color) 1px;border-radius:50%;height:18px;width:18px;font-size:9px;margin-top:2px}code.sourceCode 
button.code-annotation-anchor{padding:2px}code.sourceCode a.code-annotation-anchor{line-height:18px;text-align:center;vertical-align:middle;cursor:default;text-decoration:none}@media print{.page-columns .column-screen-inset{grid-column:page-start-inset/page-end-inset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-inset table{background:#fff}.page-columns .column-screen-inset-left{grid-column:page-start-inset/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-inset-left table{background:#fff}.page-columns .column-screen-inset-right{grid-column:body-content-start/page-end-inset;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-inset-right table{background:#fff}.page-columns .column-screen{grid-column:page-start/page-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen table{background:#fff}.page-columns .column-screen-left{grid-column:page-start/body-content-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-left table{background:#fff}.page-columns .column-screen-right{grid-column:body-content-start/page-end;z-index:998;transform:translate3d(0, 0, 0)}.page-columns .column-screen-right table{background:#fff}.page-columns .column-screen-inset-shaded{grid-column:page-start-inset/page-end-inset;padding:1em;background:#f8f9fa;z-index:998;transform:translate3d(0, 0, 0);margin-bottom:1em}}.quarto-video{margin-bottom:1em}.table>thead{border-top-width:0}.table>:not(caption)>*:not(:last-child)>*{border-bottom-color:#ebeced;border-bottom-style:solid;border-bottom-width:1px}.table>:not(:first-child){border-top:1px solid #b6babc;border-bottom:1px solid inherit}.table tbody{border-bottom-color:#b6babc}a.external:after{display:inline-block;height:.75rem;width:.75rem;margin-bottom:.15em;margin-left:.25em;content:"";vertical-align:-0.125em;background-image:url('data:image/svg+xml,');background-repeat:no-repeat;background-size:.75rem 
.75rem}div.sourceCode code a.external:after{content:none}a.external:after:hover{cursor:pointer}.quarto-ext-icon{display:inline-block;font-size:.75em;padding-left:.3em}.code-with-filename .code-with-filename-file{margin-bottom:0;padding-bottom:2px;padding-top:2px;padding-left:.7em;border:var(--quarto-border-width) solid var(--quarto-border-color);border-radius:var(--quarto-border-radius);border-bottom:0;border-bottom-left-radius:0%;border-bottom-right-radius:0%}.code-with-filename div.sourceCode,.reveal .code-with-filename div.sourceCode{margin-top:0;border-top-left-radius:0%;border-top-right-radius:0%}.code-with-filename .code-with-filename-file pre{margin-bottom:0}.code-with-filename .code-with-filename-file,.code-with-filename .code-with-filename-file pre{background-color:rgba(219,219,219,.8)}.quarto-dark .code-with-filename .code-with-filename-file,.quarto-dark .code-with-filename .code-with-filename-file pre{background-color:#555}.code-with-filename .code-with-filename-file strong{font-weight:400}.quarto-title-banner{margin-bottom:1em;color:#595959;background:#fff}.quarto-title-banner .code-tools-button{color:#8c8c8c}.quarto-title-banner .code-tools-button:hover{color:#595959}.quarto-title-banner .code-tools-button>.bi::before{background-image:url('data:image/svg+xml,')}.quarto-title-banner .code-tools-button:hover>.bi::before{background-image:url('data:image/svg+xml,')}.quarto-title-banner .quarto-title .title{font-weight:600}.quarto-title-banner .quarto-categories{margin-top:.75em}@media(min-width: 992px){.quarto-title-banner{padding-top:2.5em;padding-bottom:2.5em}}@media(max-width: 
991.98px){.quarto-title-banner{padding-top:1em;padding-bottom:1em}}main.quarto-banner-title-block>section:first-child>h2,main.quarto-banner-title-block>section:first-child>.h2,main.quarto-banner-title-block>section:first-child>h3,main.quarto-banner-title-block>section:first-child>.h3,main.quarto-banner-title-block>section:first-child>h4,main.quarto-banner-title-block>section:first-child>.h4{margin-top:0}.quarto-title .quarto-categories{display:flex;flex-wrap:wrap;row-gap:.5em;column-gap:.4em;padding-bottom:.5em;margin-top:.75em}.quarto-title .quarto-categories .quarto-category{padding:.25em .75em;font-size:.65em;text-transform:uppercase;border:solid 1px;border-radius:.25rem;opacity:.6}.quarto-title .quarto-categories .quarto-category a{color:inherit}#title-block-header.quarto-title-block.default .quarto-title-meta{display:grid;grid-template-columns:repeat(2, 1fr)}#title-block-header.quarto-title-block.default .quarto-title .title{margin-bottom:0}#title-block-header.quarto-title-block.default .quarto-title-author-orcid img{margin-top:-5px}#title-block-header.quarto-title-block.default .quarto-description p:last-of-type{margin-bottom:0}#title-block-header.quarto-title-block.default .quarto-title-meta-contents p,#title-block-header.quarto-title-block.default .quarto-title-authors p,#title-block-header.quarto-title-block.default .quarto-title-affiliations p{margin-bottom:.1em}#title-block-header.quarto-title-block.default .quarto-title-meta-heading{text-transform:uppercase;margin-top:1em;font-size:.8em;opacity:.8;font-weight:400}#title-block-header.quarto-title-block.default .quarto-title-meta-contents{font-size:.9em}#title-block-header.quarto-title-block.default .quarto-title-meta-contents a{color:#373a3c}#title-block-header.quarto-title-block.default .quarto-title-meta-contents p.affiliation:last-of-type{margin-bottom:.7em}#title-block-header.quarto-title-block.default p.affiliation{margin-bottom:.1em}#title-block-header.quarto-title-block.default 
.description,#title-block-header.quarto-title-block.default .abstract{margin-top:0}#title-block-header.quarto-title-block.default .description>p,#title-block-header.quarto-title-block.default .abstract>p{font-size:.9em}#title-block-header.quarto-title-block.default .description>p:last-of-type,#title-block-header.quarto-title-block.default .abstract>p:last-of-type{margin-bottom:0}#title-block-header.quarto-title-block.default .description .abstract-title,#title-block-header.quarto-title-block.default .abstract .abstract-title{margin-top:1em;text-transform:uppercase;font-size:.8em;opacity:.8;font-weight:400}#title-block-header.quarto-title-block.default .quarto-title-meta-author{display:grid;grid-template-columns:1fr 1fr}.quarto-title-tools-only{display:flex;justify-content:right}body{-webkit-font-smoothing:antialiased}.badge.bg-light{color:#373a3c}.progress .progress-bar{font-size:8px;line-height:8px}a{text-decoration:none}#title-block-header.quarto-title-block.default .quarto-title .title{margin-bottom:.5em}h1,.h1{color:#ca225e;font-weight:400}h1.title,.title.h1{padding-top:20px}h2,.h2{text-transform:uppercase;font-size:1.3em;letter-spacing:1.5pt;color:#1a162d;font-weight:400}.navbar{border-bottom:1px solid;border-color:#ddd;text-transform:uppercase;letter-spacing:2pt;font-size:12.8px}.navbar .navbar-brand{color:#ca225e;font-family:monospace;text-transform:lowercase}.navbar .navbar-nav{align-items:center}.navbar .navbar-nav .nav-link:hover,.navbar .navbar-dark .navbar-nav .nav-link:focus{color:#ca225e}.nav-footer{height:92px;line-height:20px;font-size:17px;padding:30px 5%;align-items:center}.sidebar-item .sidebar-item-container{padding-bottom:10px}.quarto-listing-category-title{color:#ca225e}.quarto-listing-category .category{padding-bottom:5px}.quarto-listing-category .category:hover{color:rgba(202,34,94,.8)}.quarto-listing-category .category.active{color:#ca225e}#TOC:not(.toc-active){padding:1em;position:relative}#TOC:not(.toc-active) 
a{text-transform:uppercase;letter-spacing:2pt;font-size:.8rem !important;font-weight:bold;color:rgba(202,34,94,.8)}#TOC:not(.toc-active) ul{margin-bottom:0px}#TOC:not(.toc-active) ul li{padding-bottom:.5em}#TOC:not(.toc-active) ul li::marker{color:#ca225e}#TOC:not(.toc-active):after,#TOC:not(.toc-active):before{border:1pt solid rgba(202,34,94,.2);bottom:0;content:"";position:absolute;top:0;width:2.5%}#TOC:not(.toc-active):after,#TOC:not(.toc-active):after{border-left:none;right:0}#TOC:not(.toc-active):before,#TOC:not(.toc-active):before{border-right:none;left:0}#session-info div pre code{color:#5e5e5e;background-color:rgba(233,236,239,.65);border:1px solid rgba(233,236,239,.65);border-radius:.25rem}.cell-output-display table{border-collapse:collapse;border-spacing:0;font-weight:inherit;width:100%;text-align:left;font-size:.87rem;max-width:70%}.cell-output-display table thead{font-weight:600;background-color:rgba(202,34,94,.15)}.cell-output-display table thead tr th{padding:15px 13px}.cell-output-display table thead tr th:last-child{padding:15px 0;padding-right:15px}.cell-output-display table tbody tr:nth-of-type(even)>*{background-color:rgba(202,34,94,.03);--bs-table-accent-bg: rgba($theme-cranberry, 0.03)}.cell-output-display table tbody tr:nth-of-type(odd)>*{background-color:#fff;--bs-table-accent-bg: $theme-white}.cell-output-display table tbody tr:last-of-type{border-bottom:1px solid #edf1f8}.cell-output-display table tbody tr td{padding:15px}.cell-output-display table tbody tr td:last-child{padding:15px 0;padding-right:15px}.quarto-title .quarto-categories .quarto-category:hover{border-color:rgba(202,34,94,.5)}.quarto-title .quarto-categories .quarto-category:hover a{color:#ca225e}.quarto-grid-item .listing-categories .listing-category:hover{border-color:rgba(202,34,94,.5);color:#ca225e}.package-section .packages .package{position:relative;height:200px;overflow-y:hidden;background-size:190px 215px;background-repeat:no-repeat;background-position:left 
top;background-image:url(images/tidy-packages-back-01.svg)}.package-section .packages .package:first-of-type{background-image:url(images/tidy-packages-back-02.svg)}.package-section .packages .package:last-of-type{background-image:url(images/tidy-packages-back-03.svg)}.package-section .packages .package .package-image{position:absolute;top:6px;left:62px;width:120px;height:139px;border:none}.package-section .packages .package .package-info{margin:20px 0 0 210px}.package-info h3,.package-info .h3{padding-top:72px;margin-top:-72px}@media screen and (max-width: 767px){.package-section .package-section-info{margin-bottom:20px}.package-section .packages .package,.package-section .packages .package:first-of-type,.package-section .packages .package:last-of-type{background-image:none}.package-section .packages .package{height:auto;overflow-y:auto;margin-bottom:20px}.package-section .packages .package .package-image{position:static;float:left;margin-right:20px}.package-section .packages .package .package-info{margin:0}}.section.resources{margin-top:30px}.section.resources .resourcesTitle{color:#ca225e;text-transform:uppercase;letter-spacing:2pt;font-size:.8rem !important;font-weight:bold;margin-bottom:10px}.section.resources .event{border-top:7pt solid rgba(161,158,169,.2117647059);background-color:rgba(234,176,196,.1019607843);margin-top:0px;margin-bottom:30px;background-image:none;background-size:40px 40px;padding:10px 20px 20px 20px;font-size:20px;font-weight:300;line-height:25px}.section.resources .event a{color:#1a162d}.section.resources .event .eventDetails{font-size:13px;line-height:18px}#FrontPage .band{position:relative;padding:0 5%}#FrontPage .band .blurb{font-weight:300;display:inline-block;min-height:160px;font-size:24px;line-height:40px;max-width:48%}#FrontPage .event{margin-top:70px;padding:40px;background-color:#fff;max-width:400px;background-repeat:no-repeat;background-position:right top;background-size:75px 75px;border:1px solid 
rgba(234,176,196,.3215686275);background-color:#fcfcfc;color:#272626;line-height:30px;font-size:18px}#FrontPage .hexBadges{display:inline-block;position:relative;width:500px;margin-top:-130px;height:600px}#FrontPage .hexBadges img{width:120px;height:139px;position:absolute;-webkit-transition-property:opacity;-moz-transition-property:opacity;-o-transition-property:opacity;transition-property:opacity;-webkit-transition-duration:.25s;-moz-transition-duration:.25s;-o-transition-duration:.25s;transition-duration:.25s}#FrontPage .hexBadges img.r0.c0,#FrontPage .hexBadges img.r2.c0,#FrontPage .hexBadges img.r4.c0{left:0}#FrontPage .hexBadges img.r0.c1,#FrontPage .hexBadges img.r2.c1,#FrontPage .hexBadges img.r4.c1{left:125px}#FrontPage .hexBadges img.r0.c2,#FrontPage .hexBadges img.r2.c2,#FrontPage .hexBadges img.r4.c2{left:250px}#FrontPage .hexBadges img.r0.c3,#FrontPage .hexBadges img.r2.c3,#FrontPage .hexBadges img.r4.c3{left:375px}#FrontPage .hexBadges img.r1.c0,#FrontPage .hexBadges img.r3.c0,#FrontPage .hexBadges img.r5.c0{left:62px}#FrontPage .hexBadges img.r1.c1,#FrontPage .hexBadges img.r3.c1,#FrontPage .hexBadges img.r5.c1{left:187px}#FrontPage .hexBadges img.r1.c2,#FrontPage .hexBadges img.r3.c2,#FrontPage .hexBadges img.r5.c2{left:312px}#FrontPage .hexBadges img.r1.c3,#FrontPage .hexBadges img.r3.c3,#FrontPage .hexBadges img.r5.c3{left:437px}#FrontPage .hexBadges img.r0{top:0px}#FrontPage .hexBadges img.r1{top:107.5px}#FrontPage .hexBadges img.r2{top:215px}#FrontPage .hexBadges img.r3{top:322.5px}#FrontPage .hexBadges img.r4{top:430px}#FrontPage .hexBadges img.r5{top:537.5px}#FrontPage .bee1{display:block;position:absolute;bottom:10px;left:10px;width:100px;height:auto}#FrontPage .band.first{height:690px}#FrontPage .band.first span.tagline{text-transform:uppercase;font-size:1.2rem !important;font-weight:bold;letter-spacing:2px}#FrontPage .band.first 
.bandContent{max-width:1100px;margin-top:-140px;margin-left:auto;margin-right:auto;background-color:#fff;padding-top:220px;height:100%;background-image:url(images/tidy-back-01.svg);background-repeat:no-repeat;background-size:570px 690px;display:-webkit-flex;-webkit-justify-content:space-between}#FrontPage .band.second{height:952px;background-color:#fcfcfc;border-bottom:solid 1pt #d9d9d9;border-top:solid 1pt #d9d9d9}#FrontPage .band.second span.tagline{text-transform:uppercase;font-size:1.2rem !important;font-weight:bold;letter-spacing:2px}#FrontPage .band.second .bandContent{max-width:1100px;margin-left:auto;margin-right:auto;background-color:#fcfcfc;color:#000;padding-top:120px;padding-bottom:100px;height:100%;background-image:url(images/tidy-back-02.svg);background-repeat:no-repeat;background-size:700px 952px;display:-webkit-flex;-webkit-justify-content:space-between}#FrontPage .band.third{background-color:#fff;height:450px}#FrontPage .band.third .bandContent{max-width:1100px;margin:0 auto;height:450px;padding-top:100px;padding-bottom:100px;background-image:url(images/tidy-back-03.svg);background-repeat:no-repeat;background-size:650px 450px;display:-webkit-flex;-webkit-justify-content:space-between}#FrontPage .band.third span.tagline{text-transform:uppercase;font-size:1.2rem !important;font-weight:bold;letter-spacing:2px}@media screen and (max-width: 1023px){#FrontPage .hexBadges{width:350px}#FrontPage .hexBadges img.r0.c2,#FrontPage .hexBadges img.r2.c2,#FrontPage .hexBadges img.r4.c2{left:125px}#FrontPage .hexBadges img.r1.c2,#FrontPage .hexBadges img.r3.c2,#FrontPage .hexBadges img.r5.c2{left:187px}#FrontPage .band.first{height:auto}#FrontPage .band.first .bandContent{background-image:none;padding-bottom:100px}#FrontPage .band.second,#FrontPage .band.third{height:auto}#FrontPage .band.second .bandContent,#FrontPage .band.third .bandContent{display:block;height:auto;background-image:none;padding-bottom:100px}#FrontPage .band.second .bandContent 
.blurb,#FrontPage .band.third .bandContent .blurb{max-width:100%}#FrontPage .bookCover{margin-left:0px}}@media screen and (max-width: 767px){#FrontPage .band.first .bandContent{display:block;height:auto}#FrontPage .band.first .bandContent .blurb{max-width:100%}}/*# sourceMappingURL=d6b77e37a12f878a50f9f8a85e535bdc.css.map */ diff --git a/docs/site_libs/bootstrap/bootstrap.min.js b/docs/site_libs/bootstrap/bootstrap.min.js new file mode 100644 index 00000000..cc0a2556 --- /dev/null +++ b/docs/site_libs/bootstrap/bootstrap.min.js @@ -0,0 +1,7 @@ +/*! + * Bootstrap v5.1.3 (https://getbootstrap.com/) + * Copyright 2011-2021 The Bootstrap Authors (https://github.com/twbs/bootstrap/graphs/contributors) + * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) + */ +!function(t,e){"object"==typeof exports&&"undefined"!=typeof module?module.exports=e():"function"==typeof define&&define.amd?define(e):(t="undefined"!=typeof globalThis?globalThis:t||self).bootstrap=e()}(this,(function(){"use strict";const t="transitionend",e=t=>{let e=t.getAttribute("data-bs-target");if(!e||"#"===e){let i=t.getAttribute("href");if(!i||!i.includes("#")&&!i.startsWith("."))return null;i.includes("#")&&!i.startsWith("#")&&(i=`#${i.split("#")[1]}`),e=i&&"#"!==i?i.trim():null}return e},i=t=>{const i=e(t);return i&&document.querySelector(i)?i:null},n=t=>{const i=e(t);return i?document.querySelector(i):null},s=e=>{e.dispatchEvent(new Event(t))},o=t=>!(!t||"object"!=typeof t)&&(void 0!==t.jquery&&(t=t[0]),void 0!==t.nodeType),r=t=>o(t)?t.jquery?t[0]:t:"string"==typeof t&&t.length>0?document.querySelector(t):null,a=(t,e,i)=>{Object.keys(i).forEach((n=>{const s=i[n],r=e[n],a=r&&o(r)?"element":null==(l=r)?`${l}`:{}.toString.call(l).match(/\s([a-z]+)/i)[1].toLowerCase();var l;if(!new RegExp(s).test(a))throw new TypeError(`${t.toUpperCase()}: Option "${n}" provided type "${a}" but expected type 
"${s}".`)}))},l=t=>!(!o(t)||0===t.getClientRects().length)&&"visible"===getComputedStyle(t).getPropertyValue("visibility"),c=t=>!t||t.nodeType!==Node.ELEMENT_NODE||!!t.classList.contains("disabled")||(void 0!==t.disabled?t.disabled:t.hasAttribute("disabled")&&"false"!==t.getAttribute("disabled")),h=t=>{if(!document.documentElement.attachShadow)return null;if("function"==typeof t.getRootNode){const e=t.getRootNode();return e instanceof ShadowRoot?e:null}return t instanceof ShadowRoot?t:t.parentNode?h(t.parentNode):null},d=()=>{},u=t=>{t.offsetHeight},f=()=>{const{jQuery:t}=window;return t&&!document.body.hasAttribute("data-bs-no-jquery")?t:null},p=[],m=()=>"rtl"===document.documentElement.dir,g=t=>{var e;e=()=>{const e=f();if(e){const i=t.NAME,n=e.fn[i];e.fn[i]=t.jQueryInterface,e.fn[i].Constructor=t,e.fn[i].noConflict=()=>(e.fn[i]=n,t.jQueryInterface)}},"loading"===document.readyState?(p.length||document.addEventListener("DOMContentLoaded",(()=>{p.forEach((t=>t()))})),p.push(e)):e()},_=t=>{"function"==typeof t&&t()},b=(e,i,n=!0)=>{if(!n)return void _(e);const o=(t=>{if(!t)return 0;let{transitionDuration:e,transitionDelay:i}=window.getComputedStyle(t);const n=Number.parseFloat(e),s=Number.parseFloat(i);return n||s?(e=e.split(",")[0],i=i.split(",")[0],1e3*(Number.parseFloat(e)+Number.parseFloat(i))):0})(i)+5;let r=!1;const a=({target:n})=>{n===i&&(r=!0,i.removeEventListener(t,a),_(e))};i.addEventListener(t,a),setTimeout((()=>{r||s(i)}),o)},v=(t,e,i,n)=>{let s=t.indexOf(e);if(-1===s)return t[!i&&n?t.length-1:0];const o=t.length;return s+=i?1:-1,n&&(s=(s+o)%o),t[Math.max(0,Math.min(s,o-1))]},y=/[^.]*(?=\..*)\.|.*/,w=/\..*/,E=/::\d+$/,A={};let T=1;const O={mouseenter:"mouseover",mouseleave:"mouseout"},C=/^(mouseenter|mouseleave)/i,k=new 
Set(["click","dblclick","mouseup","mousedown","contextmenu","mousewheel","DOMMouseScroll","mouseover","mouseout","mousemove","selectstart","selectend","keydown","keypress","keyup","orientationchange","touchstart","touchmove","touchend","touchcancel","pointerdown","pointermove","pointerup","pointerleave","pointercancel","gesturestart","gesturechange","gestureend","focus","blur","change","reset","select","submit","focusin","focusout","load","unload","beforeunload","resize","move","DOMContentLoaded","readystatechange","error","abort","scroll"]);function L(t,e){return e&&`${e}::${T++}`||t.uidEvent||T++}function x(t){const e=L(t);return t.uidEvent=e,A[e]=A[e]||{},A[e]}function D(t,e,i=null){const n=Object.keys(t);for(let s=0,o=n.length;sfunction(e){if(!e.relatedTarget||e.relatedTarget!==e.delegateTarget&&!e.delegateTarget.contains(e.relatedTarget))return t.call(this,e)};n?n=t(n):i=t(i)}const[o,r,a]=S(e,i,n),l=x(t),c=l[a]||(l[a]={}),h=D(c,r,o?i:null);if(h)return void(h.oneOff=h.oneOff&&s);const d=L(r,e.replace(y,"")),u=o?function(t,e,i){return function n(s){const o=t.querySelectorAll(e);for(let{target:r}=s;r&&r!==this;r=r.parentNode)for(let a=o.length;a--;)if(o[a]===r)return s.delegateTarget=r,n.oneOff&&j.off(t,s.type,e,i),i.apply(r,[s]);return null}}(t,i,n):function(t,e){return function i(n){return n.delegateTarget=t,i.oneOff&&j.off(t,n.type,e),e.apply(t,[n])}}(t,i);u.delegationSelector=o?i:null,u.originalHandler=r,u.oneOff=s,u.uidEvent=d,c[d]=u,t.addEventListener(a,u,o)}function I(t,e,i,n,s){const o=D(e[i],n,s);o&&(t.removeEventListener(i,o,Boolean(s)),delete e[i][o.uidEvent])}function P(t){return t=t.replace(w,""),O[t]||t}const j={on(t,e,i,n){N(t,e,i,n,!1)},one(t,e,i,n){N(t,e,i,n,!0)},off(t,e,i,n){if("string"!=typeof e||!t)return;const[s,o,r]=S(e,i,n),a=r!==e,l=x(t),c=e.startsWith(".");if(void 0!==o){if(!l||!l[r])return;return void I(t,l,r,o,s?i:null)}c&&Object.keys(l).forEach((i=>{!function(t,e,i,n){const s=e[i]||{};Object.keys(s).forEach((o=>{if(o.includes(n)){const 
n=s[o];I(t,e,i,n.originalHandler,n.delegationSelector)}}))}(t,l,i,e.slice(1))}));const h=l[r]||{};Object.keys(h).forEach((i=>{const n=i.replace(E,"");if(!a||e.includes(n)){const e=h[i];I(t,l,r,e.originalHandler,e.delegationSelector)}}))},trigger(t,e,i){if("string"!=typeof e||!t)return null;const n=f(),s=P(e),o=e!==s,r=k.has(s);let a,l=!0,c=!0,h=!1,d=null;return o&&n&&(a=n.Event(e,i),n(t).trigger(a),l=!a.isPropagationStopped(),c=!a.isImmediatePropagationStopped(),h=a.isDefaultPrevented()),r?(d=document.createEvent("HTMLEvents"),d.initEvent(s,l,!0)):d=new CustomEvent(e,{bubbles:l,cancelable:!0}),void 0!==i&&Object.keys(i).forEach((t=>{Object.defineProperty(d,t,{get:()=>i[t]})})),h&&d.preventDefault(),c&&t.dispatchEvent(d),d.defaultPrevented&&void 0!==a&&a.preventDefault(),d}},M=new Map,H={set(t,e,i){M.has(t)||M.set(t,new Map);const n=M.get(t);n.has(e)||0===n.size?n.set(e,i):console.error(`Bootstrap doesn't allow more than one instance per element. Bound instance: ${Array.from(n.keys())[0]}.`)},get:(t,e)=>M.has(t)&&M.get(t).get(e)||null,remove(t,e){if(!M.has(t))return;const i=M.get(t);i.delete(e),0===i.size&&M.delete(t)}};class B{constructor(t){(t=r(t))&&(this._element=t,H.set(this._element,this.constructor.DATA_KEY,this))}dispose(){H.remove(this._element,this.constructor.DATA_KEY),j.off(this._element,this.constructor.EVENT_KEY),Object.getOwnPropertyNames(this).forEach((t=>{this[t]=null}))}_queueCallback(t,e,i=!0){b(t,e,i)}static getInstance(t){return H.get(r(t),this.DATA_KEY)}static getOrCreateInstance(t,e={}){return this.getInstance(t)||new this(t,"object"==typeof e?e:null)}static get VERSION(){return"5.1.3"}static get NAME(){throw new Error('You have to implement the static method "NAME", for each component!')}static get DATA_KEY(){return`bs.${this.NAME}`}static get EVENT_KEY(){return`.${this.DATA_KEY}`}}const R=(t,e="hide")=>{const 
i=`click.dismiss${t.EVENT_KEY}`,s=t.NAME;j.on(document,i,`[data-bs-dismiss="${s}"]`,(function(i){if(["A","AREA"].includes(this.tagName)&&i.preventDefault(),c(this))return;const o=n(this)||this.closest(`.${s}`);t.getOrCreateInstance(o)[e]()}))};class W extends B{static get NAME(){return"alert"}close(){if(j.trigger(this._element,"close.bs.alert").defaultPrevented)return;this._element.classList.remove("show");const t=this._element.classList.contains("fade");this._queueCallback((()=>this._destroyElement()),this._element,t)}_destroyElement(){this._element.remove(),j.trigger(this._element,"closed.bs.alert"),this.dispose()}static jQueryInterface(t){return this.each((function(){const e=W.getOrCreateInstance(this);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}R(W,"close"),g(W);const $='[data-bs-toggle="button"]';class z extends B{static get NAME(){return"button"}toggle(){this._element.setAttribute("aria-pressed",this._element.classList.toggle("active"))}static jQueryInterface(t){return this.each((function(){const e=z.getOrCreateInstance(this);"toggle"===t&&e[t]()}))}}function q(t){return"true"===t||"false"!==t&&(t===Number(t).toString()?Number(t):""===t||"null"===t?null:t)}function F(t){return t.replace(/[A-Z]/g,(t=>`-${t.toLowerCase()}`))}j.on(document,"click.bs.button.data-api",$,(t=>{t.preventDefault();const e=t.target.closest($);z.getOrCreateInstance(e).toggle()})),g(z);const U={setDataAttribute(t,e,i){t.setAttribute(`data-bs-${F(e)}`,i)},removeDataAttribute(t,e){t.removeAttribute(`data-bs-${F(e)}`)},getDataAttributes(t){if(!t)return{};const e={};return Object.keys(t.dataset).filter((t=>t.startsWith("bs"))).forEach((i=>{let n=i.replace(/^bs/,"");n=n.charAt(0).toLowerCase()+n.slice(1,n.length),e[n]=q(t.dataset[i])})),e},getDataAttribute:(t,e)=>q(t.getAttribute(`data-bs-${F(e)}`)),offset(t){const 
e=t.getBoundingClientRect();return{top:e.top+window.pageYOffset,left:e.left+window.pageXOffset}},position:t=>({top:t.offsetTop,left:t.offsetLeft})},V={find:(t,e=document.documentElement)=>[].concat(...Element.prototype.querySelectorAll.call(e,t)),findOne:(t,e=document.documentElement)=>Element.prototype.querySelector.call(e,t),children:(t,e)=>[].concat(...t.children).filter((t=>t.matches(e))),parents(t,e){const i=[];let n=t.parentNode;for(;n&&n.nodeType===Node.ELEMENT_NODE&&3!==n.nodeType;)n.matches(e)&&i.push(n),n=n.parentNode;return i},prev(t,e){let i=t.previousElementSibling;for(;i;){if(i.matches(e))return[i];i=i.previousElementSibling}return[]},next(t,e){let i=t.nextElementSibling;for(;i;){if(i.matches(e))return[i];i=i.nextElementSibling}return[]},focusableChildren(t){const e=["a","button","input","textarea","select","details","[tabindex]",'[contenteditable="true"]'].map((t=>`${t}:not([tabindex^="-"])`)).join(", ");return this.find(e,t).filter((t=>!c(t)&&l(t)))}},K="carousel",X={interval:5e3,keyboard:!0,slide:!1,pause:"hover",wrap:!0,touch:!0},Y={interval:"(number|boolean)",keyboard:"boolean",slide:"(boolean|string)",pause:"(string|boolean)",wrap:"boolean",touch:"boolean"},Q="next",G="prev",Z="left",J="right",tt={ArrowLeft:J,ArrowRight:Z},et="slid.bs.carousel",it="active",nt=".active.carousel-item";class st extends B{constructor(t,e){super(t),this._items=null,this._interval=null,this._activeElement=null,this._isPaused=!1,this._isSliding=!1,this.touchTimeout=null,this.touchStartX=0,this.touchDeltaX=0,this._config=this._getConfig(e),this._indicatorsElement=V.findOne(".carousel-indicators",this._element),this._touchSupported="ontouchstart"in document.documentElement||navigator.maxTouchPoints>0,this._pointerEvent=Boolean(window.PointerEvent),this._addEventListeners()}static get Default(){return X}static get NAME(){return 
K}next(){this._slide(Q)}nextWhenVisible(){!document.hidden&&l(this._element)&&this.next()}prev(){this._slide(G)}pause(t){t||(this._isPaused=!0),V.findOne(".carousel-item-next, .carousel-item-prev",this._element)&&(s(this._element),this.cycle(!0)),clearInterval(this._interval),this._interval=null}cycle(t){t||(this._isPaused=!1),this._interval&&(clearInterval(this._interval),this._interval=null),this._config&&this._config.interval&&!this._isPaused&&(this._updateInterval(),this._interval=setInterval((document.visibilityState?this.nextWhenVisible:this.next).bind(this),this._config.interval))}to(t){this._activeElement=V.findOne(nt,this._element);const e=this._getItemIndex(this._activeElement);if(t>this._items.length-1||t<0)return;if(this._isSliding)return void j.one(this._element,et,(()=>this.to(t)));if(e===t)return this.pause(),void this.cycle();const i=t>e?Q:G;this._slide(i,this._items[t])}_getConfig(t){return t={...X,...U.getDataAttributes(this._element),..."object"==typeof t?t:{}},a(K,t,Y),t}_handleSwipe(){const t=Math.abs(this.touchDeltaX);if(t<=40)return;const e=t/this.touchDeltaX;this.touchDeltaX=0,e&&this._slide(e>0?J:Z)}_addEventListeners(){this._config.keyboard&&j.on(this._element,"keydown.bs.carousel",(t=>this._keydown(t))),"hover"===this._config.pause&&(j.on(this._element,"mouseenter.bs.carousel",(t=>this.pause(t))),j.on(this._element,"mouseleave.bs.carousel",(t=>this.cycle(t)))),this._config.touch&&this._touchSupported&&this._addTouchEventListeners()}_addTouchEventListeners(){const 
t=t=>this._pointerEvent&&("pen"===t.pointerType||"touch"===t.pointerType),e=e=>{t(e)?this.touchStartX=e.clientX:this._pointerEvent||(this.touchStartX=e.touches[0].clientX)},i=t=>{this.touchDeltaX=t.touches&&t.touches.length>1?0:t.touches[0].clientX-this.touchStartX},n=e=>{t(e)&&(this.touchDeltaX=e.clientX-this.touchStartX),this._handleSwipe(),"hover"===this._config.pause&&(this.pause(),this.touchTimeout&&clearTimeout(this.touchTimeout),this.touchTimeout=setTimeout((t=>this.cycle(t)),500+this._config.interval))};V.find(".carousel-item img",this._element).forEach((t=>{j.on(t,"dragstart.bs.carousel",(t=>t.preventDefault()))})),this._pointerEvent?(j.on(this._element,"pointerdown.bs.carousel",(t=>e(t))),j.on(this._element,"pointerup.bs.carousel",(t=>n(t))),this._element.classList.add("pointer-event")):(j.on(this._element,"touchstart.bs.carousel",(t=>e(t))),j.on(this._element,"touchmove.bs.carousel",(t=>i(t))),j.on(this._element,"touchend.bs.carousel",(t=>n(t))))}_keydown(t){if(/input|textarea/i.test(t.target.tagName))return;const e=tt[t.key];e&&(t.preventDefault(),this._slide(e))}_getItemIndex(t){return this._items=t&&t.parentNode?V.find(".carousel-item",t.parentNode):[],this._items.indexOf(t)}_getItemByOrder(t,e){const i=t===Q;return v(this._items,e,i,this._config.wrap)}_triggerSlideEvent(t,e){const i=this._getItemIndex(t),n=this._getItemIndex(V.findOne(nt,this._element));return j.trigger(this._element,"slide.bs.carousel",{relatedTarget:t,direction:e,from:n,to:i})}_setActiveIndicatorElement(t){if(this._indicatorsElement){const e=V.findOne(".active",this._indicatorsElement);e.classList.remove(it),e.removeAttribute("aria-current");const i=V.find("[data-bs-target]",this._indicatorsElement);for(let e=0;e{j.trigger(this._element,et,{relatedTarget:o,direction:d,from:s,to:r})};if(this._element.classList.contains("slide")){o.classList.add(h),u(o),n.classList.add(c),o.classList.add(c);const 
t=()=>{o.classList.remove(c,h),o.classList.add(it),n.classList.remove(it,h,c),this._isSliding=!1,setTimeout(f,0)};this._queueCallback(t,n,!0)}else n.classList.remove(it),o.classList.add(it),this._isSliding=!1,f();a&&this.cycle()}_directionToOrder(t){return[J,Z].includes(t)?m()?t===Z?G:Q:t===Z?Q:G:t}_orderToDirection(t){return[Q,G].includes(t)?m()?t===G?Z:J:t===G?J:Z:t}static carouselInterface(t,e){const i=st.getOrCreateInstance(t,e);let{_config:n}=i;"object"==typeof e&&(n={...n,...e});const s="string"==typeof e?e:n.slide;if("number"==typeof e)i.to(e);else if("string"==typeof s){if(void 0===i[s])throw new TypeError(`No method named "${s}"`);i[s]()}else n.interval&&n.ride&&(i.pause(),i.cycle())}static jQueryInterface(t){return this.each((function(){st.carouselInterface(this,t)}))}static dataApiClickHandler(t){const e=n(this);if(!e||!e.classList.contains("carousel"))return;const i={...U.getDataAttributes(e),...U.getDataAttributes(this)},s=this.getAttribute("data-bs-slide-to");s&&(i.interval=!1),st.carouselInterface(e,i),s&&st.getInstance(e).to(s),t.preventDefault()}}j.on(document,"click.bs.carousel.data-api","[data-bs-slide], [data-bs-slide-to]",st.dataApiClickHandler),j.on(window,"load.bs.carousel.data-api",(()=>{const t=V.find('[data-bs-ride="carousel"]');for(let e=0,i=t.length;et===this._element));null!==s&&o.length&&(this._selector=s,this._triggerArray.push(e))}this._initializeChildren(),this._config.parent||this._addAriaAndCollapsedClass(this._triggerArray,this._isShown()),this._config.toggle&&this.toggle()}static get Default(){return rt}static get NAME(){return ot}toggle(){this._isShown()?this.hide():this.show()}show(){if(this._isTransitioning||this._isShown())return;let t,e=[];if(this._config.parent){const t=V.find(ut,this._config.parent);e=V.find(".collapse.show, .collapse.collapsing",this._config.parent).filter((e=>!t.includes(e)))}const i=V.findOne(this._selector);if(e.length){const 
n=e.find((t=>i!==t));if(t=n?pt.getInstance(n):null,t&&t._isTransitioning)return}if(j.trigger(this._element,"show.bs.collapse").defaultPrevented)return;e.forEach((e=>{i!==e&&pt.getOrCreateInstance(e,{toggle:!1}).hide(),t||H.set(e,"bs.collapse",null)}));const n=this._getDimension();this._element.classList.remove(ct),this._element.classList.add(ht),this._element.style[n]=0,this._addAriaAndCollapsedClass(this._triggerArray,!0),this._isTransitioning=!0;const s=`scroll${n[0].toUpperCase()+n.slice(1)}`;this._queueCallback((()=>{this._isTransitioning=!1,this._element.classList.remove(ht),this._element.classList.add(ct,lt),this._element.style[n]="",j.trigger(this._element,"shown.bs.collapse")}),this._element,!0),this._element.style[n]=`${this._element[s]}px`}hide(){if(this._isTransitioning||!this._isShown())return;if(j.trigger(this._element,"hide.bs.collapse").defaultPrevented)return;const t=this._getDimension();this._element.style[t]=`${this._element.getBoundingClientRect()[t]}px`,u(this._element),this._element.classList.add(ht),this._element.classList.remove(ct,lt);const e=this._triggerArray.length;for(let t=0;t{this._isTransitioning=!1,this._element.classList.remove(ht),this._element.classList.add(ct),j.trigger(this._element,"hidden.bs.collapse")}),this._element,!0)}_isShown(t=this._element){return t.classList.contains(lt)}_getConfig(t){return(t={...rt,...U.getDataAttributes(this._element),...t}).toggle=Boolean(t.toggle),t.parent=r(t.parent),a(ot,t,at),t}_getDimension(){return this._element.classList.contains("collapse-horizontal")?"width":"height"}_initializeChildren(){if(!this._config.parent)return;const t=V.find(ut,this._config.parent);V.find(ft,this._config.parent).filter((e=>!t.includes(e))).forEach((t=>{const e=n(t);e&&this._addAriaAndCollapsedClass([t],this._isShown(e))}))}_addAriaAndCollapsedClass(t,e){t.length&&t.forEach((t=>{e?t.classList.remove(dt):t.classList.add(dt),t.setAttribute("aria-expanded",e)}))}static jQueryInterface(t){return 
this.each((function(){const e={};"string"==typeof t&&/show|hide/.test(t)&&(e.toggle=!1);const i=pt.getOrCreateInstance(this,e);if("string"==typeof t){if(void 0===i[t])throw new TypeError(`No method named "${t}"`);i[t]()}}))}}j.on(document,"click.bs.collapse.data-api",ft,(function(t){("A"===t.target.tagName||t.delegateTarget&&"A"===t.delegateTarget.tagName)&&t.preventDefault();const e=i(this);V.find(e).forEach((t=>{pt.getOrCreateInstance(t,{toggle:!1}).toggle()}))})),g(pt);var mt="top",gt="bottom",_t="right",bt="left",vt="auto",yt=[mt,gt,_t,bt],wt="start",Et="end",At="clippingParents",Tt="viewport",Ot="popper",Ct="reference",kt=yt.reduce((function(t,e){return t.concat([e+"-"+wt,e+"-"+Et])}),[]),Lt=[].concat(yt,[vt]).reduce((function(t,e){return t.concat([e,e+"-"+wt,e+"-"+Et])}),[]),xt="beforeRead",Dt="read",St="afterRead",Nt="beforeMain",It="main",Pt="afterMain",jt="beforeWrite",Mt="write",Ht="afterWrite",Bt=[xt,Dt,St,Nt,It,Pt,jt,Mt,Ht];function Rt(t){return t?(t.nodeName||"").toLowerCase():null}function Wt(t){if(null==t)return window;if("[object Window]"!==t.toString()){var e=t.ownerDocument;return e&&e.defaultView||window}return t}function $t(t){return t instanceof Wt(t).Element||t instanceof Element}function zt(t){return t instanceof Wt(t).HTMLElement||t instanceof HTMLElement}function qt(t){return"undefined"!=typeof ShadowRoot&&(t instanceof Wt(t).ShadowRoot||t instanceof ShadowRoot)}const Ft={name:"applyStyles",enabled:!0,phase:"write",fn:function(t){var e=t.state;Object.keys(e.elements).forEach((function(t){var i=e.styles[t]||{},n=e.attributes[t]||{},s=e.elements[t];zt(s)&&Rt(s)&&(Object.assign(s.style,i),Object.keys(n).forEach((function(t){var e=n[t];!1===e?s.removeAttribute(t):s.setAttribute(t,!0===e?"":e)})))}))},effect:function(t){var e=t.state,i={popper:{position:e.options.strategy,left:"0",top:"0",margin:"0"},arrow:{position:"absolute"},reference:{}};return 
Object.assign(e.elements.popper.style,i.popper),e.styles=i,e.elements.arrow&&Object.assign(e.elements.arrow.style,i.arrow),function(){Object.keys(e.elements).forEach((function(t){var n=e.elements[t],s=e.attributes[t]||{},o=Object.keys(e.styles.hasOwnProperty(t)?e.styles[t]:i[t]).reduce((function(t,e){return t[e]="",t}),{});zt(n)&&Rt(n)&&(Object.assign(n.style,o),Object.keys(s).forEach((function(t){n.removeAttribute(t)})))}))}},requires:["computeStyles"]};function Ut(t){return t.split("-")[0]}function Vt(t,e){var i=t.getBoundingClientRect();return{width:i.width/1,height:i.height/1,top:i.top/1,right:i.right/1,bottom:i.bottom/1,left:i.left/1,x:i.left/1,y:i.top/1}}function Kt(t){var e=Vt(t),i=t.offsetWidth,n=t.offsetHeight;return Math.abs(e.width-i)<=1&&(i=e.width),Math.abs(e.height-n)<=1&&(n=e.height),{x:t.offsetLeft,y:t.offsetTop,width:i,height:n}}function Xt(t,e){var i=e.getRootNode&&e.getRootNode();if(t.contains(e))return!0;if(i&&qt(i)){var n=e;do{if(n&&t.isSameNode(n))return!0;n=n.parentNode||n.host}while(n)}return!1}function Yt(t){return Wt(t).getComputedStyle(t)}function Qt(t){return["table","td","th"].indexOf(Rt(t))>=0}function Gt(t){return(($t(t)?t.ownerDocument:t.document)||window.document).documentElement}function Zt(t){return"html"===Rt(t)?t:t.assignedSlot||t.parentNode||(qt(t)?t.host:null)||Gt(t)}function Jt(t){return zt(t)&&"fixed"!==Yt(t).position?t.offsetParent:null}function te(t){for(var e=Wt(t),i=Jt(t);i&&Qt(i)&&"static"===Yt(i).position;)i=Jt(i);return i&&("html"===Rt(i)||"body"===Rt(i)&&"static"===Yt(i).position)?e:i||function(t){var e=-1!==navigator.userAgent.toLowerCase().indexOf("firefox");if(-1!==navigator.userAgent.indexOf("Trident")&&zt(t)&&"fixed"===Yt(t).position)return null;for(var i=Zt(t);zt(i)&&["html","body"].indexOf(Rt(i))<0;){var n=Yt(i);if("none"!==n.transform||"none"!==n.perspective||"paint"===n.contain||-1!==["transform","perspective"].indexOf(n.willChange)||e&&"filter"===n.willChange||e&&n.filter&&"none"!==n.filter)return 
i;i=i.parentNode}return null}(t)||e}function ee(t){return["top","bottom"].indexOf(t)>=0?"x":"y"}var ie=Math.max,ne=Math.min,se=Math.round;function oe(t,e,i){return ie(t,ne(e,i))}function re(t){return Object.assign({},{top:0,right:0,bottom:0,left:0},t)}function ae(t,e){return e.reduce((function(e,i){return e[i]=t,e}),{})}const le={name:"arrow",enabled:!0,phase:"main",fn:function(t){var e,i=t.state,n=t.name,s=t.options,o=i.elements.arrow,r=i.modifiersData.popperOffsets,a=Ut(i.placement),l=ee(a),c=[bt,_t].indexOf(a)>=0?"height":"width";if(o&&r){var h=function(t,e){return re("number"!=typeof(t="function"==typeof t?t(Object.assign({},e.rects,{placement:e.placement})):t)?t:ae(t,yt))}(s.padding,i),d=Kt(o),u="y"===l?mt:bt,f="y"===l?gt:_t,p=i.rects.reference[c]+i.rects.reference[l]-r[l]-i.rects.popper[c],m=r[l]-i.rects.reference[l],g=te(o),_=g?"y"===l?g.clientHeight||0:g.clientWidth||0:0,b=p/2-m/2,v=h[u],y=_-d[c]-h[f],w=_/2-d[c]/2+b,E=oe(v,w,y),A=l;i.modifiersData[n]=((e={})[A]=E,e.centerOffset=E-w,e)}},effect:function(t){var e=t.state,i=t.options.element,n=void 0===i?"[data-popper-arrow]":i;null!=n&&("string"!=typeof n||(n=e.elements.popper.querySelector(n)))&&Xt(e.elements.popper,n)&&(e.elements.arrow=n)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function ce(t){return t.split("-")[1]}var he={top:"auto",right:"auto",bottom:"auto",left:"auto"};function de(t){var e,i=t.popper,n=t.popperRect,s=t.placement,o=t.variation,r=t.offsets,a=t.position,l=t.gpuAcceleration,c=t.adaptive,h=t.roundOffsets,d=!0===h?function(t){var e=t.x,i=t.y,n=window.devicePixelRatio||1;return{x:se(se(e*n)/n)||0,y:se(se(i*n)/n)||0}}(r):"function"==typeof h?h(r):r,u=d.x,f=void 0===u?0:u,p=d.y,m=void 0===p?0:p,g=r.hasOwnProperty("x"),_=r.hasOwnProperty("y"),b=bt,v=mt,y=window;if(c){var 
w=te(i),E="clientHeight",A="clientWidth";w===Wt(i)&&"static"!==Yt(w=Gt(i)).position&&"absolute"===a&&(E="scrollHeight",A="scrollWidth"),w=w,s!==mt&&(s!==bt&&s!==_t||o!==Et)||(v=gt,m-=w[E]-n.height,m*=l?1:-1),s!==bt&&(s!==mt&&s!==gt||o!==Et)||(b=_t,f-=w[A]-n.width,f*=l?1:-1)}var T,O=Object.assign({position:a},c&&he);return l?Object.assign({},O,((T={})[v]=_?"0":"",T[b]=g?"0":"",T.transform=(y.devicePixelRatio||1)<=1?"translate("+f+"px, "+m+"px)":"translate3d("+f+"px, "+m+"px, 0)",T)):Object.assign({},O,((e={})[v]=_?m+"px":"",e[b]=g?f+"px":"",e.transform="",e))}const ue={name:"computeStyles",enabled:!0,phase:"beforeWrite",fn:function(t){var e=t.state,i=t.options,n=i.gpuAcceleration,s=void 0===n||n,o=i.adaptive,r=void 0===o||o,a=i.roundOffsets,l=void 0===a||a,c={placement:Ut(e.placement),variation:ce(e.placement),popper:e.elements.popper,popperRect:e.rects.popper,gpuAcceleration:s};null!=e.modifiersData.popperOffsets&&(e.styles.popper=Object.assign({},e.styles.popper,de(Object.assign({},c,{offsets:e.modifiersData.popperOffsets,position:e.options.strategy,adaptive:r,roundOffsets:l})))),null!=e.modifiersData.arrow&&(e.styles.arrow=Object.assign({},e.styles.arrow,de(Object.assign({},c,{offsets:e.modifiersData.arrow,position:"absolute",adaptive:!1,roundOffsets:l})))),e.attributes.popper=Object.assign({},e.attributes.popper,{"data-popper-placement":e.placement})},data:{}};var fe={passive:!0};const pe={name:"eventListeners",enabled:!0,phase:"write",fn:function(){},effect:function(t){var e=t.state,i=t.instance,n=t.options,s=n.scroll,o=void 0===s||s,r=n.resize,a=void 0===r||r,l=Wt(e.elements.popper),c=[].concat(e.scrollParents.reference,e.scrollParents.popper);return o&&c.forEach((function(t){t.addEventListener("scroll",i.update,fe)})),a&&l.addEventListener("resize",i.update,fe),function(){o&&c.forEach((function(t){t.removeEventListener("scroll",i.update,fe)})),a&&l.removeEventListener("resize",i.update,fe)}},data:{}};var 
me={left:"right",right:"left",bottom:"top",top:"bottom"};function ge(t){return t.replace(/left|right|bottom|top/g,(function(t){return me[t]}))}var _e={start:"end",end:"start"};function be(t){return t.replace(/start|end/g,(function(t){return _e[t]}))}function ve(t){var e=Wt(t);return{scrollLeft:e.pageXOffset,scrollTop:e.pageYOffset}}function ye(t){return Vt(Gt(t)).left+ve(t).scrollLeft}function we(t){var e=Yt(t),i=e.overflow,n=e.overflowX,s=e.overflowY;return/auto|scroll|overlay|hidden/.test(i+s+n)}function Ee(t){return["html","body","#document"].indexOf(Rt(t))>=0?t.ownerDocument.body:zt(t)&&we(t)?t:Ee(Zt(t))}function Ae(t,e){var i;void 0===e&&(e=[]);var n=Ee(t),s=n===(null==(i=t.ownerDocument)?void 0:i.body),o=Wt(n),r=s?[o].concat(o.visualViewport||[],we(n)?n:[]):n,a=e.concat(r);return s?a:a.concat(Ae(Zt(r)))}function Te(t){return Object.assign({},t,{left:t.x,top:t.y,right:t.x+t.width,bottom:t.y+t.height})}function Oe(t,e){return e===Tt?Te(function(t){var e=Wt(t),i=Gt(t),n=e.visualViewport,s=i.clientWidth,o=i.clientHeight,r=0,a=0;return n&&(s=n.width,o=n.height,/^((?!chrome|android).)*safari/i.test(navigator.userAgent)||(r=n.offsetLeft,a=n.offsetTop)),{width:s,height:o,x:r+ye(t),y:a}}(t)):zt(e)?function(t){var e=Vt(t);return e.top=e.top+t.clientTop,e.left=e.left+t.clientLeft,e.bottom=e.top+t.clientHeight,e.right=e.left+t.clientWidth,e.width=t.clientWidth,e.height=t.clientHeight,e.x=e.left,e.y=e.top,e}(e):Te(function(t){var e,i=Gt(t),n=ve(t),s=null==(e=t.ownerDocument)?void 0:e.body,o=ie(i.scrollWidth,i.clientWidth,s?s.scrollWidth:0,s?s.clientWidth:0),r=ie(i.scrollHeight,i.clientHeight,s?s.scrollHeight:0,s?s.clientHeight:0),a=-n.scrollLeft+ye(t),l=-n.scrollTop;return"rtl"===Yt(s||i).direction&&(a+=ie(i.clientWidth,s?s.clientWidth:0)-o),{width:o,height:r,x:a,y:l}}(Gt(t)))}function Ce(t){var e,i=t.reference,n=t.element,s=t.placement,o=s?Ut(s):null,r=s?ce(s):null,a=i.x+i.width/2-n.width/2,l=i.y+i.height/2-n.height/2;switch(o){case mt:e={x:a,y:i.y-n.height};break;case 
gt:e={x:a,y:i.y+i.height};break;case _t:e={x:i.x+i.width,y:l};break;case bt:e={x:i.x-n.width,y:l};break;default:e={x:i.x,y:i.y}}var c=o?ee(o):null;if(null!=c){var h="y"===c?"height":"width";switch(r){case wt:e[c]=e[c]-(i[h]/2-n[h]/2);break;case Et:e[c]=e[c]+(i[h]/2-n[h]/2)}}return e}function ke(t,e){void 0===e&&(e={});var i=e,n=i.placement,s=void 0===n?t.placement:n,o=i.boundary,r=void 0===o?At:o,a=i.rootBoundary,l=void 0===a?Tt:a,c=i.elementContext,h=void 0===c?Ot:c,d=i.altBoundary,u=void 0!==d&&d,f=i.padding,p=void 0===f?0:f,m=re("number"!=typeof p?p:ae(p,yt)),g=h===Ot?Ct:Ot,_=t.rects.popper,b=t.elements[u?g:h],v=function(t,e,i){var n="clippingParents"===e?function(t){var e=Ae(Zt(t)),i=["absolute","fixed"].indexOf(Yt(t).position)>=0&&zt(t)?te(t):t;return $t(i)?e.filter((function(t){return $t(t)&&Xt(t,i)&&"body"!==Rt(t)})):[]}(t):[].concat(e),s=[].concat(n,[i]),o=s[0],r=s.reduce((function(e,i){var n=Oe(t,i);return e.top=ie(n.top,e.top),e.right=ne(n.right,e.right),e.bottom=ne(n.bottom,e.bottom),e.left=ie(n.left,e.left),e}),Oe(t,o));return r.width=r.right-r.left,r.height=r.bottom-r.top,r.x=r.left,r.y=r.top,r}($t(b)?b:b.contextElement||Gt(t.elements.popper),r,l),y=Vt(t.elements.reference),w=Ce({reference:y,element:_,strategy:"absolute",placement:s}),E=Te(Object.assign({},_,w)),A=h===Ot?E:y,T={top:v.top-A.top+m.top,bottom:A.bottom-v.bottom+m.bottom,left:v.left-A.left+m.left,right:A.right-v.right+m.right},O=t.modifiersData.offset;if(h===Ot&&O){var C=O[s];Object.keys(T).forEach((function(t){var e=[_t,gt].indexOf(t)>=0?1:-1,i=[mt,gt].indexOf(t)>=0?"y":"x";T[t]+=C[i]*e}))}return T}function Le(t,e){void 0===e&&(e={});var i=e,n=i.placement,s=i.boundary,o=i.rootBoundary,r=i.padding,a=i.flipVariations,l=i.allowedAutoPlacements,c=void 0===l?Lt:l,h=ce(n),d=h?a?kt:kt.filter((function(t){return ce(t)===h})):yt,u=d.filter((function(t){return c.indexOf(t)>=0}));0===u.length&&(u=d);var f=u.reduce((function(e,i){return 
e[i]=ke(t,{placement:i,boundary:s,rootBoundary:o,padding:r})[Ut(i)],e}),{});return Object.keys(f).sort((function(t,e){return f[t]-f[e]}))}const xe={name:"flip",enabled:!0,phase:"main",fn:function(t){var e=t.state,i=t.options,n=t.name;if(!e.modifiersData[n]._skip){for(var s=i.mainAxis,o=void 0===s||s,r=i.altAxis,a=void 0===r||r,l=i.fallbackPlacements,c=i.padding,h=i.boundary,d=i.rootBoundary,u=i.altBoundary,f=i.flipVariations,p=void 0===f||f,m=i.allowedAutoPlacements,g=e.options.placement,_=Ut(g),b=l||(_!==g&&p?function(t){if(Ut(t)===vt)return[];var e=ge(t);return[be(t),e,be(e)]}(g):[ge(g)]),v=[g].concat(b).reduce((function(t,i){return t.concat(Ut(i)===vt?Le(e,{placement:i,boundary:h,rootBoundary:d,padding:c,flipVariations:p,allowedAutoPlacements:m}):i)}),[]),y=e.rects.reference,w=e.rects.popper,E=new Map,A=!0,T=v[0],O=0;O=0,D=x?"width":"height",S=ke(e,{placement:C,boundary:h,rootBoundary:d,altBoundary:u,padding:c}),N=x?L?_t:bt:L?gt:mt;y[D]>w[D]&&(N=ge(N));var I=ge(N),P=[];if(o&&P.push(S[k]<=0),a&&P.push(S[N]<=0,S[I]<=0),P.every((function(t){return t}))){T=C,A=!1;break}E.set(C,P)}if(A)for(var j=function(t){var e=v.find((function(e){var i=E.get(e);if(i)return i.slice(0,t).every((function(t){return t}))}));if(e)return T=e,"break"},M=p?3:1;M>0&&"break"!==j(M);M--);e.placement!==T&&(e.modifiersData[n]._skip=!0,e.placement=T,e.reset=!0)}},requiresIfExists:["offset"],data:{_skip:!1}};function De(t,e,i){return void 0===i&&(i={x:0,y:0}),{top:t.top-e.height-i.y,right:t.right-e.width+i.x,bottom:t.bottom-e.height+i.y,left:t.left-e.width-i.x}}function Se(t){return[mt,_t,gt,bt].some((function(e){return t[e]>=0}))}const Ne={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(t){var 
e=t.state,i=t.name,n=e.rects.reference,s=e.rects.popper,o=e.modifiersData.preventOverflow,r=ke(e,{elementContext:"reference"}),a=ke(e,{altBoundary:!0}),l=De(r,n),c=De(a,s,o),h=Se(l),d=Se(c);e.modifiersData[i]={referenceClippingOffsets:l,popperEscapeOffsets:c,isReferenceHidden:h,hasPopperEscaped:d},e.attributes.popper=Object.assign({},e.attributes.popper,{"data-popper-reference-hidden":h,"data-popper-escaped":d})}},Ie={name:"offset",enabled:!0,phase:"main",requires:["popperOffsets"],fn:function(t){var e=t.state,i=t.options,n=t.name,s=i.offset,o=void 0===s?[0,0]:s,r=Lt.reduce((function(t,i){return t[i]=function(t,e,i){var n=Ut(t),s=[bt,mt].indexOf(n)>=0?-1:1,o="function"==typeof i?i(Object.assign({},e,{placement:t})):i,r=o[0],a=o[1];return r=r||0,a=(a||0)*s,[bt,_t].indexOf(n)>=0?{x:a,y:r}:{x:r,y:a}}(i,e.rects,o),t}),{}),a=r[e.placement],l=a.x,c=a.y;null!=e.modifiersData.popperOffsets&&(e.modifiersData.popperOffsets.x+=l,e.modifiersData.popperOffsets.y+=c),e.modifiersData[n]=r}},Pe={name:"popperOffsets",enabled:!0,phase:"read",fn:function(t){var e=t.state,i=t.name;e.modifiersData[i]=Ce({reference:e.rects.reference,element:e.rects.popper,strategy:"absolute",placement:e.placement})},data:{}},je={name:"preventOverflow",enabled:!0,phase:"main",fn:function(t){var e=t.state,i=t.options,n=t.name,s=i.mainAxis,o=void 0===s||s,r=i.altAxis,a=void 0!==r&&r,l=i.boundary,c=i.rootBoundary,h=i.altBoundary,d=i.padding,u=i.tether,f=void 0===u||u,p=i.tetherOffset,m=void 0===p?0:p,g=ke(e,{boundary:l,rootBoundary:c,padding:d,altBoundary:h}),_=Ut(e.placement),b=ce(e.placement),v=!b,y=ee(_),w="x"===y?"y":"x",E=e.modifiersData.popperOffsets,A=e.rects.reference,T=e.rects.popper,O="function"==typeof m?m(Object.assign({},e.rects,{placement:e.placement})):m,C={x:0,y:0};if(E){if(o||a){var 
k="y"===y?mt:bt,L="y"===y?gt:_t,x="y"===y?"height":"width",D=E[y],S=E[y]+g[k],N=E[y]-g[L],I=f?-T[x]/2:0,P=b===wt?A[x]:T[x],j=b===wt?-T[x]:-A[x],M=e.elements.arrow,H=f&&M?Kt(M):{width:0,height:0},B=e.modifiersData["arrow#persistent"]?e.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},R=B[k],W=B[L],$=oe(0,A[x],H[x]),z=v?A[x]/2-I-$-R-O:P-$-R-O,q=v?-A[x]/2+I+$+W+O:j+$+W+O,F=e.elements.arrow&&te(e.elements.arrow),U=F?"y"===y?F.clientTop||0:F.clientLeft||0:0,V=e.modifiersData.offset?e.modifiersData.offset[e.placement][y]:0,K=E[y]+z-V-U,X=E[y]+q-V;if(o){var Y=oe(f?ne(S,K):S,D,f?ie(N,X):N);E[y]=Y,C[y]=Y-D}if(a){var Q="x"===y?mt:bt,G="x"===y?gt:_t,Z=E[w],J=Z+g[Q],tt=Z-g[G],et=oe(f?ne(J,K):J,Z,f?ie(tt,X):tt);E[w]=et,C[w]=et-Z}}e.modifiersData[n]=C}},requiresIfExists:["offset"]};function Me(t,e,i){void 0===i&&(i=!1);var n=zt(e);zt(e)&&function(t){var e=t.getBoundingClientRect();e.width,t.offsetWidth,e.height,t.offsetHeight}(e);var s,o,r=Gt(e),a=Vt(t),l={scrollLeft:0,scrollTop:0},c={x:0,y:0};return(n||!n&&!i)&&(("body"!==Rt(e)||we(r))&&(l=(s=e)!==Wt(s)&&zt(s)?{scrollLeft:(o=s).scrollLeft,scrollTop:o.scrollTop}:ve(s)),zt(e)?((c=Vt(e)).x+=e.clientLeft,c.y+=e.clientTop):r&&(c.x=ye(r))),{x:a.left+l.scrollLeft-c.x,y:a.top+l.scrollTop-c.y,width:a.width,height:a.height}}function He(t){var e=new Map,i=new Set,n=[];function s(t){i.add(t.name),[].concat(t.requires||[],t.requiresIfExists||[]).forEach((function(t){if(!i.has(t)){var n=e.get(t);n&&s(n)}})),n.push(t)}return t.forEach((function(t){e.set(t.name,t)})),t.forEach((function(t){i.has(t.name)||s(t)})),n}var Be={placement:"bottom",modifiers:[],strategy:"absolute"};function Re(){for(var t=arguments.length,e=new Array(t),i=0;ij.on(t,"mouseover",d))),this._element.focus(),this._element.setAttribute("aria-expanded",!0),this._menu.classList.add(Je),this._element.classList.add(Je),j.trigger(this._element,"shown.bs.dropdown",t)}hide(){if(c(this._element)||!this._isShown(this._menu))return;const 
t={relatedTarget:this._element};this._completeHide(t)}dispose(){this._popper&&this._popper.destroy(),super.dispose()}update(){this._inNavbar=this._detectNavbar(),this._popper&&this._popper.update()}_completeHide(t){j.trigger(this._element,"hide.bs.dropdown",t).defaultPrevented||("ontouchstart"in document.documentElement&&[].concat(...document.body.children).forEach((t=>j.off(t,"mouseover",d))),this._popper&&this._popper.destroy(),this._menu.classList.remove(Je),this._element.classList.remove(Je),this._element.setAttribute("aria-expanded","false"),U.removeDataAttribute(this._menu,"popper"),j.trigger(this._element,"hidden.bs.dropdown",t))}_getConfig(t){if(t={...this.constructor.Default,...U.getDataAttributes(this._element),...t},a(Ue,t,this.constructor.DefaultType),"object"==typeof t.reference&&!o(t.reference)&&"function"!=typeof t.reference.getBoundingClientRect)throw new TypeError(`${Ue.toUpperCase()}: Option "reference" provided type "object" without a required "getBoundingClientRect" method.`);return t}_createPopper(t){if(void 0===Fe)throw new TypeError("Bootstrap's dropdowns require Popper (https://popper.js.org)");let e=this._element;"parent"===this._config.reference?e=t:o(this._config.reference)?e=r(this._config.reference):"object"==typeof this._config.reference&&(e=this._config.reference);const i=this._getPopperConfig(),n=i.modifiers.find((t=>"applyStyles"===t.name&&!1===t.enabled));this._popper=qe(e,this._menu,i),n&&U.setDataAttribute(this._menu,"popper","static")}_isShown(t=this._element){return t.classList.contains(Je)}_getMenuElement(){return V.next(this._element,ei)[0]}_getPlacement(){const t=this._element.parentNode;if(t.classList.contains("dropend"))return ri;if(t.classList.contains("dropstart"))return ai;const e="end"===getComputedStyle(this._menu).getPropertyValue("--bs-position").trim();return t.classList.contains("dropup")?e?ni:ii:e?oi:si}_detectNavbar(){return 
null!==this._element.closest(".navbar")}_getOffset(){const{offset:t}=this._config;return"string"==typeof t?t.split(",").map((t=>Number.parseInt(t,10))):"function"==typeof t?e=>t(e,this._element):t}_getPopperConfig(){const t={placement:this._getPlacement(),modifiers:[{name:"preventOverflow",options:{boundary:this._config.boundary}},{name:"offset",options:{offset:this._getOffset()}}]};return"static"===this._config.display&&(t.modifiers=[{name:"applyStyles",enabled:!1}]),{...t,..."function"==typeof this._config.popperConfig?this._config.popperConfig(t):this._config.popperConfig}}_selectMenuItem({key:t,target:e}){const i=V.find(".dropdown-menu .dropdown-item:not(.disabled):not(:disabled)",this._menu).filter(l);i.length&&v(i,e,t===Ye,!i.includes(e)).focus()}static jQueryInterface(t){return this.each((function(){const e=hi.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}static clearMenus(t){if(t&&(2===t.button||"keyup"===t.type&&"Tab"!==t.key))return;const e=V.find(ti);for(let i=0,n=e.length;ie+t)),this._setElementAttributes(di,"paddingRight",(e=>e+t)),this._setElementAttributes(ui,"marginRight",(e=>e-t))}_disableOverFlow(){this._saveInitialAttribute(this._element,"overflow"),this._element.style.overflow="hidden"}_setElementAttributes(t,e,i){const n=this.getWidth();this._applyManipulationCallback(t,(t=>{if(t!==this._element&&window.innerWidth>t.clientWidth+n)return;this._saveInitialAttribute(t,e);const s=window.getComputedStyle(t)[e];t.style[e]=`${i(Number.parseFloat(s))}px`}))}reset(){this._resetElementAttributes(this._element,"overflow"),this._resetElementAttributes(this._element,"paddingRight"),this._resetElementAttributes(di,"paddingRight"),this._resetElementAttributes(ui,"marginRight")}_saveInitialAttribute(t,e){const i=t.style[e];i&&U.setDataAttribute(t,e,i)}_resetElementAttributes(t,e){this._applyManipulationCallback(t,(t=>{const i=U.getDataAttribute(t,e);void 
0===i?t.style.removeProperty(e):(U.removeDataAttribute(t,e),t.style[e]=i)}))}_applyManipulationCallback(t,e){o(t)?e(t):V.find(t,this._element).forEach(e)}isOverflowing(){return this.getWidth()>0}}const pi={className:"modal-backdrop",isVisible:!0,isAnimated:!1,rootElement:"body",clickCallback:null},mi={className:"string",isVisible:"boolean",isAnimated:"boolean",rootElement:"(element|string)",clickCallback:"(function|null)"},gi="show",_i="mousedown.bs.backdrop";class bi{constructor(t){this._config=this._getConfig(t),this._isAppended=!1,this._element=null}show(t){this._config.isVisible?(this._append(),this._config.isAnimated&&u(this._getElement()),this._getElement().classList.add(gi),this._emulateAnimation((()=>{_(t)}))):_(t)}hide(t){this._config.isVisible?(this._getElement().classList.remove(gi),this._emulateAnimation((()=>{this.dispose(),_(t)}))):_(t)}_getElement(){if(!this._element){const t=document.createElement("div");t.className=this._config.className,this._config.isAnimated&&t.classList.add("fade"),this._element=t}return this._element}_getConfig(t){return(t={...pi,..."object"==typeof t?t:{}}).rootElement=r(t.rootElement),a("backdrop",t,mi),t}_append(){this._isAppended||(this._config.rootElement.append(this._getElement()),j.on(this._getElement(),_i,(()=>{_(this._config.clickCallback)})),this._isAppended=!0)}dispose(){this._isAppended&&(j.off(this._element,_i),this._element.remove(),this._isAppended=!1)}_emulateAnimation(t){b(t,this._getElement(),this._config.isAnimated)}}const vi={trapElement:null,autofocus:!0},yi={trapElement:"element",autofocus:"boolean"},wi=".bs.focustrap",Ei="backward";class 
Ai{constructor(t){this._config=this._getConfig(t),this._isActive=!1,this._lastTabNavDirection=null}activate(){const{trapElement:t,autofocus:e}=this._config;this._isActive||(e&&t.focus(),j.off(document,wi),j.on(document,"focusin.bs.focustrap",(t=>this._handleFocusin(t))),j.on(document,"keydown.tab.bs.focustrap",(t=>this._handleKeydown(t))),this._isActive=!0)}deactivate(){this._isActive&&(this._isActive=!1,j.off(document,wi))}_handleFocusin(t){const{target:e}=t,{trapElement:i}=this._config;if(e===document||e===i||i.contains(e))return;const n=V.focusableChildren(i);0===n.length?i.focus():this._lastTabNavDirection===Ei?n[n.length-1].focus():n[0].focus()}_handleKeydown(t){"Tab"===t.key&&(this._lastTabNavDirection=t.shiftKey?Ei:"forward")}_getConfig(t){return t={...vi,..."object"==typeof t?t:{}},a("focustrap",t,yi),t}}const Ti="modal",Oi="Escape",Ci={backdrop:!0,keyboard:!0,focus:!0},ki={backdrop:"(boolean|string)",keyboard:"boolean",focus:"boolean"},Li="hidden.bs.modal",xi="show.bs.modal",Di="resize.bs.modal",Si="click.dismiss.bs.modal",Ni="keydown.dismiss.bs.modal",Ii="mousedown.dismiss.bs.modal",Pi="modal-open",ji="show",Mi="modal-static";class Hi extends B{constructor(t,e){super(t),this._config=this._getConfig(e),this._dialog=V.findOne(".modal-dialog",this._element),this._backdrop=this._initializeBackDrop(),this._focustrap=this._initializeFocusTrap(),this._isShown=!1,this._ignoreBackdropClick=!1,this._isTransitioning=!1,this._scrollBar=new fi}static get Default(){return Ci}static get NAME(){return Ti}toggle(t){return 
this._isShown?this.hide():this.show(t)}show(t){this._isShown||this._isTransitioning||j.trigger(this._element,xi,{relatedTarget:t}).defaultPrevented||(this._isShown=!0,this._isAnimated()&&(this._isTransitioning=!0),this._scrollBar.hide(),document.body.classList.add(Pi),this._adjustDialog(),this._setEscapeEvent(),this._setResizeEvent(),j.on(this._dialog,Ii,(()=>{j.one(this._element,"mouseup.dismiss.bs.modal",(t=>{t.target===this._element&&(this._ignoreBackdropClick=!0)}))})),this._showBackdrop((()=>this._showElement(t))))}hide(){if(!this._isShown||this._isTransitioning)return;if(j.trigger(this._element,"hide.bs.modal").defaultPrevented)return;this._isShown=!1;const t=this._isAnimated();t&&(this._isTransitioning=!0),this._setEscapeEvent(),this._setResizeEvent(),this._focustrap.deactivate(),this._element.classList.remove(ji),j.off(this._element,Si),j.off(this._dialog,Ii),this._queueCallback((()=>this._hideModal()),this._element,t)}dispose(){[window,this._dialog].forEach((t=>j.off(t,".bs.modal"))),this._backdrop.dispose(),this._focustrap.deactivate(),super.dispose()}handleUpdate(){this._adjustDialog()}_initializeBackDrop(){return new bi({isVisible:Boolean(this._config.backdrop),isAnimated:this._isAnimated()})}_initializeFocusTrap(){return new Ai({trapElement:this._element})}_getConfig(t){return t={...Ci,...U.getDataAttributes(this._element),..."object"==typeof t?t:{}},a(Ti,t,ki),t}_showElement(t){const 
e=this._isAnimated(),i=V.findOne(".modal-body",this._dialog);this._element.parentNode&&this._element.parentNode.nodeType===Node.ELEMENT_NODE||document.body.append(this._element),this._element.style.display="block",this._element.removeAttribute("aria-hidden"),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),this._element.scrollTop=0,i&&(i.scrollTop=0),e&&u(this._element),this._element.classList.add(ji),this._queueCallback((()=>{this._config.focus&&this._focustrap.activate(),this._isTransitioning=!1,j.trigger(this._element,"shown.bs.modal",{relatedTarget:t})}),this._dialog,e)}_setEscapeEvent(){this._isShown?j.on(this._element,Ni,(t=>{this._config.keyboard&&t.key===Oi?(t.preventDefault(),this.hide()):this._config.keyboard||t.key!==Oi||this._triggerBackdropTransition()})):j.off(this._element,Ni)}_setResizeEvent(){this._isShown?j.on(window,Di,(()=>this._adjustDialog())):j.off(window,Di)}_hideModal(){this._element.style.display="none",this._element.setAttribute("aria-hidden",!0),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._isTransitioning=!1,this._backdrop.hide((()=>{document.body.classList.remove(Pi),this._resetAdjustments(),this._scrollBar.reset(),j.trigger(this._element,Li)}))}_showBackdrop(t){j.on(this._element,Si,(t=>{this._ignoreBackdropClick?this._ignoreBackdropClick=!1:t.target===t.currentTarget&&(!0===this._config.backdrop?this.hide():"static"===this._config.backdrop&&this._triggerBackdropTransition())})),this._backdrop.show(t)}_isAnimated(){return 
this._element.classList.contains("fade")}_triggerBackdropTransition(){if(j.trigger(this._element,"hidePrevented.bs.modal").defaultPrevented)return;const{classList:t,scrollHeight:e,style:i}=this._element,n=e>document.documentElement.clientHeight;!n&&"hidden"===i.overflowY||t.contains(Mi)||(n||(i.overflowY="hidden"),t.add(Mi),this._queueCallback((()=>{t.remove(Mi),n||this._queueCallback((()=>{i.overflowY=""}),this._dialog)}),this._dialog),this._element.focus())}_adjustDialog(){const t=this._element.scrollHeight>document.documentElement.clientHeight,e=this._scrollBar.getWidth(),i=e>0;(!i&&t&&!m()||i&&!t&&m())&&(this._element.style.paddingLeft=`${e}px`),(i&&!t&&!m()||!i&&t&&m())&&(this._element.style.paddingRight=`${e}px`)}_resetAdjustments(){this._element.style.paddingLeft="",this._element.style.paddingRight=""}static jQueryInterface(t,e){return this.each((function(){const i=Hi.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===i[t])throw new TypeError(`No method named "${t}"`);i[t](e)}}))}}j.on(document,"click.bs.modal.data-api",'[data-bs-toggle="modal"]',(function(t){const e=n(this);["A","AREA"].includes(this.tagName)&&t.preventDefault(),j.one(e,xi,(t=>{t.defaultPrevented||j.one(e,Li,(()=>{l(this)&&this.focus()}))}));const i=V.findOne(".modal.show");i&&Hi.getInstance(i).hide(),Hi.getOrCreateInstance(e).toggle(this)})),R(Hi),g(Hi);const Bi="offcanvas",Ri={backdrop:!0,keyboard:!0,scroll:!1},Wi={backdrop:"boolean",keyboard:"boolean",scroll:"boolean"},$i="show",zi=".offcanvas.show",qi="hidden.bs.offcanvas";class Fi extends B{constructor(t,e){super(t),this._config=this._getConfig(e),this._isShown=!1,this._backdrop=this._initializeBackDrop(),this._focustrap=this._initializeFocusTrap(),this._addEventListeners()}static get NAME(){return Bi}static get Default(){return Ri}toggle(t){return 
this._isShown?this.hide():this.show(t)}show(t){this._isShown||j.trigger(this._element,"show.bs.offcanvas",{relatedTarget:t}).defaultPrevented||(this._isShown=!0,this._element.style.visibility="visible",this._backdrop.show(),this._config.scroll||(new fi).hide(),this._element.removeAttribute("aria-hidden"),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),this._element.classList.add($i),this._queueCallback((()=>{this._config.scroll||this._focustrap.activate(),j.trigger(this._element,"shown.bs.offcanvas",{relatedTarget:t})}),this._element,!0))}hide(){this._isShown&&(j.trigger(this._element,"hide.bs.offcanvas").defaultPrevented||(this._focustrap.deactivate(),this._element.blur(),this._isShown=!1,this._element.classList.remove($i),this._backdrop.hide(),this._queueCallback((()=>{this._element.setAttribute("aria-hidden",!0),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._element.style.visibility="hidden",this._config.scroll||(new fi).reset(),j.trigger(this._element,qi)}),this._element,!0)))}dispose(){this._backdrop.dispose(),this._focustrap.deactivate(),super.dispose()}_getConfig(t){return t={...Ri,...U.getDataAttributes(this._element),..."object"==typeof t?t:{}},a(Bi,t,Wi),t}_initializeBackDrop(){return new bi({className:"offcanvas-backdrop",isVisible:this._config.backdrop,isAnimated:!0,rootElement:this._element.parentNode,clickCallback:()=>this.hide()})}_initializeFocusTrap(){return new Ai({trapElement:this._element})}_addEventListeners(){j.on(this._element,"keydown.dismiss.bs.offcanvas",(t=>{this._config.keyboard&&"Escape"===t.key&&this.hide()}))}static jQueryInterface(t){return this.each((function(){const e=Fi.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}j.on(document,"click.bs.offcanvas.data-api",'[data-bs-toggle="offcanvas"]',(function(t){const 
e=n(this);if(["A","AREA"].includes(this.tagName)&&t.preventDefault(),c(this))return;j.one(e,qi,(()=>{l(this)&&this.focus()}));const i=V.findOne(zi);i&&i!==e&&Fi.getInstance(i).hide(),Fi.getOrCreateInstance(e).toggle(this)})),j.on(window,"load.bs.offcanvas.data-api",(()=>V.find(zi).forEach((t=>Fi.getOrCreateInstance(t).show())))),R(Fi),g(Fi);const Ui=new Set(["background","cite","href","itemtype","longdesc","poster","src","xlink:href"]),Vi=/^(?:(?:https?|mailto|ftp|tel|file|sms):|[^#&/:?]*(?:[#/?]|$))/i,Ki=/^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm)|audio\/(?:mp3|oga|ogg|opus));base64,[\d+/a-z]+=*$/i,Xi=(t,e)=>{const i=t.nodeName.toLowerCase();if(e.includes(i))return!Ui.has(i)||Boolean(Vi.test(t.nodeValue)||Ki.test(t.nodeValue));const n=e.filter((t=>t instanceof RegExp));for(let t=0,e=n.length;t{Xi(t,r)||i.removeAttribute(t.nodeName)}))}return n.body.innerHTML}const Qi="tooltip",Gi=new Set(["sanitize","allowList","sanitizeFn"]),Zi={animation:"boolean",template:"string",title:"(string|element|function)",trigger:"string",delay:"(number|object)",html:"boolean",selector:"(string|boolean)",placement:"(string|function)",offset:"(array|string|function)",container:"(string|element|boolean)",fallbackPlacements:"array",boundary:"(string|element)",customClass:"(string|function)",sanitize:"boolean",sanitizeFn:"(null|function)",allowList:"object",popperConfig:"(null|object|function)"},Ji={AUTO:"auto",TOP:"top",RIGHT:m()?"left":"right",BOTTOM:"bottom",LEFT:m()?"right":"left"},tn={animation:!0,template:'',trigger:"hover 
focus",title:"",delay:0,html:!1,selector:!1,placement:"top",offset:[0,0],container:!1,fallbackPlacements:["top","right","bottom","left"],boundary:"clippingParents",customClass:"",sanitize:!0,sanitizeFn:null,allowList:{"*":["class","dir","id","lang","role",/^aria-[\w-]*$/i],a:["target","href","title","rel"],area:[],b:[],br:[],col:[],code:[],div:[],em:[],hr:[],h1:[],h2:[],h3:[],h4:[],h5:[],h6:[],i:[],img:["src","srcset","alt","title","width","height"],li:[],ol:[],p:[],pre:[],s:[],small:[],span:[],sub:[],sup:[],strong:[],u:[],ul:[]},popperConfig:null},en={HIDE:"hide.bs.tooltip",HIDDEN:"hidden.bs.tooltip",SHOW:"show.bs.tooltip",SHOWN:"shown.bs.tooltip",INSERTED:"inserted.bs.tooltip",CLICK:"click.bs.tooltip",FOCUSIN:"focusin.bs.tooltip",FOCUSOUT:"focusout.bs.tooltip",MOUSEENTER:"mouseenter.bs.tooltip",MOUSELEAVE:"mouseleave.bs.tooltip"},nn="fade",sn="show",on="show",rn="out",an=".tooltip-inner",ln=".modal",cn="hide.bs.modal",hn="hover",dn="focus";class un extends B{constructor(t,e){if(void 0===Fe)throw new TypeError("Bootstrap's tooltips require Popper (https://popper.js.org)");super(t),this._isEnabled=!0,this._timeout=0,this._hoverState="",this._activeTrigger={},this._popper=null,this._config=this._getConfig(e),this.tip=null,this._setListeners()}static get Default(){return tn}static get NAME(){return Qi}static get Event(){return en}static get DefaultType(){return Zi}enable(){this._isEnabled=!0}disable(){this._isEnabled=!1}toggleEnabled(){this._isEnabled=!this._isEnabled}toggle(t){if(this._isEnabled)if(t){const e=this._initializeOnDelegatedTarget(t);e._activeTrigger.click=!e._activeTrigger.click,e._isWithActiveTrigger()?e._enter(null,e):e._leave(null,e)}else{if(this.getTipElement().classList.contains(sn))return void 
this._leave(null,this);this._enter(null,this)}}dispose(){clearTimeout(this._timeout),j.off(this._element.closest(ln),cn,this._hideModalHandler),this.tip&&this.tip.remove(),this._disposePopper(),super.dispose()}show(){if("none"===this._element.style.display)throw new Error("Please use show on visible elements");if(!this.isWithContent()||!this._isEnabled)return;const t=j.trigger(this._element,this.constructor.Event.SHOW),e=h(this._element),i=null===e?this._element.ownerDocument.documentElement.contains(this._element):e.contains(this._element);if(t.defaultPrevented||!i)return;"tooltip"===this.constructor.NAME&&this.tip&&this.getTitle()!==this.tip.querySelector(an).innerHTML&&(this._disposePopper(),this.tip.remove(),this.tip=null);const n=this.getTipElement(),s=(t=>{do{t+=Math.floor(1e6*Math.random())}while(document.getElementById(t));return t})(this.constructor.NAME);n.setAttribute("id",s),this._element.setAttribute("aria-describedby",s),this._config.animation&&n.classList.add(nn);const o="function"==typeof this._config.placement?this._config.placement.call(this,n,this._element):this._config.placement,r=this._getAttachment(o);this._addAttachmentClass(r);const{container:a}=this._config;H.set(n,this.constructor.DATA_KEY,this),this._element.ownerDocument.documentElement.contains(this.tip)||(a.append(n),j.trigger(this._element,this.constructor.Event.INSERTED)),this._popper?this._popper.update():this._popper=qe(this._element,n,this._getPopperConfig(r)),n.classList.add(sn);const l=this._resolvePossibleFunction(this._config.customClass);l&&n.classList.add(...l.split(" ")),"ontouchstart"in document.documentElement&&[].concat(...document.body.children).forEach((t=>{j.on(t,"mouseover",d)}));const c=this.tip.classList.contains(nn);this._queueCallback((()=>{const t=this._hoverState;this._hoverState=null,j.trigger(this._element,this.constructor.Event.SHOWN),t===rn&&this._leave(null,this)}),this.tip,c)}hide(){if(!this._popper)return;const 
t=this.getTipElement();if(j.trigger(this._element,this.constructor.Event.HIDE).defaultPrevented)return;t.classList.remove(sn),"ontouchstart"in document.documentElement&&[].concat(...document.body.children).forEach((t=>j.off(t,"mouseover",d))),this._activeTrigger.click=!1,this._activeTrigger.focus=!1,this._activeTrigger.hover=!1;const e=this.tip.classList.contains(nn);this._queueCallback((()=>{this._isWithActiveTrigger()||(this._hoverState!==on&&t.remove(),this._cleanTipClass(),this._element.removeAttribute("aria-describedby"),j.trigger(this._element,this.constructor.Event.HIDDEN),this._disposePopper())}),this.tip,e),this._hoverState=""}update(){null!==this._popper&&this._popper.update()}isWithContent(){return Boolean(this.getTitle())}getTipElement(){if(this.tip)return this.tip;const t=document.createElement("div");t.innerHTML=this._config.template;const e=t.children[0];return this.setContent(e),e.classList.remove(nn,sn),this.tip=e,this.tip}setContent(t){this._sanitizeAndSetContent(t,this.getTitle(),an)}_sanitizeAndSetContent(t,e,i){const n=V.findOne(i,t);e||!n?this.setElementContent(n,e):n.remove()}setElementContent(t,e){if(null!==t)return o(e)?(e=r(e),void(this._config.html?e.parentNode!==t&&(t.innerHTML="",t.append(e)):t.textContent=e.textContent)):void(this._config.html?(this._config.sanitize&&(e=Yi(e,this._config.allowList,this._config.sanitizeFn)),t.innerHTML=e):t.textContent=e)}getTitle(){const t=this._element.getAttribute("data-bs-original-title")||this._config.title;return this._resolvePossibleFunction(t)}updateAttachment(t){return"right"===t?"end":"left"===t?"start":t}_initializeOnDelegatedTarget(t,e){return e||this.constructor.getOrCreateInstance(t.delegateTarget,this._getDelegateConfig())}_getOffset(){const{offset:t}=this._config;return"string"==typeof t?t.split(",").map((t=>Number.parseInt(t,10))):"function"==typeof t?e=>t(e,this._element):t}_resolvePossibleFunction(t){return"function"==typeof t?t.call(this._element):t}_getPopperConfig(t){const 
e={placement:t,modifiers:[{name:"flip",options:{fallbackPlacements:this._config.fallbackPlacements}},{name:"offset",options:{offset:this._getOffset()}},{name:"preventOverflow",options:{boundary:this._config.boundary}},{name:"arrow",options:{element:`.${this.constructor.NAME}-arrow`}},{name:"onChange",enabled:!0,phase:"afterWrite",fn:t=>this._handlePopperPlacementChange(t)}],onFirstUpdate:t=>{t.options.placement!==t.placement&&this._handlePopperPlacementChange(t)}};return{...e,..."function"==typeof this._config.popperConfig?this._config.popperConfig(e):this._config.popperConfig}}_addAttachmentClass(t){this.getTipElement().classList.add(`${this._getBasicClassPrefix()}-${this.updateAttachment(t)}`)}_getAttachment(t){return Ji[t.toUpperCase()]}_setListeners(){this._config.trigger.split(" ").forEach((t=>{if("click"===t)j.on(this._element,this.constructor.Event.CLICK,this._config.selector,(t=>this.toggle(t)));else if("manual"!==t){const e=t===hn?this.constructor.Event.MOUSEENTER:this.constructor.Event.FOCUSIN,i=t===hn?this.constructor.Event.MOUSELEAVE:this.constructor.Event.FOCUSOUT;j.on(this._element,e,this._config.selector,(t=>this._enter(t))),j.on(this._element,i,this._config.selector,(t=>this._leave(t)))}})),this._hideModalHandler=()=>{this._element&&this.hide()},j.on(this._element.closest(ln),cn,this._hideModalHandler),this._config.selector?this._config={...this._config,trigger:"manual",selector:""}:this._fixTitle()}_fixTitle(){const t=this._element.getAttribute("title"),e=typeof 
this._element.getAttribute("data-bs-original-title");(t||"string"!==e)&&(this._element.setAttribute("data-bs-original-title",t||""),!t||this._element.getAttribute("aria-label")||this._element.textContent||this._element.setAttribute("aria-label",t),this._element.setAttribute("title",""))}_enter(t,e){e=this._initializeOnDelegatedTarget(t,e),t&&(e._activeTrigger["focusin"===t.type?dn:hn]=!0),e.getTipElement().classList.contains(sn)||e._hoverState===on?e._hoverState=on:(clearTimeout(e._timeout),e._hoverState=on,e._config.delay&&e._config.delay.show?e._timeout=setTimeout((()=>{e._hoverState===on&&e.show()}),e._config.delay.show):e.show())}_leave(t,e){e=this._initializeOnDelegatedTarget(t,e),t&&(e._activeTrigger["focusout"===t.type?dn:hn]=e._element.contains(t.relatedTarget)),e._isWithActiveTrigger()||(clearTimeout(e._timeout),e._hoverState=rn,e._config.delay&&e._config.delay.hide?e._timeout=setTimeout((()=>{e._hoverState===rn&&e.hide()}),e._config.delay.hide):e.hide())}_isWithActiveTrigger(){for(const t in this._activeTrigger)if(this._activeTrigger[t])return!0;return!1}_getConfig(t){const e=U.getDataAttributes(this._element);return Object.keys(e).forEach((t=>{Gi.has(t)&&delete e[t]})),(t={...this.constructor.Default,...e,..."object"==typeof t&&t?t:{}}).container=!1===t.container?document.body:r(t.container),"number"==typeof t.delay&&(t.delay={show:t.delay,hide:t.delay}),"number"==typeof t.title&&(t.title=t.title.toString()),"number"==typeof t.content&&(t.content=t.content.toString()),a(Qi,t,this.constructor.DefaultType),t.sanitize&&(t.template=Yi(t.template,t.allowList,t.sanitizeFn)),t}_getDelegateConfig(){const t={};for(const e in this._config)this.constructor.Default[e]!==this._config[e]&&(t[e]=this._config[e]);return t}_cleanTipClass(){const t=this.getTipElement(),e=new 
RegExp(`(^|\\s)${this._getBasicClassPrefix()}\\S+`,"g"),i=t.getAttribute("class").match(e);null!==i&&i.length>0&&i.map((t=>t.trim())).forEach((e=>t.classList.remove(e)))}_getBasicClassPrefix(){return"bs-tooltip"}_handlePopperPlacementChange(t){const{state:e}=t;e&&(this.tip=e.elements.popper,this._cleanTipClass(),this._addAttachmentClass(this._getAttachment(e.placement)))}_disposePopper(){this._popper&&(this._popper.destroy(),this._popper=null)}static jQueryInterface(t){return this.each((function(){const e=un.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}g(un);const fn={...un.Default,placement:"right",offset:[0,8],trigger:"click",content:"",template:''},pn={...un.DefaultType,content:"(string|element|function)"},mn={HIDE:"hide.bs.popover",HIDDEN:"hidden.bs.popover",SHOW:"show.bs.popover",SHOWN:"shown.bs.popover",INSERTED:"inserted.bs.popover",CLICK:"click.bs.popover",FOCUSIN:"focusin.bs.popover",FOCUSOUT:"focusout.bs.popover",MOUSEENTER:"mouseenter.bs.popover",MOUSELEAVE:"mouseleave.bs.popover"};class gn extends un{static get Default(){return fn}static get NAME(){return"popover"}static get Event(){return mn}static get DefaultType(){return pn}isWithContent(){return this.getTitle()||this._getContent()}setContent(t){this._sanitizeAndSetContent(t,this.getTitle(),".popover-header"),this._sanitizeAndSetContent(t,this._getContent(),".popover-body")}_getContent(){return this._resolvePossibleFunction(this._config.content)}_getBasicClassPrefix(){return"bs-popover"}static jQueryInterface(t){return this.each((function(){const e=gn.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}g(gn);const _n="scrollspy",bn={offset:10,method:"auto",target:""},vn={offset:"number",method:"string",target:"(string|element)"},yn="active",wn=".nav-link, .list-group-item, .dropdown-item",En="position";class An extends 
B{constructor(t,e){super(t),this._scrollElement="BODY"===this._element.tagName?window:this._element,this._config=this._getConfig(e),this._offsets=[],this._targets=[],this._activeTarget=null,this._scrollHeight=0,j.on(this._scrollElement,"scroll.bs.scrollspy",(()=>this._process())),this.refresh(),this._process()}static get Default(){return bn}static get NAME(){return _n}refresh(){const t=this._scrollElement===this._scrollElement.window?"offset":En,e="auto"===this._config.method?t:this._config.method,n=e===En?this._getScrollTop():0;this._offsets=[],this._targets=[],this._scrollHeight=this._getScrollHeight(),V.find(wn,this._config.target).map((t=>{const s=i(t),o=s?V.findOne(s):null;if(o){const t=o.getBoundingClientRect();if(t.width||t.height)return[U[e](o).top+n,s]}return null})).filter((t=>t)).sort(((t,e)=>t[0]-e[0])).forEach((t=>{this._offsets.push(t[0]),this._targets.push(t[1])}))}dispose(){j.off(this._scrollElement,".bs.scrollspy"),super.dispose()}_getConfig(t){return(t={...bn,...U.getDataAttributes(this._element),..."object"==typeof t&&t?t:{}}).target=r(t.target)||document.documentElement,a(_n,t,vn),t}_getScrollTop(){return this._scrollElement===window?this._scrollElement.pageYOffset:this._scrollElement.scrollTop}_getScrollHeight(){return this._scrollElement.scrollHeight||Math.max(document.body.scrollHeight,document.documentElement.scrollHeight)}_getOffsetHeight(){return this._scrollElement===window?window.innerHeight:this._scrollElement.getBoundingClientRect().height}_process(){const t=this._getScrollTop()+this._config.offset,e=this._getScrollHeight(),i=this._config.offset+e-this._getOffsetHeight();if(this._scrollHeight!==e&&this.refresh(),t>=i){const t=this._targets[this._targets.length-1];this._activeTarget!==t&&this._activate(t)}else{if(this._activeTarget&&t0)return this._activeTarget=null,void this._clear();for(let e=this._offsets.length;e--;)this._activeTarget!==this._targets[e]&&t>=this._offsets[e]&&(void 
0===this._offsets[e+1]||t`${e}[data-bs-target="${t}"],${e}[href="${t}"]`)),i=V.findOne(e.join(","),this._config.target);i.classList.add(yn),i.classList.contains("dropdown-item")?V.findOne(".dropdown-toggle",i.closest(".dropdown")).classList.add(yn):V.parents(i,".nav, .list-group").forEach((t=>{V.prev(t,".nav-link, .list-group-item").forEach((t=>t.classList.add(yn))),V.prev(t,".nav-item").forEach((t=>{V.children(t,".nav-link").forEach((t=>t.classList.add(yn)))}))})),j.trigger(this._scrollElement,"activate.bs.scrollspy",{relatedTarget:t})}_clear(){V.find(wn,this._config.target).filter((t=>t.classList.contains(yn))).forEach((t=>t.classList.remove(yn)))}static jQueryInterface(t){return this.each((function(){const e=An.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}j.on(window,"load.bs.scrollspy.data-api",(()=>{V.find('[data-bs-spy="scroll"]').forEach((t=>new An(t)))})),g(An);const Tn="active",On="fade",Cn="show",kn=".active",Ln=":scope > li > .active";class xn extends B{static get NAME(){return"tab"}show(){if(this._element.parentNode&&this._element.parentNode.nodeType===Node.ELEMENT_NODE&&this._element.classList.contains(Tn))return;let t;const e=n(this._element),i=this._element.closest(".nav, .list-group");if(i){const e="UL"===i.nodeName||"OL"===i.nodeName?Ln:kn;t=V.find(e,i),t=t[t.length-1]}const s=t?j.trigger(t,"hide.bs.tab",{relatedTarget:this._element}):null;if(j.trigger(this._element,"show.bs.tab",{relatedTarget:t}).defaultPrevented||null!==s&&s.defaultPrevented)return;this._activate(this._element,i);const o=()=>{j.trigger(t,"hidden.bs.tab",{relatedTarget:this._element}),j.trigger(this._element,"shown.bs.tab",{relatedTarget:t})};e?this._activate(e,e.parentNode,o):o()}_activate(t,e,i){const 
n=(!e||"UL"!==e.nodeName&&"OL"!==e.nodeName?V.children(e,kn):V.find(Ln,e))[0],s=i&&n&&n.classList.contains(On),o=()=>this._transitionComplete(t,n,i);n&&s?(n.classList.remove(Cn),this._queueCallback(o,t,!0)):o()}_transitionComplete(t,e,i){if(e){e.classList.remove(Tn);const t=V.findOne(":scope > .dropdown-menu .active",e.parentNode);t&&t.classList.remove(Tn),"tab"===e.getAttribute("role")&&e.setAttribute("aria-selected",!1)}t.classList.add(Tn),"tab"===t.getAttribute("role")&&t.setAttribute("aria-selected",!0),u(t),t.classList.contains(On)&&t.classList.add(Cn);let n=t.parentNode;if(n&&"LI"===n.nodeName&&(n=n.parentNode),n&&n.classList.contains("dropdown-menu")){const e=t.closest(".dropdown");e&&V.find(".dropdown-toggle",e).forEach((t=>t.classList.add(Tn))),t.setAttribute("aria-expanded",!0)}i&&i()}static jQueryInterface(t){return this.each((function(){const e=xn.getOrCreateInstance(this);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}j.on(document,"click.bs.tab.data-api",'[data-bs-toggle="tab"], [data-bs-toggle="pill"], [data-bs-toggle="list"]',(function(t){["A","AREA"].includes(this.tagName)&&t.preventDefault(),c(this)||xn.getOrCreateInstance(this).show()})),g(xn);const Dn="toast",Sn="hide",Nn="show",In="showing",Pn={animation:"boolean",autohide:"boolean",delay:"number"},jn={animation:!0,autohide:!0,delay:5e3};class Mn extends B{constructor(t,e){super(t),this._config=this._getConfig(e),this._timeout=null,this._hasMouseInteraction=!1,this._hasKeyboardInteraction=!1,this._setListeners()}static get DefaultType(){return Pn}static get Default(){return jn}static get NAME(){return 
Dn}show(){j.trigger(this._element,"show.bs.toast").defaultPrevented||(this._clearTimeout(),this._config.animation&&this._element.classList.add("fade"),this._element.classList.remove(Sn),u(this._element),this._element.classList.add(Nn),this._element.classList.add(In),this._queueCallback((()=>{this._element.classList.remove(In),j.trigger(this._element,"shown.bs.toast"),this._maybeScheduleHide()}),this._element,this._config.animation))}hide(){this._element.classList.contains(Nn)&&(j.trigger(this._element,"hide.bs.toast").defaultPrevented||(this._element.classList.add(In),this._queueCallback((()=>{this._element.classList.add(Sn),this._element.classList.remove(In),this._element.classList.remove(Nn),j.trigger(this._element,"hidden.bs.toast")}),this._element,this._config.animation)))}dispose(){this._clearTimeout(),this._element.classList.contains(Nn)&&this._element.classList.remove(Nn),super.dispose()}_getConfig(t){return t={...jn,...U.getDataAttributes(this._element),..."object"==typeof t&&t?t:{}},a(Dn,t,this.constructor.DefaultType),t}_maybeScheduleHide(){this._config.autohide&&(this._hasMouseInteraction||this._hasKeyboardInteraction||(this._timeout=setTimeout((()=>{this.hide()}),this._config.delay)))}_onInteraction(t,e){switch(t.type){case"mouseover":case"mouseout":this._hasMouseInteraction=e;break;case"focusin":case"focusout":this._hasKeyboardInteraction=e}if(e)return void this._clearTimeout();const i=t.relatedTarget;this._element===i||this._element.contains(i)||this._maybeScheduleHide()}_setListeners(){j.on(this._element,"mouseover.bs.toast",(t=>this._onInteraction(t,!0))),j.on(this._element,"mouseout.bs.toast",(t=>this._onInteraction(t,!1))),j.on(this._element,"focusin.bs.toast",(t=>this._onInteraction(t,!0))),j.on(this._element,"focusout.bs.toast",(t=>this._onInteraction(t,!1)))}_clearTimeout(){clearTimeout(this._timeout),this._timeout=null}static jQueryInterface(t){return this.each((function(){const e=Mn.getOrCreateInstance(this,t);if("string"==typeof t){if(void 
0===e[t])throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}return R(Mn),g(Mn),{Alert:W,Button:z,Carousel:st,Collapse:pt,Dropdown:hi,Modal:Hi,Offcanvas:Fi,Popover:gn,ScrollSpy:An,Tab:xn,Toast:Mn,Tooltip:un}})); +//# sourceMappingURL=bootstrap.bundle.min.js.map \ No newline at end of file diff --git a/static/css/images/tidy-back-01.svg b/docs/site_libs/bootstrap/images/tidy-back-01.svg similarity index 100% rename from static/css/images/tidy-back-01.svg rename to docs/site_libs/bootstrap/images/tidy-back-01.svg diff --git a/static/css/images/tidy-back-02.svg b/docs/site_libs/bootstrap/images/tidy-back-02.svg old mode 100755 new mode 100644 similarity index 100% rename from static/css/images/tidy-back-02.svg rename to docs/site_libs/bootstrap/images/tidy-back-02.svg diff --git a/static/css/images/tidy-back-03.svg b/docs/site_libs/bootstrap/images/tidy-back-03.svg similarity index 100% rename from static/css/images/tidy-back-03.svg rename to docs/site_libs/bootstrap/images/tidy-back-03.svg diff --git a/static/css/images/tidy-packages-back-01.svg b/docs/site_libs/bootstrap/images/tidy-packages-back-01.svg old mode 100755 new mode 100644 similarity index 100% rename from static/css/images/tidy-packages-back-01.svg rename to docs/site_libs/bootstrap/images/tidy-packages-back-01.svg diff --git a/static/css/images/tidy-packages-back-02.svg b/docs/site_libs/bootstrap/images/tidy-packages-back-02.svg old mode 100755 new mode 100644 similarity index 100% rename from static/css/images/tidy-packages-back-02.svg rename to docs/site_libs/bootstrap/images/tidy-packages-back-02.svg diff --git a/static/css/images/tidy-packages-back-03.svg b/docs/site_libs/bootstrap/images/tidy-packages-back-03.svg old mode 100755 new mode 100644 similarity index 100% rename from static/css/images/tidy-packages-back-03.svg rename to docs/site_libs/bootstrap/images/tidy-packages-back-03.svg diff --git a/docs/site_libs/clipboard/clipboard.min.js b/docs/site_libs/clipboard/clipboard.min.js 
new file mode 100644 index 00000000..1103f811 --- /dev/null +++ b/docs/site_libs/clipboard/clipboard.min.js @@ -0,0 +1,7 @@ +/*! + * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1 123,456,666.7890 +var markInterval = function(d, digits, interval, mark, decMark, precision) { + x = precision ? d.toPrecision(digits) : d.toFixed(digits); + if (!/^-?[\d.]+$/.test(x)) return x; + var xv = x.split('.'); + if (xv.length > 2) return x; // should have at most one decimal point + xv[0] = xv[0].replace(new RegExp('\\B(?=(\\d{' + interval + '})+(?!\\d))', 'g'), mark); + return xv.join(decMark); +}; + +DTWidget.formatCurrency = function(data, currency, digits, interval, mark, decMark, before, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + var res = markInterval(d, digits, interval, mark, decMark); + res = before ? (/^-/.test(res) ? 
'-' + currency + res.replace(/^-/, '') : currency + res) : + res + currency; + return res; +}; + +DTWidget.formatString = function(data, prefix, suffix) { + var d = data; + if (d === null) return ''; + return prefix + d + suffix; +}; + +DTWidget.formatPercentage = function(data, digits, interval, mark, decMark, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + return markInterval(d * 100, digits, interval, mark, decMark) + '%'; +}; + +DTWidget.formatRound = function(data, digits, interval, mark, decMark, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + return markInterval(d, digits, interval, mark, decMark); +}; + +DTWidget.formatSignif = function(data, digits, interval, mark, decMark, zeroPrint) { + var d = parseFloat(data); + if (isNaN(d)) return ''; + if (zeroPrint !== null && d === 0.0) return zeroPrint; + return markInterval(d, digits, interval, mark, decMark, true); +}; + +DTWidget.formatDate = function(data, method, params) { + var d = data; + if (d === null) return ''; + // (new Date('2015-10-28')).toDateString() may return 2015-10-27 because the + // actual time created could be like 'Tue Oct 27 2015 19:00:00 GMT-0500 (CDT)', + // i.e. 
the date-only string is treated as UTC time instead of local time + if ((method === 'toDateString' || method === 'toLocaleDateString') && /^\d{4,}\D\d{2}\D\d{2}$/.test(d)) { + d = d.split(/\D/); + d = new Date(d[0], d[1] - 1, d[2]); + } else { + d = new Date(d); + } + return d[method].apply(d, params); +}; + +window.DTWidget = DTWidget; + +// A helper function to update the properties of existing filters +var setFilterProps = function(td, props) { + // Update enabled/disabled state + var $input = $(td).find('input').first(); + var searchable = $input.data('searchable'); + $input.prop('disabled', !searchable || props.disabled); + + // Based on the filter type, set its new values + var type = td.getAttribute('data-type'); + if (['factor', 'logical'].includes(type)) { + // Reformat the new dropdown options for use with selectize + var new_vals = props.params.options.map(function(item) { + return { text: item, value: item }; + }); + + // Find the selectize object + var dropdown = $(td).find('.selectized').eq(0)[0].selectize; + + // Note the current values + var old_vals = dropdown.getValue(); + + // Remove the existing values + dropdown.clearOptions(); + + // Add the new options + dropdown.addOption(new_vals); + + // Preserve the existing values + dropdown.setValue(old_vals); + + } else if (['number', 'integer', 'date', 'time'].includes(type)) { + // Apply internal scaling to new limits. Updating scale not yet implemented. 
+ var slider = $(td).find('.noUi-target').eq(0); + var scale = Math.pow(10, Math.max(0, +slider.data('scale') || 0)); + var new_vals = [props.params.min * scale, props.params.max * scale]; + + // Note what the new limits will be just for this filter + var new_lims = new_vals.slice(); + + // Determine the current values and limits + var old_vals = slider.val().map(Number); + var old_lims = slider.noUiSlider('options').range; + old_lims = [old_lims.min, old_lims.max]; + + // Preserve the current values if filters have been applied; otherwise, apply no filtering + if (old_vals[0] != old_lims[0]) { + new_vals[0] = Math.max(old_vals[0], new_vals[0]); + } + + if (old_vals[1] != old_lims[1]) { + new_vals[1] = Math.min(old_vals[1], new_vals[1]); + } + + // Update the endpoints of the slider + slider.noUiSlider({ + start: new_vals, + range: {'min': new_lims[0], 'max': new_lims[1]} + }, true); + } +}; + +var transposeArray2D = function(a) { + return a.length === 0 ? a : HTMLWidgets.transposeArray2D(a); +}; + +var crosstalkPluginsInstalled = false; + +function maybeInstallCrosstalkPlugins() { + if (crosstalkPluginsInstalled) + return; + crosstalkPluginsInstalled = true; + + $.fn.dataTable.ext.afnFiltering.push( + function(oSettings, aData, iDataIndex) { + var ctfilter = oSettings.nTable.ctfilter; + if (ctfilter && !ctfilter[iDataIndex]) + return false; + + var ctselect = oSettings.nTable.ctselect; + if (ctselect && !ctselect[iDataIndex]) + return false; + + return true; + } + ); +} + +HTMLWidgets.widget({ + name: "datatables", + type: "output", + renderOnNullValue: true, + initialize: function(el, width, height) { + // in order that the type=number inputs return a number + $.valHooks.number = { + get: function(el) { + var value = parseFloat(el.value); + return isNaN(value) ? 
"" : value; + } + }; + $(el).html(' '); + return { + data: null, + ctfilterHandle: new crosstalk.FilterHandle(), + ctfilterSubscription: null, + ctselectHandle: new crosstalk.SelectionHandle(), + ctselectSubscription: null + }; + }, + renderValue: function(el, data, instance) { + if (el.offsetWidth === 0 || el.offsetHeight === 0) { + instance.data = data; + return; + } + instance.data = null; + var $el = $(el); + $el.empty(); + + if (data === null) { + $el.append(' '); + // clear previous Shiny inputs (if any) + for (var i in instance.clearInputs) instance.clearInputs[i](); + instance.clearInputs = {}; + return; + } + + var crosstalkOptions = data.crosstalkOptions; + if (!crosstalkOptions) crosstalkOptions = { + 'key': null, 'group': null + }; + if (crosstalkOptions.group) { + maybeInstallCrosstalkPlugins(); + instance.ctfilterHandle.setGroup(crosstalkOptions.group); + instance.ctselectHandle.setGroup(crosstalkOptions.group); + } + + // if we are in the viewer then we always want to fillContainer and + // and autoHideNavigation (unless the user has explicitly set these) + if (window.HTMLWidgets.viewerMode) { + if (!data.hasOwnProperty("fillContainer")) + data.fillContainer = true; + if (!data.hasOwnProperty("autoHideNavigation")) + data.autoHideNavigation = true; + } + + // propagate fillContainer to instance (so we have it in resize) + instance.fillContainer = data.fillContainer; + + var cells = data.data; + + if (cells instanceof Array) cells = transposeArray2D(cells); + + $el.append(data.container); + var $table = $el.find('table'); + if (data.class) $table.addClass(data.class); + if (data.caption) $table.prepend(data.caption); + + if (!data.selection) data.selection = { + mode: 'none', selected: null, target: 'row', selectable: null + }; + if (HTMLWidgets.shinyMode && data.selection.mode !== 'none' && + data.selection.target === 'row+column') { + if ($table.children('tfoot').length === 0) { + $table.append($('')); + $table.find('thead 
tr').clone().appendTo($table.find('tfoot')); + } + } + + // column filters + var filterRow; + switch (data.filter) { + case 'top': + $table.children('thead').append(data.filterHTML); + filterRow = $table.find('thead tr:last td'); + break; + case 'bottom': + if ($table.children('tfoot').length === 0) { + $table.append($('')); + } + $table.children('tfoot').prepend(data.filterHTML); + filterRow = $table.find('tfoot tr:first td'); + break; + } + + var options = { searchDelay: 1000 }; + if (cells !== null) $.extend(options, { + data: cells + }); + + // options for fillContainer + var bootstrapActive = typeof($.fn.popover) != 'undefined'; + if (instance.fillContainer) { + + // force scrollX/scrollY and turn off autoWidth + options.scrollX = true; + options.scrollY = "100px"; // can be any value, we'll adjust below + + // if we aren't paginating then move around the info/filter controls + // to save space at the bottom and rephrase the info callback + if (data.options.paging === false) { + + // we know how to do this cleanly for bootstrap, not so much + // for other themes/layouts + if (bootstrapActive) { + options.dom = "<'row'<'col-sm-4'i><'col-sm-8'f>>" + + "<'row'<'col-sm-12'tr>>"; + } + + options.fnInfoCallback = function(oSettings, iStart, iEnd, + iMax, iTotal, sPre) { + return Number(iTotal).toLocaleString() + " records"; + }; + } + } + + // auto hide navigation if requested + // Note, this only works on client-side processing mode as on server-side, + // cells (data.data) is null; In addition, we require the pageLength option + // being provided explicitly to enable this. Despite we may be able to deduce + // the default value of pageLength, it may complicate things so we'd rather + // put this responsiblity to users and warn them on the R side. + if (data.autoHideNavigation === true && data.options.paging !== false) { + // strip all nav if length >= cells + if ((cells instanceof Array) && data.options.pageLength >= cells.length) + options.dom = bootstrapActive ? 
"<'row'<'col-sm-12'tr>>" : "t"; + // alternatively lean things out for flexdashboard mobile portrait + else if (bootstrapActive && window.FlexDashboard && window.FlexDashboard.isMobilePhone()) + options.dom = "<'row'<'col-sm-12'f>>" + + "<'row'<'col-sm-12'tr>>" + + "<'row'<'col-sm-12'p>>"; + } + + $.extend(true, options, data.options || {}); + + var searchCols = options.searchCols; + if (searchCols) { + searchCols = searchCols.map(function(x) { + return x === null ? '' : x.search; + }); + // FIXME: this means I don't respect the escapeRegex setting + delete options.searchCols; + } + + // server-side processing? + var server = options.serverSide === true; + + // use the dataSrc function to pre-process JSON data returned from R + var DT_rows_all = [], DT_rows_current = []; + if (server && HTMLWidgets.shinyMode && typeof options.ajax === 'object' && + /^session\/[\da-z]+\/dataobj/.test(options.ajax.url) && !options.ajax.dataSrc) { + options.ajax.dataSrc = function(json) { + DT_rows_all = $.makeArray(json.DT_rows_all); + DT_rows_current = $.makeArray(json.DT_rows_current); + var data = json.data; + if (!colReorderEnabled()) return data; + var table = $table.DataTable(), order = table.colReorder.order(), flag = true, i, j, row; + for (i = 0; i < order.length; ++i) if (order[i] !== i) flag = false; + if (flag) return data; + for (i = 0; i < data.length; ++i) { + row = data[i].slice(); + for (j = 0; j < order.length; ++j) data[i][j] = row[order[j]]; + } + return data; + }; + } + + var thiz = this; + if (instance.fillContainer) $table.on('init.dt', function(e) { + thiz.fillAvailableHeight(el, $(el).innerHeight()); + }); + // If the page contains serveral datatables and one of which enables colReorder, + // the table.colReorder.order() function will exist but throws error when called. + // So it seems like the only way to know if colReorder is enabled or not is to + // check the options. 
+ var colReorderEnabled = function() { return "colReorder" in options; }; + var table = $table.DataTable(options); + $el.data('datatable', table); + + // Unregister previous Crosstalk event subscriptions, if they exist + if (instance.ctfilterSubscription) { + instance.ctfilterHandle.off("change", instance.ctfilterSubscription); + instance.ctfilterSubscription = null; + } + if (instance.ctselectSubscription) { + instance.ctselectHandle.off("change", instance.ctselectSubscription); + instance.ctselectSubscription = null; + } + + if (!crosstalkOptions.group) { + $table[0].ctfilter = null; + $table[0].ctselect = null; + } else { + var key = crosstalkOptions.key; + function keysToMatches(keys) { + if (!keys) { + return null; + } else { + var selectedKeys = {}; + for (var i = 0; i < keys.length; i++) { + selectedKeys[keys[i]] = true; + } + var matches = {}; + for (var j = 0; j < key.length; j++) { + if (selectedKeys[key[j]]) + matches[j] = true; + } + return matches; + } + } + + function applyCrosstalkFilter(e) { + $table[0].ctfilter = keysToMatches(e.value); + table.draw(); + } + instance.ctfilterSubscription = instance.ctfilterHandle.on("change", applyCrosstalkFilter); + applyCrosstalkFilter({value: instance.ctfilterHandle.filteredKeys}); + + function applyCrosstalkSelection(e) { + if (e.sender !== instance.ctselectHandle) { + table + .rows('.' 
+ selClass, {search: 'applied'}) + .nodes() + .to$() + .removeClass(selClass); + if (selectedRows) + changeInput('rows_selected', selectedRows(), void 0, true); + } + + if (e.sender !== instance.ctselectHandle && e.value && e.value.length) { + var matches = keysToMatches(e.value); + + // persistent selection with plotly (& leaflet) + var ctOpts = crosstalk.var("plotlyCrosstalkOpts").get() || {}; + if (ctOpts.persistent === true) { + var matches = $.extend(matches, $table[0].ctselect); + } + + $table[0].ctselect = matches; + table.draw(); + } else { + if ($table[0].ctselect) { + $table[0].ctselect = null; + table.draw(); + } + } + } + instance.ctselectSubscription = instance.ctselectHandle.on("change", applyCrosstalkSelection); + // TODO: This next line doesn't seem to work when renderDataTable is used + applyCrosstalkSelection({value: instance.ctselectHandle.value}); + } + + var inArray = function(val, array) { + return $.inArray(val, $.makeArray(array)) > -1; + }; + + // search the i-th column + var searchColumn = function(i, value) { + var regex = false, ci = true; + if (options.search) { + regex = options.search.regex, + ci = options.search.caseInsensitive !== false; + } + return table.column(i).search(value, regex, !regex, ci); + }; + + if (data.filter !== 'none') { + + filterRow.each(function(i, td) { + + var $td = $(td), type = $td.data('type'), filter; + var $input = $td.children('div').first().children('input'); + var disabled = $input.prop('disabled'); + var searchable = table.settings()[0].aoColumns[i].bSearchable; + $input.prop('disabled', !searchable || disabled); + $input.data('searchable', searchable); // for updating later + $input.on('input blur', function() { + $input.next('span').toggle(Boolean($input.val())); + }); + // Bootstrap sets pointer-events to none and we won't be able to click + // the clear button + $input.next('span').css('pointer-events', 'auto').hide().click(function() { + 
$(this).hide().prev('input').val('').trigger('input').focus(); + }); + var searchCol; // search string for this column + if (searchCols && searchCols[i]) { + searchCol = searchCols[i]; + $input.val(searchCol).trigger('input'); + } + var $x = $td.children('div').last(); + + // remove the overflow: hidden attribute of the scrollHead + // (otherwise the scrolling table body obscures the filters) + // The workaround and the discussion from + // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 + // Otherwise the filter selection will not be anchored to the values + // when the columns number is many and scrollX is enabled. + var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); + var cssOverflowHead = scrollHead.css('overflow'); + var scrollBody = $(el).find('.dataTables_scrollBody'); + var cssOverflowBody = scrollBody.css('overflow'); + var scrollTable = $(el).find('.dataTables_scroll'); + var cssOverflowTable = scrollTable.css('overflow'); + if (cssOverflowHead === 'hidden') { + $x.on('show hide', function(e) { + if (e.type === 'show') { + scrollHead.css('overflow', 'visible'); + scrollBody.css('overflow', 'visible'); + scrollTable.css('overflow-x', 'scroll'); + } else { + scrollHead.css('overflow', cssOverflowHead); + scrollBody.css('overflow', cssOverflowBody); + scrollTable.css('overflow-x', cssOverflowTable); + } + }); + $x.css('z-index', 25); + } + + if (inArray(type, ['factor', 'logical'])) { + $input.on({ + click: function() { + $input.parent().hide(); $x.show().trigger('show'); filter[0].selectize.focus(); + }, + input: function() { + if ($input.val() === '') filter[0].selectize.setValue([]); + } + }); + var $input2 = $x.children('select'); + filter = $input2.selectize({ + options: $input2.data('options').map(function(v, i) { + return ({text: v, value: v}); + }), + plugins: ['remove_button'], + hideSelected: true, + onChange: function(value) { + if (value === null) value = []; // compatibility with jQuery 3.0 + 
$input.val(value.length ? JSON.stringify(value) : ''); + if (value.length) $input.trigger('input'); + $input.attr('title', $input.val()); + if (server) { + table.column(i).search(value.length ? JSON.stringify(value) : '').draw(); + return; + } + // turn off filter if nothing selected + $td.data('filter', value.length > 0); + table.draw(); // redraw table, and filters will be applied + } + }); + if (searchCol) filter[0].selectize.setValue(JSON.parse(searchCol)); + filter[0].selectize.on('blur', function() { + $x.hide().trigger('hide'); $input.parent().show(); $input.trigger('blur'); + }); + filter.next('div').css('margin-bottom', 'auto'); + } else if (type === 'character') { + var fun = function() { + searchColumn(i, $input.val()).draw(); + }; + if (server) { + fun = $.fn.dataTable.util.throttle(fun, options.searchDelay); + } + $input.on('input', fun); + } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { + var $x0 = $x; + $x = $x0.children('div').first(); + $x0.css({ + 'background-color': '#fff', + 'border': '1px #ddd solid', + 'border-radius': '4px', + 'padding': data.vertical ? '35px 20px': '20px 20px 10px 20px' + }); + var $spans = $x0.children('span').css({ + 'margin-top': data.vertical ? 
'0' : '10px', + 'white-space': 'nowrap' + }); + var $span1 = $spans.first(), $span2 = $spans.last(); + var r1 = +$x.data('min'), r2 = +$x.data('max'); + // when the numbers are too small or have many decimal places, the + // slider may have numeric precision problems (#150) + var scale = Math.pow(10, Math.max(0, +$x.data('scale') || 0)); + r1 = Math.round(r1 * scale); r2 = Math.round(r2 * scale); + var scaleBack = function(x, scale) { + if (scale === 1) return x; + var d = Math.round(Math.log(scale) / Math.log(10)); + // to avoid problems like 3.423/100 -> 0.034230000000000003 + return (x / scale).toFixed(d); + }; + var slider_min = function() { + return filter.noUiSlider('options').range.min; + }; + var slider_max = function() { + return filter.noUiSlider('options').range.max; + }; + $input.on({ + focus: function() { + $x0.show().trigger('show'); + // first, make sure the slider div leaves at least 20px between + // the two (slider value) span's + $x0.width(Math.max(160, $span1.outerWidth() + $span2.outerWidth() + 20)); + // then, if the input is really wide or slider is vertical, + // make the slider the same width as the input + if ($x0.outerWidth() < $input.outerWidth() || data.vertical) { + $x0.outerWidth($input.outerWidth()); + } + // make sure the slider div does not reach beyond the right margin + if ($(window).width() < $x0.offset().left + $x0.width()) { + $x0.offset({ + 'left': $input.offset().left + $input.outerWidth() - $x0.outerWidth() + }); + } + }, + blur: function() { + $x0.hide().trigger('hide'); + }, + input: function() { + if ($input.val() === '') filter.val([slider_min(), slider_max()]); + }, + change: function() { + var v = $input.val().replace(/\s/g, ''); + if (v === '') return; + v = v.split('...'); + if (v.length !== 2) { + $input.parent().addClass('has-error'); + return; + } + if (v[0] === '') v[0] = slider_min(); + if (v[1] === '') v[1] = slider_max(); + $input.parent().removeClass('has-error'); + // treat date as UTC time at midnight + 
var strTime = function(x) { + var s = type === 'date' ? 'T00:00:00Z' : ''; + var t = new Date(x + s).getTime(); + // add 10 minutes to date since it does not hurt the date, and + // it helps avoid the tricky floating point arithmetic problems, + // e.g. sometimes the date may be a few milliseconds earlier + // than the midnight due to precision problems in noUiSlider + return type === 'date' ? t + 3600000 : t; + }; + if (inArray(type, ['date', 'time'])) { + v[0] = strTime(v[0]); + v[1] = strTime(v[1]); + } + if (v[0] != slider_min()) v[0] *= scale; + if (v[1] != slider_max()) v[1] *= scale; + filter.val(v); + } + }); + var formatDate = function(d, isoFmt) { + d = scaleBack(d, scale); + if (type === 'number') return d; + if (type === 'integer') return parseInt(d); + var x = new Date(+d); + var fmt = ('filterDateFmt' in data) ? data.filterDateFmt[i] : undefined; + if (fmt !== undefined && isoFmt === false) return x[fmt.method].apply(x, fmt.params); + if (type === 'date') { + var pad0 = function(x) { + return ('0' + x).substr(-2, 2); + }; + return x.getUTCFullYear() + '-' + pad0(1 + x.getUTCMonth()) + + '-' + pad0(x.getUTCDate()); + } else { + return x.toISOString(); + } + }; + var opts = type === 'date' ? { step: 60 * 60 * 1000 } : + type === 'integer' ? { step: 1 } : {}; + + opts.orientation = data.vertical ? 'vertical': 'horizontal'; + opts.direction = data.vertical ? 
'rtl': 'ltr'; + + filter = $x.noUiSlider($.extend({ + start: [r1, r2], + range: {min: r1, max: r2}, + connect: true + }, opts)); + if (scale > 1) (function() { + var t1 = r1, t2 = r2; + var val = filter.val(); + while (val[0] > r1 || val[1] < r2) { + if (val[0] > r1) { + t1 -= val[0] - r1; + } + if (val[1] < r2) { + t2 += r2 - val[1]; + } + filter = $x.noUiSlider($.extend({ + start: [t1, t2], + range: {min: t1, max: t2}, + connect: true + }, opts), true); + val = filter.val(); + } + r1 = t1; r2 = t2; + })(); + var updateSliderText = function(v1, v2) { + $span1.text(formatDate(v1, false)); $span2.text(formatDate(v2, false)); + }; + updateSliderText(r1, r2); + var updateSlider = function(e) { + var val = filter.val(); + // turn off filter if in full range + $td.data('filter', val[0] > slider_min() || val[1] < slider_max()); + var v1 = formatDate(val[0]), v2 = formatDate(val[1]), ival; + if ($td.data('filter')) { + ival = v1 + ' ... ' + v2; + $input.attr('title', ival).val(ival).trigger('input'); + } else { + $input.attr('title', '').val(''); + } + updateSliderText(val[0], val[1]); + if (e.type === 'slide') return; // no searching when sliding only + if (server) { + table.column(i).search($td.data('filter') ? ival : '').draw(); + return; + } + table.draw(); + }; + filter.on({ + set: updateSlider, + slide: updateSlider + }); + } + + // server-side processing will be handled by R (or whatever server + // language you use); the following code is only needed for client-side + // processing + if (server) { + // if a search string has been pre-set, search now + if (searchCol) searchColumn(i, searchCol).draw(); + return; + } + + var customFilter = function(settings, data, dataIndex) { + // there is no way to attach a search function to a specific table, + // and we need to make sure a global search function is not applied to + // all tables (i.e. 
a range filter in a previous table should not be + // applied to the current table); we use the settings object to + // determine if we want to perform searching on the current table, + // since settings.sTableId will be different to different tables + if (table.settings()[0] !== settings) return true; + // no filter on this column or no need to filter this column + if (typeof filter === 'undefined' || !$td.data('filter')) return true; + + var r = filter.val(), v, r0, r1; + var i_data = function(i) { + if (!colReorderEnabled()) return i; + var order = table.colReorder.order(), k; + for (k = 0; k < order.length; ++k) if (order[k] === i) return k; + return i; // in theory it will never be here... + } + v = data[i_data(i)]; + if (type === 'number' || type === 'integer') { + v = parseFloat(v); + // how to handle NaN? currently exclude these rows + if (isNaN(v)) return(false); + r0 = parseFloat(scaleBack(r[0], scale)) + r1 = parseFloat(scaleBack(r[1], scale)); + if (v >= r0 && v <= r1) return true; + } else if (type === 'date' || type === 'time') { + v = new Date(v); + r0 = new Date(r[0] / scale); r1 = new Date(r[1] / scale); + if (v >= r0 && v <= r1) return true; + } else if (type === 'factor') { + if (r.length === 0 || inArray(v, r)) return true; + } else if (type === 'logical') { + if (r.length === 0) return true; + if (inArray(v === '' ? 
'na' : v, r)) return true; + } + return false; + }; + + $.fn.dataTable.ext.search.push(customFilter); + + // search for the preset search strings if it is non-empty + if (searchCol) { + if (inArray(type, ['factor', 'logical'])) { + filter[0].selectize.setValue(JSON.parse(searchCol)); + } else if (type === 'character') { + $input.trigger('input'); + } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { + $input.trigger('change'); + } + } + + }); + + } + + // highlight search keywords + var highlight = function() { + var body = $(table.table().body()); + // removing the old highlighting first + body.unhighlight(); + + // don't highlight the "not found" row, so we get the rows using the api + if (table.rows({ filter: 'applied' }).data().length === 0) return; + // highlight global search keywords + body.highlight($.trim(table.search()).split(/\s+/)); + // then highlight keywords from individual column filters + if (filterRow) filterRow.each(function(i, td) { + var $td = $(td), type = $td.data('type'); + if (type !== 'character') return; + var $input = $td.children('div').first().children('input'); + var column = table.column(i).nodes().to$(), + val = $.trim($input.val()); + if (type !== 'character' || val === '') return; + column.highlight(val.split(/\s+/)); + }); + }; + + if (options.searchHighlight) { + table + .on('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth', highlight) + .on('destroy', function() { + // remove event handler + table.off('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth'); + }); + + // Set the option for escaping regex characters in our search string. This will be used + // for all future matching. 
+ jQuery.fn.highlight.options.escapeRegex = (!options.search || !options.search.regex); + + // initial highlight for state saved conditions and initial states + highlight(); + } + + // run the callback function on the table instance + if (typeof data.callback === 'function') data.callback(table); + + // double click to edit the cell, row, column, or all cells + if (data.editable) table.on('dblclick.dt', 'tbody td', function(e) { + // only bring up the editor when the cell itself is dbclicked, and ignore + // other dbclick events bubbled up (e.g. from the ) + if (e.target !== this) return; + var target = [], immediate = false; + switch (data.editable.target) { + case 'cell': + target = [this]; + immediate = true; // edit will take effect immediately + break; + case 'row': + target = table.cells(table.cell(this).index().row, '*').nodes(); + break; + case 'column': + target = table.cells('*', table.cell(this).index().column).nodes(); + break; + case 'all': + target = table.cells().nodes(); + break; + default: + throw 'The editable parameter must be "cell", "row", "column", or "all"'; + } + var disableCols = data.editable.disable ? 
data.editable.disable.columns : null; + var numericCols = data.editable.numeric; + var areaCols = data.editable.area; + for (var i = 0; i < target.length; i++) { + (function(cell, current) { + var $cell = $(cell), html = $cell.html(); + var _cell = table.cell(cell), value = _cell.data(), index = _cell.index().column; + var $input; + if (inArray(index, numericCols)) { + $input = $(''); + } else if (inArray(index, areaCols)) { + $input = $(''); + } else { + $input = $(''); + } + if (!immediate) { + $cell.data('input', $input).data('html', html); + $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); + } + $input.val(value); + if (inArray(index, disableCols)) { + $input.attr('readonly', '').css('filter', 'invert(25%)'); + } + $cell.empty().append($input); + if (cell === current) $input.focus(); + $input.css('width', '100%'); + + if (immediate) $input.on('blur', function(e) { + var valueNew = $input.val(); + if (valueNew !== value) { + _cell.data(valueNew); + if (HTMLWidgets.shinyMode) { + changeInput('cell_edit', [cellInfo(cell)], 'DT.cellInfo', null, {priority: 'event'}); + } + // for server-side processing, users have to call replaceData() to update the table + if (!server) table.draw(false); + } else { + $cell.html(html); + } + }).on('keyup', function(e) { + // hit Escape to cancel editing + if (e.keyCode === 27) $input.trigger('blur'); + }); + + // bulk edit (row, column, or all) + if (!immediate) $input.on('keyup', function(e) { + var removeInput = function($cell, restore) { + $cell.data('input').remove(); + if (restore) $cell.html($cell.data('html')); + } + if (e.keyCode === 27) { + for (var i = 0; i < target.length; i++) { + removeInput($(target[i]), true); + } + } else if (e.keyCode === 13 && e.ctrlKey) { + // Ctrl + Enter + var cell, $cell, _cell, cellData = []; + for (var i = 0; i < target.length; i++) { + cell = target[i]; $cell = $(cell); _cell = table.cell(cell); + _cell.data($cell.data('input').val()); + HTMLWidgets.shinyMode && 
cellData.push(cellInfo(cell)); + removeInput($cell, false); + } + if (HTMLWidgets.shinyMode) { + changeInput('cell_edit', cellData, 'DT.cellInfo', null, {priority: "event"}); + } + if (!server) table.draw(false); + } + }); + })(target[i], this); + } + }); + + // interaction with shiny + if (!HTMLWidgets.shinyMode && !crosstalkOptions.group) return; + + var methods = {}; + var shinyData = {}; + + methods.updateCaption = function(caption) { + if (!caption) return; + $table.children('caption').replaceWith(caption); + } + + // register clear functions to remove input values when the table is removed + instance.clearInputs = {}; + + var changeInput = function(id, value, type, noCrosstalk, opts) { + var event = id; + id = el.id + '_' + id; + if (type) id = id + ':' + type; + // do not update if the new value is the same as old value + if (event !== 'cell_edit' && !/_clicked$/.test(event) && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) + return; + shinyData[id] = JSON.stringify(value); + if (HTMLWidgets.shinyMode && Shiny.setInputValue) { + Shiny.setInputValue(id, value, opts); + if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { + Shiny.setInputValue(id, null); + } + } + + // HACK + if (event === "rows_selected" && !noCrosstalk) { + if (crosstalkOptions.group) { + var keys = crosstalkOptions.key; + var selectedKeys = null; + if (value) { + selectedKeys = []; + for (var i = 0; i < value.length; i++) { + // The value array's contents use 1-based row numbers, so we must + // convert to 0-based before indexing into the keys array. 
+ selectedKeys.push(keys[value[i] - 1]); + } + } + instance.ctselectHandle.set(selectedKeys); + } + } + }; + + var addOne = function(x) { + return x.map(function(i) { return 1 + i; }); + }; + + var unique = function(x) { + var ux = []; + $.each(x, function(i, el){ + if ($.inArray(el, ux) === -1) ux.push(el); + }); + return ux; + } + + // change the row index of a cell + var tweakCellIndex = function(cell) { + var info = cell.index(); + // some cell may not be valid. e.g, #759 + // when using the RowGroup extension, datatables will + // generate the row label and the cells are not part of + // the data thus contain no row/col info + if (info === undefined) + return {row: null, col: null}; + if (server) { + info.row = DT_rows_current[info.row]; + } else { + info.row += 1; + } + return {row: info.row, col: info.column}; + } + + var cleanSelectedValues = function() { + changeInput('rows_selected', []); + changeInput('columns_selected', []); + changeInput('cells_selected', transposeArray2D([]), 'shiny.matrix'); + } + // #828 we should clean the selection on the server-side when the table reloads + cleanSelectedValues(); + + // a flag to indicates if select extension is initialized or not + var flagSelectExt = table.settings()[0]._select !== undefined; + // the Select extension should only be used in the client mode and + // when the selection.mode is set to none + if (data.selection.mode === 'none' && !server && flagSelectExt) { + var updateRowsSelected = function() { + var rows = table.rows({selected: true}); + var selected = []; + $.each(rows.indexes().toArray(), function(i, v) { + selected.push(v + 1); + }); + changeInput('rows_selected', selected); + } + var updateColsSelected = function() { + var columns = table.columns({selected: true}); + changeInput('columns_selected', columns.indexes().toArray()); + } + var updateCellsSelected = function() { + var cells = table.cells({selected: true}); + var selected = []; + cells.every(function() { + var row = 
this.index().row; + var col = this.index().column; + selected = selected.concat([[row + 1, col]]); + }); + changeInput('cells_selected', transposeArray2D(selected), 'shiny.matrix'); + } + table.on('select deselect', function(e, dt, type, indexes) { + updateRowsSelected(); + updateColsSelected(); + updateCellsSelected(); + }) + } + + var selMode = data.selection.mode, selTarget = data.selection.target; + var selDisable = data.selection.selectable === false; + if (inArray(selMode, ['single', 'multiple'])) { + var selClass = inArray(data.style, ['bootstrap', 'bootstrap4']) ? 'active' : 'selected'; + // selected1: row indices; selected2: column indices + var initSel = function(x) { + if (x === null || typeof x === 'boolean' || selTarget === 'cell') { + return {rows: [], cols: []}; + } else if (selTarget === 'row') { + return {rows: $.makeArray(x), cols: []}; + } else if (selTarget === 'column') { + return {rows: [], cols: $.makeArray(x)}; + } else if (selTarget === 'row+column') { + return {rows: $.makeArray(x.rows), cols: $.makeArray(x.cols)}; + } + } + var selected = data.selection.selected; + var selected1 = initSel(selected).rows, selected2 = initSel(selected).cols; + // selectable should contain either all positive or all non-positive values, not both + // positive values indicate "selectable" while non-positive values means "nonselectable" + // the assertion is performed on R side. (only column indicides could be zero which indicates + // the row name) + var selectable = data.selection.selectable; + var selectable1 = initSel(selectable).rows, selectable2 = initSel(selectable).cols; + + // After users reorder the rows or filter the table, we cannot use the table index + // directly. Instead, we need this function to find out the rows between the two clicks. + // If user filter the table again between the start click and the end click, the behavior + // would be undefined, but it should not be a problem. 
+ var shiftSelRowsIndex = function(start, end) { + var indexes = server ? DT_rows_all : table.rows({ search: 'applied' }).indexes().toArray(); + start = indexes.indexOf(start); end = indexes.indexOf(end); + // if start is larger than end, we need to swap + if (start > end) { + var tmp = end; end = start; start = tmp; + } + return indexes.slice(start, end + 1); + } + + var serverRowIndex = function(clientRowIndex) { + return server ? DT_rows_current[clientRowIndex] : clientRowIndex + 1; + } + + // row, column, or cell selection + var lastClickedRow; + if (inArray(selTarget, ['row', 'row+column'])) { + // Get the current selected rows. It will also + // update the selected1's value based on the current row selection state + // Note we can't put this function inside selectRows() directly, + // the reason is method.selectRows() will override selected1's value but this + // function will add rows to selected1 (keep the existing selection), which is + // inconsistent with column and cell selection. + var selectedRows = function() { + var rows = table.rows('.' + selClass); + var idx = rows.indexes().toArray(); + if (!server) { + selected1 = addOne(idx); + return selected1; + } + idx = idx.map(function(i) { + return DT_rows_current[i]; + }); + selected1 = selMode === 'multiple' ? 
unique(selected1.concat(idx)) : idx; + return selected1; + } + // Change selected1's value based on selectable1, then refresh the row state + var onlyKeepSelectableRows = function() { + if (selDisable) { // users can't select; useful when only want backend select + selected1 = []; + return; + } + if (selectable1.length === 0) return; + var nonselectable = selectable1[0] <= 0; + if (nonselectable) { + // should make selectable1 positive + selected1 = $(selected1).not(selectable1.map(function(i) { return -i; })).get(); + } else { + selected1 = $(selected1).filter(selectable1).get(); + } + } + // Change selected1's value based on selectable1, then + // refresh the row selection state according to values in selected1 + var selectRows = function(ignoreSelectable) { + if (!ignoreSelectable) onlyKeepSelectableRows(); + table.$('tr.' + selClass).removeClass(selClass); + if (selected1.length === 0) return; + if (server) { + table.rows({page: 'current'}).every(function() { + if (inArray(DT_rows_current[this.index()], selected1)) { + $(this.node()).addClass(selClass); + } + }); + } else { + var selected0 = selected1.map(function(i) { return i - 1; }); + $(table.rows(selected0).nodes()).addClass(selClass); + } + } + table.on('mousedown.dt', 'tbody tr', function(e) { + var $this = $(this), thisRow = table.row(this); + if (selMode === 'multiple') { + if (e.shiftKey && lastClickedRow !== undefined) { + // select or de-select depends on the last clicked row's status + var flagSel = !$this.hasClass(selClass); + var crtClickedRow = serverRowIndex(thisRow.index()); + if (server) { + var rowsIndex = shiftSelRowsIndex(lastClickedRow, crtClickedRow); + // update current page's selClass + rowsIndex.map(function(i) { + var rowIndex = DT_rows_current.indexOf(i); + if (rowIndex >= 0) { + var row = table.row(rowIndex).nodes().to$(); + var flagRowSel = !row.hasClass(selClass); + if (flagSel === flagRowSel) row.toggleClass(selClass); + } + }); + // update selected1 + if (flagSel) { + selected1 
= unique(selected1.concat(rowsIndex)); + } else { + selected1 = selected1.filter(function(index) { + return !inArray(index, rowsIndex); + }); + } + } else { + // js starts from 0 + shiftSelRowsIndex(lastClickedRow - 1, crtClickedRow - 1).map(function(value) { + var row = table.row(value).nodes().to$(); + var flagRowSel = !row.hasClass(selClass); + if (flagSel === flagRowSel) row.toggleClass(selClass); + }); + } + e.preventDefault(); + } else { + $this.toggleClass(selClass); + } + } else { + if ($this.hasClass(selClass)) { + $this.removeClass(selClass); + } else { + table.$('tr.' + selClass).removeClass(selClass); + $this.addClass(selClass); + } + } + if (server && !$this.hasClass(selClass)) { + var id = DT_rows_current[thisRow.index()]; + // remove id from selected1 since its class .selected has been removed + if (inArray(id, selected1)) selected1.splice($.inArray(id, selected1), 1); + } + selectedRows(); // update selected1's value based on selClass + selectRows(false); // only keep the selectable rows + changeInput('rows_selected', selected1); + changeInput('row_last_clicked', serverRowIndex(thisRow.index()), null, null, {priority: 'event'}); + lastClickedRow = serverRowIndex(thisRow.index()); + }); + selectRows(false); // in case users have specified pre-selected rows + // restore selected rows after the table is redrawn (e.g. 
sort/search/page); + // client-side tables will preserve the selections automatically; for + // server-side tables, we have to *real* row indices are in `selected1` + changeInput('rows_selected', selected1); + if (server) table.on('draw.dt', function(e) { selectRows(false); }); + methods.selectRows = function(selected, ignoreSelectable) { + selected1 = $.makeArray(selected); + selectRows(ignoreSelectable); + changeInput('rows_selected', selected1); + } + } + + if (inArray(selTarget, ['column', 'row+column'])) { + if (selTarget === 'row+column') { + $(table.columns().footer()).css('cursor', 'pointer'); + } + // update selected2's value based on selectable2 + var onlyKeepSelectableCols = function() { + if (selDisable) { // users can't select; useful when only want backend select + selected2 = []; + return; + } + if (selectable2.length === 0) return; + var nonselectable = selectable2[0] <= 0; + if (nonselectable) { + // need to make selectable2 positive + selected2 = $(selected2).not(selectable2.map(function(i) { return -i; })).get(); + } else { + selected2 = $(selected2).filter(selectable2).get(); + } + } + // update selected2 and then + // refresh the col selection state according to values in selected2 + var selectCols = function(ignoreSelectable) { + if (!ignoreSelectable) onlyKeepSelectableCols(); + // if selected2 is not a valide index (e.g., larger than the column number) + // table.columns(selected2) will fail and result in a blank table + // this is different from the table.rows(), where the out-of-range indexes + // doesn't affect at all + selected2 = $(selected2).filter(table.columns().indexes()).get(); + table.columns().nodes().flatten().to$().removeClass(selClass); + if (selected2.length > 0) + table.columns(selected2).nodes().flatten().to$().addClass(selClass); + } + var callback = function() { + var colIdx = selTarget === 'column' ? 
table.cell(this).index().column : + $.inArray(this, table.columns().footer()), + thisCol = $(table.column(colIdx).nodes()); + if (colIdx === -1) return; + if (thisCol.hasClass(selClass)) { + thisCol.removeClass(selClass); + selected2.splice($.inArray(colIdx, selected2), 1); + } else { + if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); + thisCol.addClass(selClass); + selected2 = selMode === 'single' ? [colIdx] : unique(selected2.concat([colIdx])); + } + selectCols(false); // update selected2 based on selectable + changeInput('columns_selected', selected2); + } + if (selTarget === 'column') { + $(table.table().body()).on('click.dt', 'td', callback); + } else { + $(table.table().footer()).on('click.dt', 'tr th', callback); + } + selectCols(false); // in case users have specified pre-selected columns + changeInput('columns_selected', selected2); + if (server) table.on('draw.dt', function(e) { selectCols(false); }); + methods.selectColumns = function(selected, ignoreSelectable) { + selected2 = $.makeArray(selected); + selectCols(ignoreSelectable); + changeInput('columns_selected', selected2); + } + } + + if (selTarget === 'cell') { + var selected3 = [], selectable3 = []; + if (selected !== null) selected3 = selected; + if (selectable !== null && typeof selectable !== 'boolean') selectable3 = selectable; + var findIndex = function(ij, sel) { + for (var i = 0; i < sel.length; i++) { + if (ij[0] === sel[i][0] && ij[1] === sel[i][1]) return i; + } + return -1; + } + // Change selected3's value based on selectable3, then refresh the cell state + var onlyKeepSelectableCells = function() { + if (selDisable) { // users can't select; useful when only want backend select + selected3 = []; + return; + } + if (selectable3.length === 0) return; + var nonselectable = selectable3[0][0] <= 0; + var out = []; + if (nonselectable) { + selected3.map(function(ij) { + // should make selectable3 positive + if (findIndex([-ij[0], -ij[1]], selectable3) === -1) { 
out.push(ij); } + }); + } else { + selected3.map(function(ij) { + if (findIndex(ij, selectable3) > -1) { out.push(ij); } + }); + } + selected3 = out; + } + // Change selected3's value based on selectable3, then + // refresh the cell selection state according to values in selected3 + var selectCells = function(ignoreSelectable) { + if (!ignoreSelectable) onlyKeepSelectableCells(); + table.$('td.' + selClass).removeClass(selClass); + if (selected3.length === 0) return; + if (server) { + table.cells({page: 'current'}).every(function() { + var info = tweakCellIndex(this); + if (findIndex([info.row, info.col], selected3) > -1) + $(this.node()).addClass(selClass); + }); + } else { + selected3.map(function(ij) { + $(table.cell(ij[0] - 1, ij[1]).node()).addClass(selClass); + }); + } + }; + table.on('click.dt', 'tbody td', function() { + var $this = $(this), info = tweakCellIndex(table.cell(this)); + if ($this.hasClass(selClass)) { + $this.removeClass(selClass); + selected3.splice(findIndex([info.row, info.col], selected3), 1); + } else { + if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); + $this.addClass(selClass); + selected3 = selMode === 'single' ? [[info.row, info.col]] : + unique(selected3.concat([[info.row, info.col]])); + } + selectCells(false); // must call this to update selected3 based on selectable3 + changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); + }); + selectCells(false); // in case users have specified pre-selected columns + changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); + + if (server) table.on('draw.dt', function(e) { selectCells(false); }); + methods.selectCells = function(selected, ignoreSelectable) { + selected3 = selected ? 
selected : []; + selectCells(ignoreSelectable); + changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); + } + } + } + + // expose some table info to Shiny + var updateTableInfo = function(e, settings) { + // TODO: is anyone interested in the page info? + // changeInput('page_info', table.page.info()); + var updateRowInfo = function(id, modifier) { + var idx; + if (server) { + idx = modifier.page === 'current' ? DT_rows_current : DT_rows_all; + } else { + var rows = table.rows($.extend({ + search: 'applied', + page: 'all' + }, modifier)); + idx = addOne(rows.indexes().toArray()); + } + changeInput('rows' + '_' + id, idx); + }; + updateRowInfo('current', {page: 'current'}); + updateRowInfo('all', {}); + } + table.on('draw.dt', updateTableInfo); + updateTableInfo(); + + // state info + table.on('draw.dt column-visibility.dt', function() { + changeInput('state', table.state()); + }); + changeInput('state', table.state()); + + // search info + var updateSearchInfo = function() { + changeInput('search', table.search()); + if (filterRow) changeInput('search_columns', filterRow.toArray().map(function(td) { + return $(td).find('input').first().val(); + })); + } + table.on('draw.dt', updateSearchInfo); + updateSearchInfo(); + + var cellInfo = function(thiz) { + var info = tweakCellIndex(table.cell(thiz)); + info.value = table.cell(thiz).data(); + return info; + } + // the current cell clicked on + table.on('click.dt', 'tbody td', function() { + changeInput('cell_clicked', cellInfo(this), null, null, {priority: 'event'}); + }) + changeInput('cell_clicked', {}); + + // do not trigger table selection when clicking on links unless they have classes + table.on('click.dt', 'tbody td a', function(e) { + if (this.className === '') e.stopPropagation(); + }); + + methods.addRow = function(data, rowname, resetPaging) { + var n = table.columns().indexes().length, d = n - data.length; + if (d === 1) { + data = rowname.concat(data) + } else if (d !== 0) { + 
console.log(data); + console.log(table.columns().indexes()); + throw 'New data must be of the same length as current data (' + n + ')'; + }; + table.row.add(data).draw(resetPaging); + } + + methods.updateSearch = function(keywords) { + if (keywords.global !== null) + $(table.table().container()).find('input[type=search]').first() + .val(keywords.global).trigger('input'); + var columns = keywords.columns; + if (!filterRow || columns === null) return; + filterRow.toArray().map(function(td, i) { + var v = typeof columns === 'string' ? columns : columns[i]; + if (typeof v === 'undefined') { + console.log('The search keyword for column ' + i + ' is undefined') + return; + } + $(td).find('input').first().val(v); + searchColumn(i, v); + }); + table.draw(); + } + + methods.hideCols = function(hide, reset) { + if (reset) table.columns().visible(true, false); + table.columns(hide).visible(false); + } + + methods.showCols = function(show, reset) { + if (reset) table.columns().visible(false, false); + table.columns(show).visible(true); + } + + methods.colReorder = function(order, origOrder) { + table.colReorder.order(order, origOrder); + } + + methods.selectPage = function(page) { + if (table.page.info().pages < page || page < 1) { + throw 'Selected page is out of range'; + }; + table.page(page - 1).draw(false); + } + + methods.reloadData = function(resetPaging, clearSelection) { + // empty selections first if necessary + if (methods.selectRows && inArray('row', clearSelection)) methods.selectRows([]); + if (methods.selectColumns && inArray('column', clearSelection)) methods.selectColumns([]); + if (methods.selectCells && inArray('cell', clearSelection)) methods.selectCells([]); + table.ajax.reload(null, resetPaging); + } + + // update table filters (set new limits of sliders) + methods.updateFilters = function(newProps) { + // loop through each filter in the filter row + filterRow.each(function(i, td) { + var k = i; + if (filterRow.length > newProps.length) { + if (i === 0) 
return; // first column is row names + k = i - 1; + } + // Update the filters to reflect the updated data. + // Allow "falsy" (e.g. NULL) to signify a no-op. + if (newProps[k]) { + setFilterProps(td, newProps[k]); + } + }); + }; + + table.shinyMethods = methods; + }, + resize: function(el, width, height, instance) { + if (instance.data) this.renderValue(el, instance.data, instance); + + // dynamically adjust height if fillContainer = TRUE + if (instance.fillContainer) + this.fillAvailableHeight(el, height); + + this.adjustWidth(el); + }, + + // dynamically set the scroll body to fill available height + // (used with fillContainer = TRUE) + fillAvailableHeight: function(el, availableHeight) { + + // see how much of the table is occupied by header/footer elements + // and use that to compute a target scroll body height + var dtWrapper = $(el).find('div.dataTables_wrapper'); + var dtScrollBody = $(el).find($('div.dataTables_scrollBody')); + var framingHeight = dtWrapper.innerHeight() - dtScrollBody.innerHeight(); + var scrollBodyHeight = availableHeight - framingHeight; + + // we need to set `max-height` to none as datatables library now sets this + // to a fixed height, disabling the ability to resize to fill the window, + // as it will be set to a fixed 100px under such circumstances, e.g., RStudio IDE, + // or FlexDashboard + // see https://github.com/rstudio/DT/issues/951#issuecomment-1026464509 + dtScrollBody.css('max-height', 'none'); + // set the height + dtScrollBody.height(scrollBodyHeight + 'px'); + }, + + // adjust the width of columns; remove the hard-coded widths on table and the + // scroll header when scrollX/Y are enabled + adjustWidth: function(el) { + var $el = $(el), table = $el.data('datatable'); + if (table) table.columns.adjust(); + $el.find('.dataTables_scrollHeadInner').css('width', '') + .children('table').css('margin-left', ''); + } +}); + + if (!HTMLWidgets.shinyMode) return; + + Shiny.addCustomMessageHandler('datatable-calls', 
function(data) { + var id = data.id; + var el = document.getElementById(id); + var table = el ? $(el).data('datatable') : null; + if (!table) { + console.log("Couldn't find table with id " + id); + return; + } + + var methods = table.shinyMethods, call = data.call; + if (methods[call.method]) { + methods[call.method].apply(table, call.args); + } else { + console.log("Unknown method " + call.method); + } + }); + +})(); diff --git a/docs/site_libs/datatables-css-0.0.0/datatables-crosstalk.css b/docs/site_libs/datatables-css-0.0.0/datatables-crosstalk.css new file mode 100644 index 00000000..bd1159c8 --- /dev/null +++ b/docs/site_libs/datatables-css-0.0.0/datatables-crosstalk.css @@ -0,0 +1,32 @@ +.dt-crosstalk-fade { + opacity: 0.2; +} + +html body div.DTS div.dataTables_scrollBody { + background: none; +} + + +/* +Fix https://github.com/rstudio/DT/issues/563 +If the `table.display` is set to "block" (e.g., pkgdown), the browser will display +datatable objects strangely. The search panel and the page buttons will still be +in full-width but the table body will be "compact" and shorter. +In therory, having this attributes will affect `dom="t"` +with `display: block` users. But in reality, there should be no one. +We may remove the below lines in the future if the upstream agree to have this there. +See https://github.com/DataTables/DataTablesSrc/issues/160 +*/ + +table.dataTable { + display: table; +} + + +/* +When DTOutput(fill = TRUE), it receives a .html-fill-item class (via htmltools::bindFillRole()), which effectively amounts to `flex: 1 1 auto`. That's mostly fine, but the case where `fillContainer=TRUE`+`height:auto`+`flex-basis:auto` and the container (e.g., a bslib::card()) doesn't have a defined height is a bit problematic since the table wants to fit the parent but the parent wants to fit the table, which results pretty small table height (maybe because there is a minimum height somewhere?). 
It seems better in this case to impose a 400px height default for the table, which we can do by setting `flex-basis` to 400px (the table is still allowed to grow/shrink when the container has an opinionated height). +*/ + +.html-fill-container > .html-fill-item.datatables { + flex-basis: 400px; +} diff --git a/content/find/parsnip/index_files/dt-core/css/jquery.dataTables.extra.css b/docs/site_libs/dt-core-1.12.1/css/jquery.dataTables.extra.css similarity index 100% rename from content/find/parsnip/index_files/dt-core/css/jquery.dataTables.extra.css rename to docs/site_libs/dt-core-1.12.1/css/jquery.dataTables.extra.css diff --git a/content/find/parsnip/index_files/dt-core/css/jquery.dataTables.min.css b/docs/site_libs/dt-core-1.12.1/css/jquery.dataTables.min.css similarity index 100% rename from content/find/parsnip/index_files/dt-core/css/jquery.dataTables.min.css rename to docs/site_libs/dt-core-1.12.1/css/jquery.dataTables.min.css diff --git a/content/find/parsnip/index_files/dt-core/js/jquery.dataTables.min.js b/docs/site_libs/dt-core-1.12.1/js/jquery.dataTables.min.js similarity index 100% rename from content/find/parsnip/index_files/dt-core/js/jquery.dataTables.min.js rename to docs/site_libs/dt-core-1.12.1/js/jquery.dataTables.min.js diff --git a/docs/site_libs/htmlwidgets-1.6.2/htmlwidgets.js b/docs/site_libs/htmlwidgets-1.6.2/htmlwidgets.js new file mode 100644 index 00000000..1067d029 --- /dev/null +++ b/docs/site_libs/htmlwidgets-1.6.2/htmlwidgets.js @@ -0,0 +1,901 @@ +(function() { + // If window.HTMLWidgets is already defined, then use it; otherwise create a + // new object. This allows preceding code to set options that affect the + // initialization process (though none currently exist). + window.HTMLWidgets = window.HTMLWidgets || {}; + + // See if we're running in a viewer pane. If not, we're in a web browser. + var viewerMode = window.HTMLWidgets.viewerMode = + /\bviewer_pane=1\b/.test(window.location); + + // See if we're running in Shiny mode. 
If not, it's a static document. + // Note that static widgets can appear in both Shiny and static modes, but + // obviously, Shiny widgets can only appear in Shiny apps/documents. + var shinyMode = window.HTMLWidgets.shinyMode = + typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings; + + // We can't count on jQuery being available, so we implement our own + // version if necessary. + function querySelectorAll(scope, selector) { + if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) { + return scope.find(selector); + } + if (scope.querySelectorAll) { + return scope.querySelectorAll(selector); + } + } + + function asArray(value) { + if (value === null) + return []; + if ($.isArray(value)) + return value; + return [value]; + } + + // Implement jQuery's extend + function extend(target /*, ... */) { + if (arguments.length == 1) { + return target; + } + for (var i = 1; i < arguments.length; i++) { + var source = arguments[i]; + for (var prop in source) { + if (source.hasOwnProperty(prop)) { + target[prop] = source[prop]; + } + } + } + return target; + } + + // IE8 doesn't support Array.forEach. + function forEach(values, callback, thisArg) { + if (values.forEach) { + values.forEach(callback, thisArg); + } else { + for (var i = 0; i < values.length; i++) { + callback.call(thisArg, values[i], i, values); + } + } + } + + // Replaces the specified method with the return value of funcSource. + // + // Note that funcSource should not BE the new method, it should be a function + // that RETURNS the new method. funcSource receives a single argument that is + // the overridden method, it can be called from the new method. The overridden + // method can be called like a regular function, it has the target permanently + // bound to it so "this" will work correctly. 
+ function overrideMethod(target, methodName, funcSource) { + var superFunc = target[methodName] || function() {}; + var superFuncBound = function() { + return superFunc.apply(target, arguments); + }; + target[methodName] = funcSource(superFuncBound); + } + + // Add a method to delegator that, when invoked, calls + // delegatee.methodName. If there is no such method on + // the delegatee, but there was one on delegator before + // delegateMethod was called, then the original version + // is invoked instead. + // For example: + // + // var a = { + // method1: function() { console.log('a1'); } + // method2: function() { console.log('a2'); } + // }; + // var b = { + // method1: function() { console.log('b1'); } + // }; + // delegateMethod(a, b, "method1"); + // delegateMethod(a, b, "method2"); + // a.method1(); + // a.method2(); + // + // The output would be "b1", "a2". + function delegateMethod(delegator, delegatee, methodName) { + var inherited = delegator[methodName]; + delegator[methodName] = function() { + var target = delegatee; + var method = delegatee[methodName]; + + // The method doesn't exist on the delegatee. Instead, + // call the method on the delegator, if it exists. 
+ if (!method) { + target = delegator; + method = inherited; + } + + if (method) { + return method.apply(target, arguments); + } + }; + } + + // Implement a vague facsimilie of jQuery's data method + function elementData(el, name, value) { + if (arguments.length == 2) { + return el["htmlwidget_data_" + name]; + } else if (arguments.length == 3) { + el["htmlwidget_data_" + name] = value; + return el; + } else { + throw new Error("Wrong number of arguments for elementData: " + + arguments.length); + } + } + + // http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex + function escapeRegExp(str) { + return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); + } + + function hasClass(el, className) { + var re = new RegExp("\\b" + escapeRegExp(className) + "\\b"); + return re.test(el.className); + } + + // elements - array (or array-like object) of HTML elements + // className - class name to test for + // include - if true, only return elements with given className; + // if false, only return elements *without* given className + function filterByClass(elements, className, include) { + var results = []; + for (var i = 0; i < elements.length; i++) { + if (hasClass(elements[i], className) == include) + results.push(elements[i]); + } + return results; + } + + function on(obj, eventName, func) { + if (obj.addEventListener) { + obj.addEventListener(eventName, func, false); + } else if (obj.attachEvent) { + obj.attachEvent(eventName, func); + } + } + + function off(obj, eventName, func) { + if (obj.removeEventListener) + obj.removeEventListener(eventName, func, false); + else if (obj.detachEvent) { + obj.detachEvent(eventName, func); + } + } + + // Translate array of values to top/right/bottom/left, as usual with + // the "padding" CSS property + // https://developer.mozilla.org/en-US/docs/Web/CSS/padding + function unpackPadding(value) { + if (typeof(value) === "number") + value = [value]; + if (value.length === 1) { + return {top: 
value[0], right: value[0], bottom: value[0], left: value[0]}; + } + if (value.length === 2) { + return {top: value[0], right: value[1], bottom: value[0], left: value[1]}; + } + if (value.length === 3) { + return {top: value[0], right: value[1], bottom: value[2], left: value[1]}; + } + if (value.length === 4) { + return {top: value[0], right: value[1], bottom: value[2], left: value[3]}; + } + } + + // Convert an unpacked padding object to a CSS value + function paddingToCss(paddingObj) { + return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px"; + } + + // Makes a number suitable for CSS + function px(x) { + if (typeof(x) === "number") + return x + "px"; + else + return x; + } + + // Retrieves runtime widget sizing information for an element. + // The return value is either null, or an object with fill, padding, + // defaultWidth, defaultHeight fields. + function sizingPolicy(el) { + var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']"); + if (!sizingEl) + return null; + var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}"); + if (viewerMode) { + return sp.viewer; + } else { + return sp.browser; + } + } + + // @param tasks Array of strings (or falsy value, in which case no-op). + // Each element must be a valid JavaScript expression that yields a + // function. Or, can be an array of objects with "code" and "data" + // properties; in this case, the "code" property should be a string + // of JS that's an expr that yields a function, and "data" should be + // an object that will be added as an additional argument when that + // function is called. + // @param target The object that will be "this" for each function + // execution. + // @param args Array of arguments to be passed to the functions. (The + // same arguments will be passed to all functions.) 
+ function evalAndRun(tasks, target, args) { + if (tasks) { + forEach(tasks, function(task) { + var theseArgs = args; + if (typeof(task) === "object") { + theseArgs = theseArgs.concat([task.data]); + task = task.code; + } + var taskFunc = tryEval(task); + if (typeof(taskFunc) !== "function") { + throw new Error("Task must be a function! Source:\n" + task); + } + taskFunc.apply(target, theseArgs); + }); + } + } + + // Attempt eval() both with and without enclosing in parentheses. + // Note that enclosing coerces a function declaration into + // an expression that eval() can parse + // (otherwise, a SyntaxError is thrown) + function tryEval(code) { + var result = null; + try { + result = eval("(" + code + ")"); + } catch(error) { + if (!(error instanceof SyntaxError)) { + throw error; + } + try { + result = eval(code); + } catch(e) { + if (e instanceof SyntaxError) { + throw error; + } else { + throw e; + } + } + } + return result; + } + + function initSizing(el) { + var sizing = sizingPolicy(el); + if (!sizing) + return; + + var cel = document.getElementById("htmlwidget_container"); + if (!cel) + return; + + if (typeof(sizing.padding) !== "undefined") { + document.body.style.margin = "0"; + document.body.style.padding = paddingToCss(unpackPadding(sizing.padding)); + } + + if (sizing.fill) { + document.body.style.overflow = "hidden"; + document.body.style.width = "100%"; + document.body.style.height = "100%"; + document.documentElement.style.width = "100%"; + document.documentElement.style.height = "100%"; + cel.style.position = "absolute"; + var pad = unpackPadding(sizing.padding); + cel.style.top = pad.top + "px"; + cel.style.right = pad.right + "px"; + cel.style.bottom = pad.bottom + "px"; + cel.style.left = pad.left + "px"; + el.style.width = "100%"; + el.style.height = "100%"; + + return { + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } + }; + + } else { + el.style.width 
= px(sizing.width); + el.style.height = px(sizing.height); + + return { + getWidth: function() { return cel.getBoundingClientRect().width; }, + getHeight: function() { return cel.getBoundingClientRect().height; } + }; + } + } + + // Default implementations for methods + var defaults = { + find: function(scope) { + return querySelectorAll(scope, "." + this.name); + }, + renderError: function(el, err) { + var $el = $(el); + + this.clearError(el); + + // Add all these error classes, as Shiny does + var errClass = "shiny-output-error"; + if (err.type !== null) { + // use the classes of the error condition as CSS class names + errClass = errClass + " " + $.map(asArray(err.type), function(type) { + return errClass + "-" + type; + }).join(" "); + } + errClass = errClass + " htmlwidgets-error"; + + // Is el inline or block? If inline or inline-block, just display:none it + // and add an inline error. + var display = $el.css("display"); + $el.data("restore-display-mode", display); + + if (display === "inline" || display === "inline-block") { + $el.hide(); + if (err.message !== "") { + var errorSpan = $("").addClass(errClass); + errorSpan.text(err.message); + $el.after(errorSpan); + } + } else if (display === "block") { + // If block, add an error just after the el, set visibility:none on the + // el, and position the error to be on top of the el. + // Mark it with a unique ID and CSS class so we can remove it later. + $el.css("visibility", "hidden"); + if (err.message !== "") { + var errorDiv = $("
    ").addClass(errClass).css("position", "absolute") + .css("top", el.offsetTop) + .css("left", el.offsetLeft) + // setting width can push out the page size, forcing otherwise + // unnecessary scrollbars to appear and making it impossible for + // the element to shrink; so use max-width instead + .css("maxWidth", el.offsetWidth) + .css("height", el.offsetHeight); + errorDiv.text(err.message); + $el.after(errorDiv); + + // Really dumb way to keep the size/position of the error in sync with + // the parent element as the window is resized or whatever. + var intId = setInterval(function() { + if (!errorDiv[0].parentElement) { + clearInterval(intId); + return; + } + errorDiv + .css("top", el.offsetTop) + .css("left", el.offsetLeft) + .css("maxWidth", el.offsetWidth) + .css("height", el.offsetHeight); + }, 500); + } + } + }, + clearError: function(el) { + var $el = $(el); + var display = $el.data("restore-display-mode"); + $el.data("restore-display-mode", null); + + if (display === "inline" || display === "inline-block") { + if (display) + $el.css("display", display); + $(el.nextSibling).filter(".htmlwidgets-error").remove(); + } else if (display === "block"){ + $el.css("visibility", "inherit"); + $(el.nextSibling).filter(".htmlwidgets-error").remove(); + } + }, + sizing: {} + }; + + // Called by widget bindings to register a new type of widget. The definition + // object can contain the following properties: + // - name (required) - A string indicating the binding name, which will be + // used by default as the CSS classname to look for. + // - initialize (optional) - A function(el) that will be called once per + // widget element; if a value is returned, it will be passed as the third + // value to renderValue. + // - renderValue (required) - A function(el, data, initValue) that will be + // called with data. 
Static contexts will cause this to be called once per + // element; Shiny apps will cause this to be called multiple times per + // element, as the data changes. + window.HTMLWidgets.widget = function(definition) { + if (!definition.name) { + throw new Error("Widget must have a name"); + } + if (!definition.type) { + throw new Error("Widget must have a type"); + } + // Currently we only support output widgets + if (definition.type !== "output") { + throw new Error("Unrecognized widget type '" + definition.type + "'"); + } + // TODO: Verify that .name is a valid CSS classname + + // Support new-style instance-bound definitions. Old-style class-bound + // definitions have one widget "object" per widget per type/class of + // widget; the renderValue and resize methods on such widget objects + // take el and instance arguments, because the widget object can't + // store them. New-style instance-bound definitions have one widget + // object per widget instance; the definition that's passed in doesn't + // provide renderValue or resize methods at all, just the single method + // factory(el, width, height) + // which returns an object that has renderValue(x) and resize(w, h). + // This enables a far more natural programming style for the widget + // author, who can store per-instance state using either OO-style + // instance fields or functional-style closure variables (I guess this + // is in contrast to what can only be called C-style pseudo-OO which is + // what we required before). + if (definition.factory) { + definition = createLegacyDefinitionAdapter(definition); + } + + if (!definition.renderValue) { + throw new Error("Widget must have a renderValue function"); + } + + // For static rendering (non-Shiny), use a simple widget registration + // scheme. We also use this scheme for Shiny apps/documents that also + // contain static widgets. 
+ window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; + // Merge defaults into the definition; don't mutate the original definition. + var staticBinding = extend({}, defaults, definition); + overrideMethod(staticBinding, "find", function(superfunc) { + return function(scope) { + var results = superfunc(scope); + // Filter out Shiny outputs, we only want the static kind + return filterByClass(results, "html-widget-output", false); + }; + }); + window.HTMLWidgets.widgets.push(staticBinding); + + if (shinyMode) { + // Shiny is running. Register the definition with an output binding. + // The definition itself will not be the output binding, instead + // we will make an output binding object that delegates to the + // definition. This is because we foolishly used the same method + // name (renderValue) for htmlwidgets definition and Shiny bindings + // but they actually have quite different semantics (the Shiny + // bindings receive data that includes lots of metadata that it + // strips off before calling htmlwidgets renderValue). We can't + // just ignore the difference because in some widgets it's helpful + // to call this.renderValue() from inside of resize(), and if + // we're not delegating, then that call will go to the Shiny + // version instead of the htmlwidgets version. + + // Merge defaults with definition, without mutating either. + var bindingDef = extend({}, defaults, definition); + + // This object will be our actual Shiny binding. + var shinyBinding = new Shiny.OutputBinding(); + + // With a few exceptions, we'll want to simply use the bindingDef's + // version of methods if they are available, otherwise fall back to + // Shiny's defaults. NOTE: If Shiny's output bindings gain additional + // methods in the future, and we want them to be overrideable by + // HTMLWidget binding definitions, then we'll need to add them to this + // list. 
+ delegateMethod(shinyBinding, bindingDef, "getId"); + delegateMethod(shinyBinding, bindingDef, "onValueChange"); + delegateMethod(shinyBinding, bindingDef, "onValueError"); + delegateMethod(shinyBinding, bindingDef, "renderError"); + delegateMethod(shinyBinding, bindingDef, "clearError"); + delegateMethod(shinyBinding, bindingDef, "showProgress"); + + // The find, renderValue, and resize are handled differently, because we + // want to actually decorate the behavior of the bindingDef methods. + + shinyBinding.find = function(scope) { + var results = bindingDef.find(scope); + + // Only return elements that are Shiny outputs, not static ones + var dynamicResults = results.filter(".html-widget-output"); + + // It's possible that whatever caused Shiny to think there might be + // new dynamic outputs, also caused there to be new static outputs. + // Since there might be lots of different htmlwidgets bindings, we + // schedule execution for later--no need to staticRender multiple + // times. + if (results.length !== dynamicResults.length) + scheduleStaticRender(); + + return dynamicResults; + }; + + // Wrap renderValue to handle initialization, which unfortunately isn't + // supported natively by Shiny at the time of this writing. 
+ + shinyBinding.renderValue = function(el, data) { + Shiny.renderDependencies(data.deps); + // Resolve strings marked as javascript literals to objects + if (!(data.evals instanceof Array)) data.evals = [data.evals]; + for (var i = 0; data.evals && i < data.evals.length; i++) { + window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); + } + if (!bindingDef.renderOnNullValue) { + if (data.x === null) { + el.style.visibility = "hidden"; + return; + } else { + el.style.visibility = "inherit"; + } + } + if (!elementData(el, "initialized")) { + initSizing(el); + + elementData(el, "initialized", true); + if (bindingDef.initialize) { + var rect = el.getBoundingClientRect(); + var result = bindingDef.initialize(el, rect.width, rect.height); + elementData(el, "init_result", result); + } + } + bindingDef.renderValue(el, data.x, elementData(el, "init_result")); + evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); + }; + + // Only override resize if bindingDef implements it + if (bindingDef.resize) { + shinyBinding.resize = function(el, width, height) { + // Shiny can call resize before initialize/renderValue have been + // called, which doesn't make sense for widgets. 
+ if (elementData(el, "initialized")) { + bindingDef.resize(el, width, height, elementData(el, "init_result")); + } + }; + } + + Shiny.outputBindings.register(shinyBinding, bindingDef.name); + } + }; + + var scheduleStaticRenderTimerId = null; + function scheduleStaticRender() { + if (!scheduleStaticRenderTimerId) { + scheduleStaticRenderTimerId = setTimeout(function() { + scheduleStaticRenderTimerId = null; + window.HTMLWidgets.staticRender(); + }, 1); + } + } + + // Render static widgets after the document finishes loading + // Statically render all elements that are of this widget's class + window.HTMLWidgets.staticRender = function() { + var bindings = window.HTMLWidgets.widgets || []; + forEach(bindings, function(binding) { + var matches = binding.find(document.documentElement); + forEach(matches, function(el) { + var sizeObj = initSizing(el, binding); + + var getSize = function(el) { + if (sizeObj) { + return {w: sizeObj.getWidth(), h: sizeObj.getHeight()} + } else { + var rect = el.getBoundingClientRect(); + return {w: rect.width, h: rect.height} + } + }; + + if (hasClass(el, "html-widget-static-bound")) + return; + el.className = el.className + " html-widget-static-bound"; + + var initResult; + if (binding.initialize) { + var size = getSize(el); + initResult = binding.initialize(el, size.w, size.h); + elementData(el, "init_result", initResult); + } + + if (binding.resize) { + var lastSize = getSize(el); + var resizeHandler = function(e) { + var size = getSize(el); + if (size.w === 0 && size.h === 0) + return; + if (size.w === lastSize.w && size.h === lastSize.h) + return; + lastSize = size; + binding.resize(el, size.w, size.h, initResult); + }; + + on(window, "resize", resizeHandler); + + // This is needed for cases where we're running in a Shiny + // app, but the widget itself is not a Shiny output, but + // rather a simple static widget. 
One example of this is + // an rmarkdown document that has runtime:shiny and widget + // that isn't in a render function. Shiny only knows to + // call resize handlers for Shiny outputs, not for static + // widgets, so we do it ourselves. + if (window.jQuery) { + window.jQuery(document).on( + "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", + resizeHandler + ); + window.jQuery(document).on( + "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", + resizeHandler + ); + } + + // This is needed for the specific case of ioslides, which + // flips slides between display:none and display:block. + // Ideally we would not have to have ioslide-specific code + // here, but rather have ioslides raise a generic event, + // but the rmarkdown package just went to CRAN so the + // window to getting that fixed may be long. + if (window.addEventListener) { + // It's OK to limit this to window.addEventListener + // browsers because ioslides itself only supports + // such browsers. 
+ on(document, "slideenter", resizeHandler); + on(document, "slideleave", resizeHandler); + } + } + + var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); + if (scriptData) { + var data = JSON.parse(scriptData.textContent || scriptData.text); + // Resolve strings marked as javascript literals to objects + if (!(data.evals instanceof Array)) data.evals = [data.evals]; + for (var k = 0; data.evals && k < data.evals.length; k++) { + window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); + } + binding.renderValue(el, data.x, initResult); + evalAndRun(data.jsHooks.render, initResult, [el, data.x]); + } + }); + }); + + invokePostRenderHandlers(); + } + + + function has_jQuery3() { + if (!window.jQuery) { + return false; + } + var $version = window.jQuery.fn.jquery; + var $major_version = parseInt($version.split(".")[0]); + return $major_version >= 3; + } + + /* + / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's + / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now + / really means $(setTimeout(fn)). + / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous + / + / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny + / one tick later than it did before, which means staticRender() is + / called renderValue() earlier than (advanced) widget authors might be expecting. + / https://github.com/rstudio/shiny/issues/2630 + / + / For a concrete example, leaflet has some methods (e.g., updateBounds) + / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
+ / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to + / delay execution of those methods (until Shiny methods are ready) + / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 + / + / Ideally widget authors wouldn't need to use this setTimeout() hack that + / leaflet uses to call Shiny methods on a staticRender(). In the long run, + / the logic initShiny should be broken up so that method registration happens + / right away, but binding happens later. + */ + function maybeStaticRenderLater() { + if (shinyMode && has_jQuery3()) { + window.jQuery(window.HTMLWidgets.staticRender); + } else { + window.HTMLWidgets.staticRender(); + } + } + + if (document.addEventListener) { + document.addEventListener("DOMContentLoaded", function() { + document.removeEventListener("DOMContentLoaded", arguments.callee, false); + maybeStaticRenderLater(); + }, false); + } else if (document.attachEvent) { + document.attachEvent("onreadystatechange", function() { + if (document.readyState === "complete") { + document.detachEvent("onreadystatechange", arguments.callee); + maybeStaticRenderLater(); + } + }); + } + + + window.HTMLWidgets.getAttachmentUrl = function(depname, key) { + // If no key, default to the first item + if (typeof(key) === "undefined") + key = 1; + + var link = document.getElementById(depname + "-" + key + "-attachment"); + if (!link) { + throw new Error("Attachment " + depname + "/" + key + " not found in document"); + } + return link.getAttribute("href"); + }; + + window.HTMLWidgets.dataframeToD3 = function(df) { + var names = []; + var length; + for (var name in df) { + if (df.hasOwnProperty(name)) + names.push(name); + if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { + throw new Error("All fields must be arrays"); + } else if (typeof(length) !== "undefined" && length !== df[name].length) { + throw new Error("All fields must be arrays of the same length"); + } + length = 
df[name].length; + } + var results = []; + var item; + for (var row = 0; row < length; row++) { + item = {}; + for (var col = 0; col < names.length; col++) { + item[names[col]] = df[names[col]][row]; + } + results.push(item); + } + return results; + }; + + window.HTMLWidgets.transposeArray2D = function(array) { + if (array.length === 0) return array; + var newArray = array[0].map(function(col, i) { + return array.map(function(row) { + return row[i] + }) + }); + return newArray; + }; + // Split value at splitChar, but allow splitChar to be escaped + // using escapeChar. Any other characters escaped by escapeChar + // will be included as usual (including escapeChar itself). + function splitWithEscape(value, splitChar, escapeChar) { + var results = []; + var escapeMode = false; + var currentResult = ""; + for (var pos = 0; pos < value.length; pos++) { + if (!escapeMode) { + if (value[pos] === splitChar) { + results.push(currentResult); + currentResult = ""; + } else if (value[pos] === escapeChar) { + escapeMode = true; + } else { + currentResult += value[pos]; + } + } else { + currentResult += value[pos]; + escapeMode = false; + } + } + if (currentResult !== "") { + results.push(currentResult); + } + return results; + } + // Function authored by Yihui/JJ Allaire + window.HTMLWidgets.evaluateStringMember = function(o, member) { + var parts = splitWithEscape(member, '.', '\\'); + for (var i = 0, l = parts.length; i < l; i++) { + var part = parts[i]; + // part may be a character or 'numeric' member name + if (o !== null && typeof o === "object" && part in o) { + if (i == (l - 1)) { // if we are at the end of the line then evalulate + if (typeof o[part] === "string") + o[part] = tryEval(o[part]); + } else { // otherwise continue to next embedded object + o = o[part]; + } + } + } + }; + + // Retrieve the HTMLWidget instance (i.e. the return value of an + // HTMLWidget binding's initialize() or factory() function) + // associated with an element, or null if none. 
+ window.HTMLWidgets.getInstance = function(el) { + return elementData(el, "init_result"); + }; + + // Finds the first element in the scope that matches the selector, + // and returns the HTMLWidget instance (i.e. the return value of + // an HTMLWidget binding's initialize() or factory() function) + // associated with that element, if any. If no element matches the + // selector, or the first matching element has no HTMLWidget + // instance associated with it, then null is returned. + // + // The scope argument is optional, and defaults to window.document. + window.HTMLWidgets.find = function(scope, selector) { + if (arguments.length == 1) { + selector = scope; + scope = document; + } + + var el = scope.querySelector(selector); + if (el === null) { + return null; + } else { + return window.HTMLWidgets.getInstance(el); + } + }; + + // Finds all elements in the scope that match the selector, and + // returns the HTMLWidget instances (i.e. the return values of + // an HTMLWidget binding's initialize() or factory() function) + // associated with the elements, in an array. If elements that + // match the selector don't have an associated HTMLWidget + // instance, the returned array will contain nulls. + // + // The scope argument is optional, and defaults to window.document. + window.HTMLWidgets.findAll = function(scope, selector) { + if (arguments.length == 1) { + selector = scope; + scope = document; + } + + var nodes = scope.querySelectorAll(selector); + var results = []; + for (var i = 0; i < nodes.length; i++) { + results.push(window.HTMLWidgets.getInstance(nodes[i])); + } + return results; + }; + + var postRenderHandlers = []; + function invokePostRenderHandlers() { + while (postRenderHandlers.length) { + var handler = postRenderHandlers.shift(); + if (handler) { + handler(); + } + } + } + + // Register the given callback function to be invoked after the + // next time static widgets are rendered. 
+ window.HTMLWidgets.addPostRenderHandler = function(callback) { + postRenderHandlers.push(callback); + }; + + // Takes a new-style instance-bound definition, and returns an + // old-style class-bound definition. This saves us from having + // to rewrite all the logic in this file to accomodate both + // types of definitions. + function createLegacyDefinitionAdapter(defn) { + var result = { + name: defn.name, + type: defn.type, + initialize: function(el, width, height) { + return defn.factory(el, width, height); + }, + renderValue: function(el, x, instance) { + return instance.renderValue(x); + }, + resize: function(el, width, height, instance) { + return instance.resize(width, height); + } + }; + + if (defn.find) + result.find = defn.find; + if (defn.renderError) + result.renderError = defn.renderError; + if (defn.clearError) + result.clearError = defn.clearError; + + return result; + } +})(); diff --git a/content/find/parsnip/index_files/jquery/jquery-3.6.0.js b/docs/site_libs/jquery-3.6.0/jquery-3.6.0.js similarity index 100% rename from content/find/parsnip/index_files/jquery/jquery-3.6.0.js rename to docs/site_libs/jquery-3.6.0/jquery-3.6.0.js diff --git a/content/find/parsnip/index_files/jquery/jquery-3.6.0.min.js b/docs/site_libs/jquery-3.6.0/jquery-3.6.0.min.js similarity index 100% rename from content/find/parsnip/index_files/jquery/jquery-3.6.0.min.js rename to docs/site_libs/jquery-3.6.0/jquery-3.6.0.min.js diff --git a/content/find/parsnip/index_files/jquery/jquery-3.6.0.min.map b/docs/site_libs/jquery-3.6.0/jquery-3.6.0.min.map similarity index 100% rename from content/find/parsnip/index_files/jquery/jquery-3.6.0.min.map rename to docs/site_libs/jquery-3.6.0/jquery-3.6.0.min.map diff --git a/content/start/recipes/index_files/kePrint/kePrint.js b/docs/site_libs/kePrint-0.0.1/kePrint.js similarity index 100% rename from content/start/recipes/index_files/kePrint/kePrint.js rename to docs/site_libs/kePrint-0.0.1/kePrint.js diff --git 
a/content/start/recipes/index_files/lightable/lightable.css b/docs/site_libs/lightable-0.0.1/lightable.css similarity index 100% rename from content/start/recipes/index_files/lightable/lightable.css rename to docs/site_libs/lightable-0.0.1/lightable.css diff --git a/content/find/parsnip/index_files/nouislider/jquery.nouislider.min.css b/docs/site_libs/nouislider-7.0.10/jquery.nouislider.min.css similarity index 100% rename from content/find/parsnip/index_files/nouislider/jquery.nouislider.min.css rename to docs/site_libs/nouislider-7.0.10/jquery.nouislider.min.css diff --git a/content/find/parsnip/index_files/nouislider/jquery.nouislider.min.js b/docs/site_libs/nouislider-7.0.10/jquery.nouislider.min.js similarity index 100% rename from content/find/parsnip/index_files/nouislider/jquery.nouislider.min.js rename to docs/site_libs/nouislider-7.0.10/jquery.nouislider.min.js diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-brands-400.ttf b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-brands-400.ttf new file mode 100644 index 00000000..430a02ed Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-brands-400.ttf differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-brands-400.woff2 b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-brands-400.woff2 new file mode 100644 index 00000000..4d904aab Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-brands-400.woff2 differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-regular-400.ttf b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-regular-400.ttf new file mode 100644 index 00000000..23e3febe Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-regular-400.ttf differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-regular-400.woff2 
b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-regular-400.woff2 new file mode 100644 index 00000000..80e3b124 Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-regular-400.woff2 differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-solid-900.ttf b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-solid-900.ttf new file mode 100644 index 00000000..da908242 Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-solid-900.ttf differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-solid-900.woff2 b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-solid-900.woff2 new file mode 100644 index 00000000..360ba115 Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-solid-900.woff2 differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-v4compatibility.ttf b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-v4compatibility.ttf new file mode 100644 index 00000000..e9545ed5 Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-v4compatibility.ttf differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-v4compatibility.woff2 b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-v4compatibility.woff2 new file mode 100644 index 00000000..db5b0b99 Binary files /dev/null and b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/1e21o67/fa-v4compatibility.woff2 differ diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/all.css b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/all.css new file mode 100644 index 00000000..ab3fcbd3 --- /dev/null +++ b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/all.css @@ -0,0 +1,7831 @@ +/*! 
+ * Font Awesome Free 6.1.1 by @fontawesome - https://fontawesome.com + * License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) + * Copyright 2022 Fonticons, Inc. + */ +.fa { + font-family: var(--fa-style-family, "Font Awesome 6 Free"); + font-weight: var(--fa-style, 900); } + +.fa, +.fas, +.fa-solid, +.far, +.fa-regular, +.fal, +.fa-light, +.fat, +.fa-thin, +.fad, +.fa-duotone, +.fab, +.fa-brands { + -moz-osx-font-smoothing: grayscale; + -webkit-font-smoothing: antialiased; + display: var(--fa-display, inline-block); + font-style: normal; + font-variant: normal; + line-height: 1; + text-rendering: auto; } + +.fa-1x { + font-size: 1em; } + +.fa-2x { + font-size: 2em; } + +.fa-3x { + font-size: 3em; } + +.fa-4x { + font-size: 4em; } + +.fa-5x { + font-size: 5em; } + +.fa-6x { + font-size: 6em; } + +.fa-7x { + font-size: 7em; } + +.fa-8x { + font-size: 8em; } + +.fa-9x { + font-size: 9em; } + +.fa-10x { + font-size: 10em; } + +.fa-2xs { + font-size: 0.625em; + line-height: 0.1em; + vertical-align: 0.225em; } + +.fa-xs { + font-size: 0.75em; + line-height: 0.08333em; + vertical-align: 0.125em; } + +.fa-sm { + font-size: 0.875em; + line-height: 0.07143em; + vertical-align: 0.05357em; } + +.fa-lg { + font-size: 1.25em; + line-height: 0.05em; + vertical-align: -0.075em; } + +.fa-xl { + font-size: 1.5em; + line-height: 0.04167em; + vertical-align: -0.125em; } + +.fa-2xl { + font-size: 2em; + line-height: 0.03125em; + vertical-align: -0.1875em; } + +.fa-fw { + text-align: center; + width: 1.25em; } + +.fa-ul { + list-style-type: none; + margin-left: var(--fa-li-margin, 2.5em); + padding-left: 0; } + .fa-ul > li { + position: relative; } + +.fa-li { + left: calc(var(--fa-li-width, 2em) * -1); + position: absolute; + text-align: center; + width: var(--fa-li-width, 2em); + line-height: inherit; } + +.fa-border { + border-color: var(--fa-border-color, #eee); + border-radius: var(--fa-border-radius, 0.1em); + border-style: 
var(--fa-border-style, solid); + border-width: var(--fa-border-width, 0.08em); + padding: var(--fa-border-padding, 0.2em 0.25em 0.15em); } + +.fa-pull-left { + float: left; + margin-right: var(--fa-pull-margin, 0.3em); } + +.fa-pull-right { + float: right; + margin-left: var(--fa-pull-margin, 0.3em); } + +.fa-beat { + -webkit-animation-name: fa-beat; + animation-name: fa-beat; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, ease-in-out); + animation-timing-function: var(--fa-animation-timing, ease-in-out); } + +.fa-bounce { + -webkit-animation-name: fa-bounce; + animation-name: fa-bounce; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.28, 0.84, 0.42, 1)); + animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.28, 0.84, 0.42, 1)); } + +.fa-fade { + -webkit-animation-name: fa-fade; + animation-name: fa-fade; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: 
var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); + animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); } + +.fa-beat-fade { + -webkit-animation-name: fa-beat-fade; + animation-name: fa-beat-fade; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); + animation-timing-function: var(--fa-animation-timing, cubic-bezier(0.4, 0, 0.6, 1)); } + +.fa-flip { + -webkit-animation-name: fa-flip; + animation-name: fa-flip; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: 
var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, ease-in-out); + animation-timing-function: var(--fa-animation-timing, ease-in-out); } + +.fa-shake { + -webkit-animation-name: fa-shake; + animation-name: fa-shake; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, linear); + animation-timing-function: var(--fa-animation-timing, linear); } + +.fa-spin { + -webkit-animation-name: fa-spin; + animation-name: fa-spin; + -webkit-animation-delay: var(--fa-animation-delay, 0); + animation-delay: var(--fa-animation-delay, 0); + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: var(--fa-animation-duration, 2s); + animation-duration: var(--fa-animation-duration, 2s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, linear); + animation-timing-function: var(--fa-animation-timing, linear); } + +.fa-spin-reverse { + --fa-animation-direction: reverse; } + +.fa-pulse, +.fa-spin-pulse { + -webkit-animation-name: fa-spin; + animation-name: fa-spin; + -webkit-animation-direction: var(--fa-animation-direction, normal); + animation-direction: var(--fa-animation-direction, normal); + -webkit-animation-duration: 
var(--fa-animation-duration, 1s); + animation-duration: var(--fa-animation-duration, 1s); + -webkit-animation-iteration-count: var(--fa-animation-iteration-count, infinite); + animation-iteration-count: var(--fa-animation-iteration-count, infinite); + -webkit-animation-timing-function: var(--fa-animation-timing, steps(8)); + animation-timing-function: var(--fa-animation-timing, steps(8)); } + +@media (prefers-reduced-motion: reduce) { + .fa-beat, + .fa-bounce, + .fa-fade, + .fa-beat-fade, + .fa-flip, + .fa-pulse, + .fa-shake, + .fa-spin, + .fa-spin-pulse { + -webkit-animation-delay: -1ms; + animation-delay: -1ms; + -webkit-animation-duration: 1ms; + animation-duration: 1ms; + -webkit-animation-iteration-count: 1; + animation-iteration-count: 1; + transition-delay: 0s; + transition-duration: 0s; } } + +@-webkit-keyframes fa-beat { + 0%, 90% { + -webkit-transform: scale(1); + transform: scale(1); } + 45% { + -webkit-transform: scale(var(--fa-beat-scale, 1.25)); + transform: scale(var(--fa-beat-scale, 1.25)); } } + +@keyframes fa-beat { + 0%, 90% { + -webkit-transform: scale(1); + transform: scale(1); } + 45% { + -webkit-transform: scale(var(--fa-beat-scale, 1.25)); + transform: scale(var(--fa-beat-scale, 1.25)); } } + +@-webkit-keyframes fa-bounce { + 0% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 10% { + -webkit-transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); + transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); } + 30% { + -webkit-transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); + transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); } + 50% { + -webkit-transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) 
translateY(0); + transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) translateY(0); } + 57% { + -webkit-transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); + transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); } + 64% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 100% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } } + +@keyframes fa-bounce { + 0% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 10% { + -webkit-transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); + transform: scale(var(--fa-bounce-start-scale-x, 1.1), var(--fa-bounce-start-scale-y, 0.9)) translateY(0); } + 30% { + -webkit-transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); + transform: scale(var(--fa-bounce-jump-scale-x, 0.9), var(--fa-bounce-jump-scale-y, 1.1)) translateY(var(--fa-bounce-height, -0.5em)); } + 50% { + -webkit-transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) translateY(0); + transform: scale(var(--fa-bounce-land-scale-x, 1.05), var(--fa-bounce-land-scale-y, 0.95)) translateY(0); } + 57% { + -webkit-transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); + transform: scale(1, 1) translateY(var(--fa-bounce-rebound, -0.125em)); } + 64% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } + 100% { + -webkit-transform: scale(1, 1) translateY(0); + transform: scale(1, 1) translateY(0); } } + +@-webkit-keyframes fa-fade { + 50% { + opacity: var(--fa-fade-opacity, 0.4); } } + +@keyframes fa-fade { + 50% { + opacity: var(--fa-fade-opacity, 0.4); } } + +@-webkit-keyframes fa-beat-fade { + 0%, 100% { + opacity: var(--fa-beat-fade-opacity, 0.4); + 
-webkit-transform: scale(1); + transform: scale(1); } + 50% { + opacity: 1; + -webkit-transform: scale(var(--fa-beat-fade-scale, 1.125)); + transform: scale(var(--fa-beat-fade-scale, 1.125)); } } + +@keyframes fa-beat-fade { + 0%, 100% { + opacity: var(--fa-beat-fade-opacity, 0.4); + -webkit-transform: scale(1); + transform: scale(1); } + 50% { + opacity: 1; + -webkit-transform: scale(var(--fa-beat-fade-scale, 1.125)); + transform: scale(var(--fa-beat-fade-scale, 1.125)); } } + +@-webkit-keyframes fa-flip { + 50% { + -webkit-transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); + transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); } } + +@keyframes fa-flip { + 50% { + -webkit-transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); + transform: rotate3d(var(--fa-flip-x, 0), var(--fa-flip-y, 1), var(--fa-flip-z, 0), var(--fa-flip-angle, -180deg)); } } + +@-webkit-keyframes fa-shake { + 0% { + -webkit-transform: rotate(-15deg); + transform: rotate(-15deg); } + 4% { + -webkit-transform: rotate(15deg); + transform: rotate(15deg); } + 8%, 24% { + -webkit-transform: rotate(-18deg); + transform: rotate(-18deg); } + 12%, 28% { + -webkit-transform: rotate(18deg); + transform: rotate(18deg); } + 16% { + -webkit-transform: rotate(-22deg); + transform: rotate(-22deg); } + 20% { + -webkit-transform: rotate(22deg); + transform: rotate(22deg); } + 32% { + -webkit-transform: rotate(-12deg); + transform: rotate(-12deg); } + 36% { + -webkit-transform: rotate(12deg); + transform: rotate(12deg); } + 40%, 100% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } } + +@keyframes fa-shake { + 0% { + -webkit-transform: rotate(-15deg); + transform: rotate(-15deg); } + 4% { + -webkit-transform: rotate(15deg); + transform: rotate(15deg); } + 8%, 24% { + -webkit-transform: rotate(-18deg); + transform: 
rotate(-18deg); } + 12%, 28% { + -webkit-transform: rotate(18deg); + transform: rotate(18deg); } + 16% { + -webkit-transform: rotate(-22deg); + transform: rotate(-22deg); } + 20% { + -webkit-transform: rotate(22deg); + transform: rotate(22deg); } + 32% { + -webkit-transform: rotate(-12deg); + transform: rotate(-12deg); } + 36% { + -webkit-transform: rotate(12deg); + transform: rotate(12deg); } + 40%, 100% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } } + +@-webkit-keyframes fa-spin { + 0% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } + 100% { + -webkit-transform: rotate(360deg); + transform: rotate(360deg); } } + +@keyframes fa-spin { + 0% { + -webkit-transform: rotate(0deg); + transform: rotate(0deg); } + 100% { + -webkit-transform: rotate(360deg); + transform: rotate(360deg); } } + +.fa-rotate-90 { + -webkit-transform: rotate(90deg); + transform: rotate(90deg); } + +.fa-rotate-180 { + -webkit-transform: rotate(180deg); + transform: rotate(180deg); } + +.fa-rotate-270 { + -webkit-transform: rotate(270deg); + transform: rotate(270deg); } + +.fa-flip-horizontal { + -webkit-transform: scale(-1, 1); + transform: scale(-1, 1); } + +.fa-flip-vertical { + -webkit-transform: scale(1, -1); + transform: scale(1, -1); } + +.fa-flip-both, +.fa-flip-horizontal.fa-flip-vertical { + -webkit-transform: scale(-1, -1); + transform: scale(-1, -1); } + +.fa-rotate-by { + -webkit-transform: rotate(var(--fa-rotate-angle, none)); + transform: rotate(var(--fa-rotate-angle, none)); } + +.fa-stack { + display: inline-block; + height: 2em; + line-height: 2em; + position: relative; + vertical-align: middle; + width: 2.5em; } + +.fa-stack-1x, +.fa-stack-2x { + left: 0; + position: absolute; + text-align: center; + width: 100%; + z-index: var(--fa-stack-z-index, auto); } + +.fa-stack-1x { + line-height: inherit; } + +.fa-stack-2x { + font-size: 2em; } + +.fa-inverse { + color: var(--fa-inverse, #fff); } + +/* Font Awesome uses the Unicode Private Use 
Area (PUA) to ensure screen +readers do not read off random characters that represent icons */ +.fa-0::before { + content: "\30"; } + +.fa-1::before { + content: "\31"; } + +.fa-2::before { + content: "\32"; } + +.fa-3::before { + content: "\33"; } + +.fa-4::before { + content: "\34"; } + +.fa-5::before { + content: "\35"; } + +.fa-6::before { + content: "\36"; } + +.fa-7::before { + content: "\37"; } + +.fa-8::before { + content: "\38"; } + +.fa-9::before { + content: "\39"; } + +.fa-a::before { + content: "\41"; } + +.fa-address-book::before { + content: "\f2b9"; } + +.fa-contact-book::before { + content: "\f2b9"; } + +.fa-address-card::before { + content: "\f2bb"; } + +.fa-contact-card::before { + content: "\f2bb"; } + +.fa-vcard::before { + content: "\f2bb"; } + +.fa-align-center::before { + content: "\f037"; } + +.fa-align-justify::before { + content: "\f039"; } + +.fa-align-left::before { + content: "\f036"; } + +.fa-align-right::before { + content: "\f038"; } + +.fa-anchor::before { + content: "\f13d"; } + +.fa-anchor-circle-check::before { + content: "\e4aa"; } + +.fa-anchor-circle-exclamation::before { + content: "\e4ab"; } + +.fa-anchor-circle-xmark::before { + content: "\e4ac"; } + +.fa-anchor-lock::before { + content: "\e4ad"; } + +.fa-angle-down::before { + content: "\f107"; } + +.fa-angle-left::before { + content: "\f104"; } + +.fa-angle-right::before { + content: "\f105"; } + +.fa-angle-up::before { + content: "\f106"; } + +.fa-angles-down::before { + content: "\f103"; } + +.fa-angle-double-down::before { + content: "\f103"; } + +.fa-angles-left::before { + content: "\f100"; } + +.fa-angle-double-left::before { + content: "\f100"; } + +.fa-angles-right::before { + content: "\f101"; } + +.fa-angle-double-right::before { + content: "\f101"; } + +.fa-angles-up::before { + content: "\f102"; } + +.fa-angle-double-up::before { + content: "\f102"; } + +.fa-ankh::before { + content: "\f644"; } + +.fa-apple-whole::before { + content: "\f5d1"; } + 
+.fa-apple-alt::before { + content: "\f5d1"; } + +.fa-archway::before { + content: "\f557"; } + +.fa-arrow-down::before { + content: "\f063"; } + +.fa-arrow-down-1-9::before { + content: "\f162"; } + +.fa-sort-numeric-asc::before { + content: "\f162"; } + +.fa-sort-numeric-down::before { + content: "\f162"; } + +.fa-arrow-down-9-1::before { + content: "\f886"; } + +.fa-sort-numeric-desc::before { + content: "\f886"; } + +.fa-sort-numeric-down-alt::before { + content: "\f886"; } + +.fa-arrow-down-a-z::before { + content: "\f15d"; } + +.fa-sort-alpha-asc::before { + content: "\f15d"; } + +.fa-sort-alpha-down::before { + content: "\f15d"; } + +.fa-arrow-down-long::before { + content: "\f175"; } + +.fa-long-arrow-down::before { + content: "\f175"; } + +.fa-arrow-down-short-wide::before { + content: "\f884"; } + +.fa-sort-amount-desc::before { + content: "\f884"; } + +.fa-sort-amount-down-alt::before { + content: "\f884"; } + +.fa-arrow-down-up-across-line::before { + content: "\e4af"; } + +.fa-arrow-down-up-lock::before { + content: "\e4b0"; } + +.fa-arrow-down-wide-short::before { + content: "\f160"; } + +.fa-sort-amount-asc::before { + content: "\f160"; } + +.fa-sort-amount-down::before { + content: "\f160"; } + +.fa-arrow-down-z-a::before { + content: "\f881"; } + +.fa-sort-alpha-desc::before { + content: "\f881"; } + +.fa-sort-alpha-down-alt::before { + content: "\f881"; } + +.fa-arrow-left::before { + content: "\f060"; } + +.fa-arrow-left-long::before { + content: "\f177"; } + +.fa-long-arrow-left::before { + content: "\f177"; } + +.fa-arrow-pointer::before { + content: "\f245"; } + +.fa-mouse-pointer::before { + content: "\f245"; } + +.fa-arrow-right::before { + content: "\f061"; } + +.fa-arrow-right-arrow-left::before { + content: "\f0ec"; } + +.fa-exchange::before { + content: "\f0ec"; } + +.fa-arrow-right-from-bracket::before { + content: "\f08b"; } + +.fa-sign-out::before { + content: "\f08b"; } + +.fa-arrow-right-long::before { + content: "\f178"; } + 
+.fa-long-arrow-right::before { + content: "\f178"; } + +.fa-arrow-right-to-bracket::before { + content: "\f090"; } + +.fa-sign-in::before { + content: "\f090"; } + +.fa-arrow-right-to-city::before { + content: "\e4b3"; } + +.fa-arrow-rotate-left::before { + content: "\f0e2"; } + +.fa-arrow-left-rotate::before { + content: "\f0e2"; } + +.fa-arrow-rotate-back::before { + content: "\f0e2"; } + +.fa-arrow-rotate-backward::before { + content: "\f0e2"; } + +.fa-undo::before { + content: "\f0e2"; } + +.fa-arrow-rotate-right::before { + content: "\f01e"; } + +.fa-arrow-right-rotate::before { + content: "\f01e"; } + +.fa-arrow-rotate-forward::before { + content: "\f01e"; } + +.fa-redo::before { + content: "\f01e"; } + +.fa-arrow-trend-down::before { + content: "\e097"; } + +.fa-arrow-trend-up::before { + content: "\e098"; } + +.fa-arrow-turn-down::before { + content: "\f149"; } + +.fa-level-down::before { + content: "\f149"; } + +.fa-arrow-turn-up::before { + content: "\f148"; } + +.fa-level-up::before { + content: "\f148"; } + +.fa-arrow-up::before { + content: "\f062"; } + +.fa-arrow-up-1-9::before { + content: "\f163"; } + +.fa-sort-numeric-up::before { + content: "\f163"; } + +.fa-arrow-up-9-1::before { + content: "\f887"; } + +.fa-sort-numeric-up-alt::before { + content: "\f887"; } + +.fa-arrow-up-a-z::before { + content: "\f15e"; } + +.fa-sort-alpha-up::before { + content: "\f15e"; } + +.fa-arrow-up-from-bracket::before { + content: "\e09a"; } + +.fa-arrow-up-from-ground-water::before { + content: "\e4b5"; } + +.fa-arrow-up-from-water-pump::before { + content: "\e4b6"; } + +.fa-arrow-up-long::before { + content: "\f176"; } + +.fa-long-arrow-up::before { + content: "\f176"; } + +.fa-arrow-up-right-dots::before { + content: "\e4b7"; } + +.fa-arrow-up-right-from-square::before { + content: "\f08e"; } + +.fa-external-link::before { + content: "\f08e"; } + +.fa-arrow-up-short-wide::before { + content: "\f885"; } + +.fa-sort-amount-up-alt::before { + content: "\f885"; } + 
+.fa-arrow-up-wide-short::before { + content: "\f161"; } + +.fa-sort-amount-up::before { + content: "\f161"; } + +.fa-arrow-up-z-a::before { + content: "\f882"; } + +.fa-sort-alpha-up-alt::before { + content: "\f882"; } + +.fa-arrows-down-to-line::before { + content: "\e4b8"; } + +.fa-arrows-down-to-people::before { + content: "\e4b9"; } + +.fa-arrows-left-right::before { + content: "\f07e"; } + +.fa-arrows-h::before { + content: "\f07e"; } + +.fa-arrows-left-right-to-line::before { + content: "\e4ba"; } + +.fa-arrows-rotate::before { + content: "\f021"; } + +.fa-refresh::before { + content: "\f021"; } + +.fa-sync::before { + content: "\f021"; } + +.fa-arrows-spin::before { + content: "\e4bb"; } + +.fa-arrows-split-up-and-left::before { + content: "\e4bc"; } + +.fa-arrows-to-circle::before { + content: "\e4bd"; } + +.fa-arrows-to-dot::before { + content: "\e4be"; } + +.fa-arrows-to-eye::before { + content: "\e4bf"; } + +.fa-arrows-turn-right::before { + content: "\e4c0"; } + +.fa-arrows-turn-to-dots::before { + content: "\e4c1"; } + +.fa-arrows-up-down::before { + content: "\f07d"; } + +.fa-arrows-v::before { + content: "\f07d"; } + +.fa-arrows-up-down-left-right::before { + content: "\f047"; } + +.fa-arrows::before { + content: "\f047"; } + +.fa-arrows-up-to-line::before { + content: "\e4c2"; } + +.fa-asterisk::before { + content: "\2a"; } + +.fa-at::before { + content: "\40"; } + +.fa-atom::before { + content: "\f5d2"; } + +.fa-audio-description::before { + content: "\f29e"; } + +.fa-austral-sign::before { + content: "\e0a9"; } + +.fa-award::before { + content: "\f559"; } + +.fa-b::before { + content: "\42"; } + +.fa-baby::before { + content: "\f77c"; } + +.fa-baby-carriage::before { + content: "\f77d"; } + +.fa-carriage-baby::before { + content: "\f77d"; } + +.fa-backward::before { + content: "\f04a"; } + +.fa-backward-fast::before { + content: "\f049"; } + +.fa-fast-backward::before { + content: "\f049"; } + +.fa-backward-step::before { + content: "\f048"; } + 
+.fa-step-backward::before { + content: "\f048"; } + +.fa-bacon::before { + content: "\f7e5"; } + +.fa-bacteria::before { + content: "\e059"; } + +.fa-bacterium::before { + content: "\e05a"; } + +.fa-bag-shopping::before { + content: "\f290"; } + +.fa-shopping-bag::before { + content: "\f290"; } + +.fa-bahai::before { + content: "\f666"; } + +.fa-baht-sign::before { + content: "\e0ac"; } + +.fa-ban::before { + content: "\f05e"; } + +.fa-cancel::before { + content: "\f05e"; } + +.fa-ban-smoking::before { + content: "\f54d"; } + +.fa-smoking-ban::before { + content: "\f54d"; } + +.fa-bandage::before { + content: "\f462"; } + +.fa-band-aid::before { + content: "\f462"; } + +.fa-barcode::before { + content: "\f02a"; } + +.fa-bars::before { + content: "\f0c9"; } + +.fa-navicon::before { + content: "\f0c9"; } + +.fa-bars-progress::before { + content: "\f828"; } + +.fa-tasks-alt::before { + content: "\f828"; } + +.fa-bars-staggered::before { + content: "\f550"; } + +.fa-reorder::before { + content: "\f550"; } + +.fa-stream::before { + content: "\f550"; } + +.fa-baseball::before { + content: "\f433"; } + +.fa-baseball-ball::before { + content: "\f433"; } + +.fa-baseball-bat-ball::before { + content: "\f432"; } + +.fa-basket-shopping::before { + content: "\f291"; } + +.fa-shopping-basket::before { + content: "\f291"; } + +.fa-basketball::before { + content: "\f434"; } + +.fa-basketball-ball::before { + content: "\f434"; } + +.fa-bath::before { + content: "\f2cd"; } + +.fa-bathtub::before { + content: "\f2cd"; } + +.fa-battery-empty::before { + content: "\f244"; } + +.fa-battery-0::before { + content: "\f244"; } + +.fa-battery-full::before { + content: "\f240"; } + +.fa-battery::before { + content: "\f240"; } + +.fa-battery-5::before { + content: "\f240"; } + +.fa-battery-half::before { + content: "\f242"; } + +.fa-battery-3::before { + content: "\f242"; } + +.fa-battery-quarter::before { + content: "\f243"; } + +.fa-battery-2::before { + content: "\f243"; } + 
+.fa-battery-three-quarters::before { + content: "\f241"; } + +.fa-battery-4::before { + content: "\f241"; } + +.fa-bed::before { + content: "\f236"; } + +.fa-bed-pulse::before { + content: "\f487"; } + +.fa-procedures::before { + content: "\f487"; } + +.fa-beer-mug-empty::before { + content: "\f0fc"; } + +.fa-beer::before { + content: "\f0fc"; } + +.fa-bell::before { + content: "\f0f3"; } + +.fa-bell-concierge::before { + content: "\f562"; } + +.fa-concierge-bell::before { + content: "\f562"; } + +.fa-bell-slash::before { + content: "\f1f6"; } + +.fa-bezier-curve::before { + content: "\f55b"; } + +.fa-bicycle::before { + content: "\f206"; } + +.fa-binoculars::before { + content: "\f1e5"; } + +.fa-biohazard::before { + content: "\f780"; } + +.fa-bitcoin-sign::before { + content: "\e0b4"; } + +.fa-blender::before { + content: "\f517"; } + +.fa-blender-phone::before { + content: "\f6b6"; } + +.fa-blog::before { + content: "\f781"; } + +.fa-bold::before { + content: "\f032"; } + +.fa-bolt::before { + content: "\f0e7"; } + +.fa-zap::before { + content: "\f0e7"; } + +.fa-bolt-lightning::before { + content: "\e0b7"; } + +.fa-bomb::before { + content: "\f1e2"; } + +.fa-bone::before { + content: "\f5d7"; } + +.fa-bong::before { + content: "\f55c"; } + +.fa-book::before { + content: "\f02d"; } + +.fa-book-atlas::before { + content: "\f558"; } + +.fa-atlas::before { + content: "\f558"; } + +.fa-book-bible::before { + content: "\f647"; } + +.fa-bible::before { + content: "\f647"; } + +.fa-book-bookmark::before { + content: "\e0bb"; } + +.fa-book-journal-whills::before { + content: "\f66a"; } + +.fa-journal-whills::before { + content: "\f66a"; } + +.fa-book-medical::before { + content: "\f7e6"; } + +.fa-book-open::before { + content: "\f518"; } + +.fa-book-open-reader::before { + content: "\f5da"; } + +.fa-book-reader::before { + content: "\f5da"; } + +.fa-book-quran::before { + content: "\f687"; } + +.fa-quran::before { + content: "\f687"; } + +.fa-book-skull::before { + 
content: "\f6b7"; } + +.fa-book-dead::before { + content: "\f6b7"; } + +.fa-bookmark::before { + content: "\f02e"; } + +.fa-border-all::before { + content: "\f84c"; } + +.fa-border-none::before { + content: "\f850"; } + +.fa-border-top-left::before { + content: "\f853"; } + +.fa-border-style::before { + content: "\f853"; } + +.fa-bore-hole::before { + content: "\e4c3"; } + +.fa-bottle-droplet::before { + content: "\e4c4"; } + +.fa-bottle-water::before { + content: "\e4c5"; } + +.fa-bowl-food::before { + content: "\e4c6"; } + +.fa-bowl-rice::before { + content: "\e2eb"; } + +.fa-bowling-ball::before { + content: "\f436"; } + +.fa-box::before { + content: "\f466"; } + +.fa-box-archive::before { + content: "\f187"; } + +.fa-archive::before { + content: "\f187"; } + +.fa-box-open::before { + content: "\f49e"; } + +.fa-box-tissue::before { + content: "\e05b"; } + +.fa-boxes-packing::before { + content: "\e4c7"; } + +.fa-boxes-stacked::before { + content: "\f468"; } + +.fa-boxes::before { + content: "\f468"; } + +.fa-boxes-alt::before { + content: "\f468"; } + +.fa-braille::before { + content: "\f2a1"; } + +.fa-brain::before { + content: "\f5dc"; } + +.fa-brazilian-real-sign::before { + content: "\e46c"; } + +.fa-bread-slice::before { + content: "\f7ec"; } + +.fa-bridge::before { + content: "\e4c8"; } + +.fa-bridge-circle-check::before { + content: "\e4c9"; } + +.fa-bridge-circle-exclamation::before { + content: "\e4ca"; } + +.fa-bridge-circle-xmark::before { + content: "\e4cb"; } + +.fa-bridge-lock::before { + content: "\e4cc"; } + +.fa-bridge-water::before { + content: "\e4ce"; } + +.fa-briefcase::before { + content: "\f0b1"; } + +.fa-briefcase-medical::before { + content: "\f469"; } + +.fa-broom::before { + content: "\f51a"; } + +.fa-broom-ball::before { + content: "\f458"; } + +.fa-quidditch::before { + content: "\f458"; } + +.fa-quidditch-broom-ball::before { + content: "\f458"; } + +.fa-brush::before { + content: "\f55d"; } + +.fa-bucket::before { + content: 
"\e4cf"; } + +.fa-bug::before { + content: "\f188"; } + +.fa-bug-slash::before { + content: "\e490"; } + +.fa-bugs::before { + content: "\e4d0"; } + +.fa-building::before { + content: "\f1ad"; } + +.fa-building-circle-arrow-right::before { + content: "\e4d1"; } + +.fa-building-circle-check::before { + content: "\e4d2"; } + +.fa-building-circle-exclamation::before { + content: "\e4d3"; } + +.fa-building-circle-xmark::before { + content: "\e4d4"; } + +.fa-building-columns::before { + content: "\f19c"; } + +.fa-bank::before { + content: "\f19c"; } + +.fa-institution::before { + content: "\f19c"; } + +.fa-museum::before { + content: "\f19c"; } + +.fa-university::before { + content: "\f19c"; } + +.fa-building-flag::before { + content: "\e4d5"; } + +.fa-building-lock::before { + content: "\e4d6"; } + +.fa-building-ngo::before { + content: "\e4d7"; } + +.fa-building-shield::before { + content: "\e4d8"; } + +.fa-building-un::before { + content: "\e4d9"; } + +.fa-building-user::before { + content: "\e4da"; } + +.fa-building-wheat::before { + content: "\e4db"; } + +.fa-bullhorn::before { + content: "\f0a1"; } + +.fa-bullseye::before { + content: "\f140"; } + +.fa-burger::before { + content: "\f805"; } + +.fa-hamburger::before { + content: "\f805"; } + +.fa-burst::before { + content: "\e4dc"; } + +.fa-bus::before { + content: "\f207"; } + +.fa-bus-simple::before { + content: "\f55e"; } + +.fa-bus-alt::before { + content: "\f55e"; } + +.fa-business-time::before { + content: "\f64a"; } + +.fa-briefcase-clock::before { + content: "\f64a"; } + +.fa-c::before { + content: "\43"; } + +.fa-cake-candles::before { + content: "\f1fd"; } + +.fa-birthday-cake::before { + content: "\f1fd"; } + +.fa-cake::before { + content: "\f1fd"; } + +.fa-calculator::before { + content: "\f1ec"; } + +.fa-calendar::before { + content: "\f133"; } + +.fa-calendar-check::before { + content: "\f274"; } + +.fa-calendar-day::before { + content: "\f783"; } + +.fa-calendar-days::before { + content: "\f073"; } + 
+.fa-calendar-alt::before { + content: "\f073"; } + +.fa-calendar-minus::before { + content: "\f272"; } + +.fa-calendar-plus::before { + content: "\f271"; } + +.fa-calendar-week::before { + content: "\f784"; } + +.fa-calendar-xmark::before { + content: "\f273"; } + +.fa-calendar-times::before { + content: "\f273"; } + +.fa-camera::before { + content: "\f030"; } + +.fa-camera-alt::before { + content: "\f030"; } + +.fa-camera-retro::before { + content: "\f083"; } + +.fa-camera-rotate::before { + content: "\e0d8"; } + +.fa-campground::before { + content: "\f6bb"; } + +.fa-candy-cane::before { + content: "\f786"; } + +.fa-cannabis::before { + content: "\f55f"; } + +.fa-capsules::before { + content: "\f46b"; } + +.fa-car::before { + content: "\f1b9"; } + +.fa-automobile::before { + content: "\f1b9"; } + +.fa-car-battery::before { + content: "\f5df"; } + +.fa-battery-car::before { + content: "\f5df"; } + +.fa-car-burst::before { + content: "\f5e1"; } + +.fa-car-crash::before { + content: "\f5e1"; } + +.fa-car-on::before { + content: "\e4dd"; } + +.fa-car-rear::before { + content: "\f5de"; } + +.fa-car-alt::before { + content: "\f5de"; } + +.fa-car-side::before { + content: "\f5e4"; } + +.fa-car-tunnel::before { + content: "\e4de"; } + +.fa-caravan::before { + content: "\f8ff"; } + +.fa-caret-down::before { + content: "\f0d7"; } + +.fa-caret-left::before { + content: "\f0d9"; } + +.fa-caret-right::before { + content: "\f0da"; } + +.fa-caret-up::before { + content: "\f0d8"; } + +.fa-carrot::before { + content: "\f787"; } + +.fa-cart-arrow-down::before { + content: "\f218"; } + +.fa-cart-flatbed::before { + content: "\f474"; } + +.fa-dolly-flatbed::before { + content: "\f474"; } + +.fa-cart-flatbed-suitcase::before { + content: "\f59d"; } + +.fa-luggage-cart::before { + content: "\f59d"; } + +.fa-cart-plus::before { + content: "\f217"; } + +.fa-cart-shopping::before { + content: "\f07a"; } + +.fa-shopping-cart::before { + content: "\f07a"; } + +.fa-cash-register::before { + 
content: "\f788"; } + +.fa-cat::before { + content: "\f6be"; } + +.fa-cedi-sign::before { + content: "\e0df"; } + +.fa-cent-sign::before { + content: "\e3f5"; } + +.fa-certificate::before { + content: "\f0a3"; } + +.fa-chair::before { + content: "\f6c0"; } + +.fa-chalkboard::before { + content: "\f51b"; } + +.fa-blackboard::before { + content: "\f51b"; } + +.fa-chalkboard-user::before { + content: "\f51c"; } + +.fa-chalkboard-teacher::before { + content: "\f51c"; } + +.fa-champagne-glasses::before { + content: "\f79f"; } + +.fa-glass-cheers::before { + content: "\f79f"; } + +.fa-charging-station::before { + content: "\f5e7"; } + +.fa-chart-area::before { + content: "\f1fe"; } + +.fa-area-chart::before { + content: "\f1fe"; } + +.fa-chart-bar::before { + content: "\f080"; } + +.fa-bar-chart::before { + content: "\f080"; } + +.fa-chart-column::before { + content: "\e0e3"; } + +.fa-chart-gantt::before { + content: "\e0e4"; } + +.fa-chart-line::before { + content: "\f201"; } + +.fa-line-chart::before { + content: "\f201"; } + +.fa-chart-pie::before { + content: "\f200"; } + +.fa-pie-chart::before { + content: "\f200"; } + +.fa-chart-simple::before { + content: "\e473"; } + +.fa-check::before { + content: "\f00c"; } + +.fa-check-double::before { + content: "\f560"; } + +.fa-check-to-slot::before { + content: "\f772"; } + +.fa-vote-yea::before { + content: "\f772"; } + +.fa-cheese::before { + content: "\f7ef"; } + +.fa-chess::before { + content: "\f439"; } + +.fa-chess-bishop::before { + content: "\f43a"; } + +.fa-chess-board::before { + content: "\f43c"; } + +.fa-chess-king::before { + content: "\f43f"; } + +.fa-chess-knight::before { + content: "\f441"; } + +.fa-chess-pawn::before { + content: "\f443"; } + +.fa-chess-queen::before { + content: "\f445"; } + +.fa-chess-rook::before { + content: "\f447"; } + +.fa-chevron-down::before { + content: "\f078"; } + +.fa-chevron-left::before { + content: "\f053"; } + +.fa-chevron-right::before { + content: "\f054"; } + 
+.fa-chevron-up::before { + content: "\f077"; } + +.fa-child::before { + content: "\f1ae"; } + +.fa-child-dress::before { + content: "\e59c"; } + +.fa-child-reaching::before { + content: "\e59d"; } + +.fa-child-rifle::before { + content: "\e4e0"; } + +.fa-children::before { + content: "\e4e1"; } + +.fa-church::before { + content: "\f51d"; } + +.fa-circle::before { + content: "\f111"; } + +.fa-circle-arrow-down::before { + content: "\f0ab"; } + +.fa-arrow-circle-down::before { + content: "\f0ab"; } + +.fa-circle-arrow-left::before { + content: "\f0a8"; } + +.fa-arrow-circle-left::before { + content: "\f0a8"; } + +.fa-circle-arrow-right::before { + content: "\f0a9"; } + +.fa-arrow-circle-right::before { + content: "\f0a9"; } + +.fa-circle-arrow-up::before { + content: "\f0aa"; } + +.fa-arrow-circle-up::before { + content: "\f0aa"; } + +.fa-circle-check::before { + content: "\f058"; } + +.fa-check-circle::before { + content: "\f058"; } + +.fa-circle-chevron-down::before { + content: "\f13a"; } + +.fa-chevron-circle-down::before { + content: "\f13a"; } + +.fa-circle-chevron-left::before { + content: "\f137"; } + +.fa-chevron-circle-left::before { + content: "\f137"; } + +.fa-circle-chevron-right::before { + content: "\f138"; } + +.fa-chevron-circle-right::before { + content: "\f138"; } + +.fa-circle-chevron-up::before { + content: "\f139"; } + +.fa-chevron-circle-up::before { + content: "\f139"; } + +.fa-circle-dollar-to-slot::before { + content: "\f4b9"; } + +.fa-donate::before { + content: "\f4b9"; } + +.fa-circle-dot::before { + content: "\f192"; } + +.fa-dot-circle::before { + content: "\f192"; } + +.fa-circle-down::before { + content: "\f358"; } + +.fa-arrow-alt-circle-down::before { + content: "\f358"; } + +.fa-circle-exclamation::before { + content: "\f06a"; } + +.fa-exclamation-circle::before { + content: "\f06a"; } + +.fa-circle-h::before { + content: "\f47e"; } + +.fa-hospital-symbol::before { + content: "\f47e"; } + +.fa-circle-half-stroke::before { + 
content: "\f042"; } + +.fa-adjust::before { + content: "\f042"; } + +.fa-circle-info::before { + content: "\f05a"; } + +.fa-info-circle::before { + content: "\f05a"; } + +.fa-circle-left::before { + content: "\f359"; } + +.fa-arrow-alt-circle-left::before { + content: "\f359"; } + +.fa-circle-minus::before { + content: "\f056"; } + +.fa-minus-circle::before { + content: "\f056"; } + +.fa-circle-nodes::before { + content: "\e4e2"; } + +.fa-circle-notch::before { + content: "\f1ce"; } + +.fa-circle-pause::before { + content: "\f28b"; } + +.fa-pause-circle::before { + content: "\f28b"; } + +.fa-circle-play::before { + content: "\f144"; } + +.fa-play-circle::before { + content: "\f144"; } + +.fa-circle-plus::before { + content: "\f055"; } + +.fa-plus-circle::before { + content: "\f055"; } + +.fa-circle-question::before { + content: "\f059"; } + +.fa-question-circle::before { + content: "\f059"; } + +.fa-circle-radiation::before { + content: "\f7ba"; } + +.fa-radiation-alt::before { + content: "\f7ba"; } + +.fa-circle-right::before { + content: "\f35a"; } + +.fa-arrow-alt-circle-right::before { + content: "\f35a"; } + +.fa-circle-stop::before { + content: "\f28d"; } + +.fa-stop-circle::before { + content: "\f28d"; } + +.fa-circle-up::before { + content: "\f35b"; } + +.fa-arrow-alt-circle-up::before { + content: "\f35b"; } + +.fa-circle-user::before { + content: "\f2bd"; } + +.fa-user-circle::before { + content: "\f2bd"; } + +.fa-circle-xmark::before { + content: "\f057"; } + +.fa-times-circle::before { + content: "\f057"; } + +.fa-xmark-circle::before { + content: "\f057"; } + +.fa-city::before { + content: "\f64f"; } + +.fa-clapperboard::before { + content: "\e131"; } + +.fa-clipboard::before { + content: "\f328"; } + +.fa-clipboard-check::before { + content: "\f46c"; } + +.fa-clipboard-list::before { + content: "\f46d"; } + +.fa-clipboard-question::before { + content: "\e4e3"; } + +.fa-clipboard-user::before { + content: "\f7f3"; } + +.fa-clock::before { + content: 
"\f017"; } + +.fa-clock-four::before { + content: "\f017"; } + +.fa-clock-rotate-left::before { + content: "\f1da"; } + +.fa-history::before { + content: "\f1da"; } + +.fa-clone::before { + content: "\f24d"; } + +.fa-closed-captioning::before { + content: "\f20a"; } + +.fa-cloud::before { + content: "\f0c2"; } + +.fa-cloud-arrow-down::before { + content: "\f0ed"; } + +.fa-cloud-download::before { + content: "\f0ed"; } + +.fa-cloud-download-alt::before { + content: "\f0ed"; } + +.fa-cloud-arrow-up::before { + content: "\f0ee"; } + +.fa-cloud-upload::before { + content: "\f0ee"; } + +.fa-cloud-upload-alt::before { + content: "\f0ee"; } + +.fa-cloud-bolt::before { + content: "\f76c"; } + +.fa-thunderstorm::before { + content: "\f76c"; } + +.fa-cloud-meatball::before { + content: "\f73b"; } + +.fa-cloud-moon::before { + content: "\f6c3"; } + +.fa-cloud-moon-rain::before { + content: "\f73c"; } + +.fa-cloud-rain::before { + content: "\f73d"; } + +.fa-cloud-showers-heavy::before { + content: "\f740"; } + +.fa-cloud-showers-water::before { + content: "\e4e4"; } + +.fa-cloud-sun::before { + content: "\f6c4"; } + +.fa-cloud-sun-rain::before { + content: "\f743"; } + +.fa-clover::before { + content: "\e139"; } + +.fa-code::before { + content: "\f121"; } + +.fa-code-branch::before { + content: "\f126"; } + +.fa-code-commit::before { + content: "\f386"; } + +.fa-code-compare::before { + content: "\e13a"; } + +.fa-code-fork::before { + content: "\e13b"; } + +.fa-code-merge::before { + content: "\f387"; } + +.fa-code-pull-request::before { + content: "\e13c"; } + +.fa-coins::before { + content: "\f51e"; } + +.fa-colon-sign::before { + content: "\e140"; } + +.fa-comment::before { + content: "\f075"; } + +.fa-comment-dollar::before { + content: "\f651"; } + +.fa-comment-dots::before { + content: "\f4ad"; } + +.fa-commenting::before { + content: "\f4ad"; } + +.fa-comment-medical::before { + content: "\f7f5"; } + +.fa-comment-slash::before { + content: "\f4b3"; } + 
+.fa-comment-sms::before { + content: "\f7cd"; } + +.fa-sms::before { + content: "\f7cd"; } + +.fa-comments::before { + content: "\f086"; } + +.fa-comments-dollar::before { + content: "\f653"; } + +.fa-compact-disc::before { + content: "\f51f"; } + +.fa-compass::before { + content: "\f14e"; } + +.fa-compass-drafting::before { + content: "\f568"; } + +.fa-drafting-compass::before { + content: "\f568"; } + +.fa-compress::before { + content: "\f066"; } + +.fa-computer::before { + content: "\e4e5"; } + +.fa-computer-mouse::before { + content: "\f8cc"; } + +.fa-mouse::before { + content: "\f8cc"; } + +.fa-cookie::before { + content: "\f563"; } + +.fa-cookie-bite::before { + content: "\f564"; } + +.fa-copy::before { + content: "\f0c5"; } + +.fa-copyright::before { + content: "\f1f9"; } + +.fa-couch::before { + content: "\f4b8"; } + +.fa-cow::before { + content: "\f6c8"; } + +.fa-credit-card::before { + content: "\f09d"; } + +.fa-credit-card-alt::before { + content: "\f09d"; } + +.fa-crop::before { + content: "\f125"; } + +.fa-crop-simple::before { + content: "\f565"; } + +.fa-crop-alt::before { + content: "\f565"; } + +.fa-cross::before { + content: "\f654"; } + +.fa-crosshairs::before { + content: "\f05b"; } + +.fa-crow::before { + content: "\f520"; } + +.fa-crown::before { + content: "\f521"; } + +.fa-crutch::before { + content: "\f7f7"; } + +.fa-cruzeiro-sign::before { + content: "\e152"; } + +.fa-cube::before { + content: "\f1b2"; } + +.fa-cubes::before { + content: "\f1b3"; } + +.fa-cubes-stacked::before { + content: "\e4e6"; } + +.fa-d::before { + content: "\44"; } + +.fa-database::before { + content: "\f1c0"; } + +.fa-delete-left::before { + content: "\f55a"; } + +.fa-backspace::before { + content: "\f55a"; } + +.fa-democrat::before { + content: "\f747"; } + +.fa-desktop::before { + content: "\f390"; } + +.fa-desktop-alt::before { + content: "\f390"; } + +.fa-dharmachakra::before { + content: "\f655"; } + +.fa-diagram-next::before { + content: "\e476"; } + 
+.fa-diagram-predecessor::before { + content: "\e477"; } + +.fa-diagram-project::before { + content: "\f542"; } + +.fa-project-diagram::before { + content: "\f542"; } + +.fa-diagram-successor::before { + content: "\e47a"; } + +.fa-diamond::before { + content: "\f219"; } + +.fa-diamond-turn-right::before { + content: "\f5eb"; } + +.fa-directions::before { + content: "\f5eb"; } + +.fa-dice::before { + content: "\f522"; } + +.fa-dice-d20::before { + content: "\f6cf"; } + +.fa-dice-d6::before { + content: "\f6d1"; } + +.fa-dice-five::before { + content: "\f523"; } + +.fa-dice-four::before { + content: "\f524"; } + +.fa-dice-one::before { + content: "\f525"; } + +.fa-dice-six::before { + content: "\f526"; } + +.fa-dice-three::before { + content: "\f527"; } + +.fa-dice-two::before { + content: "\f528"; } + +.fa-disease::before { + content: "\f7fa"; } + +.fa-display::before { + content: "\e163"; } + +.fa-divide::before { + content: "\f529"; } + +.fa-dna::before { + content: "\f471"; } + +.fa-dog::before { + content: "\f6d3"; } + +.fa-dollar-sign::before { + content: "\24"; } + +.fa-dollar::before { + content: "\24"; } + +.fa-usd::before { + content: "\24"; } + +.fa-dolly::before { + content: "\f472"; } + +.fa-dolly-box::before { + content: "\f472"; } + +.fa-dong-sign::before { + content: "\e169"; } + +.fa-door-closed::before { + content: "\f52a"; } + +.fa-door-open::before { + content: "\f52b"; } + +.fa-dove::before { + content: "\f4ba"; } + +.fa-down-left-and-up-right-to-center::before { + content: "\f422"; } + +.fa-compress-alt::before { + content: "\f422"; } + +.fa-down-long::before { + content: "\f309"; } + +.fa-long-arrow-alt-down::before { + content: "\f309"; } + +.fa-download::before { + content: "\f019"; } + +.fa-dragon::before { + content: "\f6d5"; } + +.fa-draw-polygon::before { + content: "\f5ee"; } + +.fa-droplet::before { + content: "\f043"; } + +.fa-tint::before { + content: "\f043"; } + +.fa-droplet-slash::before { + content: "\f5c7"; } + 
+.fa-tint-slash::before { + content: "\f5c7"; } + +.fa-drum::before { + content: "\f569"; } + +.fa-drum-steelpan::before { + content: "\f56a"; } + +.fa-drumstick-bite::before { + content: "\f6d7"; } + +.fa-dumbbell::before { + content: "\f44b"; } + +.fa-dumpster::before { + content: "\f793"; } + +.fa-dumpster-fire::before { + content: "\f794"; } + +.fa-dungeon::before { + content: "\f6d9"; } + +.fa-e::before { + content: "\45"; } + +.fa-ear-deaf::before { + content: "\f2a4"; } + +.fa-deaf::before { + content: "\f2a4"; } + +.fa-deafness::before { + content: "\f2a4"; } + +.fa-hard-of-hearing::before { + content: "\f2a4"; } + +.fa-ear-listen::before { + content: "\f2a2"; } + +.fa-assistive-listening-systems::before { + content: "\f2a2"; } + +.fa-earth-africa::before { + content: "\f57c"; } + +.fa-globe-africa::before { + content: "\f57c"; } + +.fa-earth-americas::before { + content: "\f57d"; } + +.fa-earth::before { + content: "\f57d"; } + +.fa-earth-america::before { + content: "\f57d"; } + +.fa-globe-americas::before { + content: "\f57d"; } + +.fa-earth-asia::before { + content: "\f57e"; } + +.fa-globe-asia::before { + content: "\f57e"; } + +.fa-earth-europe::before { + content: "\f7a2"; } + +.fa-globe-europe::before { + content: "\f7a2"; } + +.fa-earth-oceania::before { + content: "\e47b"; } + +.fa-globe-oceania::before { + content: "\e47b"; } + +.fa-egg::before { + content: "\f7fb"; } + +.fa-eject::before { + content: "\f052"; } + +.fa-elevator::before { + content: "\e16d"; } + +.fa-ellipsis::before { + content: "\f141"; } + +.fa-ellipsis-h::before { + content: "\f141"; } + +.fa-ellipsis-vertical::before { + content: "\f142"; } + +.fa-ellipsis-v::before { + content: "\f142"; } + +.fa-envelope::before { + content: "\f0e0"; } + +.fa-envelope-circle-check::before { + content: "\e4e8"; } + +.fa-envelope-open::before { + content: "\f2b6"; } + +.fa-envelope-open-text::before { + content: "\f658"; } + +.fa-envelopes-bulk::before { + content: "\f674"; } + 
+.fa-mail-bulk::before { + content: "\f674"; } + +.fa-equals::before { + content: "\3d"; } + +.fa-eraser::before { + content: "\f12d"; } + +.fa-ethernet::before { + content: "\f796"; } + +.fa-euro-sign::before { + content: "\f153"; } + +.fa-eur::before { + content: "\f153"; } + +.fa-euro::before { + content: "\f153"; } + +.fa-exclamation::before { + content: "\21"; } + +.fa-expand::before { + content: "\f065"; } + +.fa-explosion::before { + content: "\e4e9"; } + +.fa-eye::before { + content: "\f06e"; } + +.fa-eye-dropper::before { + content: "\f1fb"; } + +.fa-eye-dropper-empty::before { + content: "\f1fb"; } + +.fa-eyedropper::before { + content: "\f1fb"; } + +.fa-eye-low-vision::before { + content: "\f2a8"; } + +.fa-low-vision::before { + content: "\f2a8"; } + +.fa-eye-slash::before { + content: "\f070"; } + +.fa-f::before { + content: "\46"; } + +.fa-face-angry::before { + content: "\f556"; } + +.fa-angry::before { + content: "\f556"; } + +.fa-face-dizzy::before { + content: "\f567"; } + +.fa-dizzy::before { + content: "\f567"; } + +.fa-face-flushed::before { + content: "\f579"; } + +.fa-flushed::before { + content: "\f579"; } + +.fa-face-frown::before { + content: "\f119"; } + +.fa-frown::before { + content: "\f119"; } + +.fa-face-frown-open::before { + content: "\f57a"; } + +.fa-frown-open::before { + content: "\f57a"; } + +.fa-face-grimace::before { + content: "\f57f"; } + +.fa-grimace::before { + content: "\f57f"; } + +.fa-face-grin::before { + content: "\f580"; } + +.fa-grin::before { + content: "\f580"; } + +.fa-face-grin-beam::before { + content: "\f582"; } + +.fa-grin-beam::before { + content: "\f582"; } + +.fa-face-grin-beam-sweat::before { + content: "\f583"; } + +.fa-grin-beam-sweat::before { + content: "\f583"; } + +.fa-face-grin-hearts::before { + content: "\f584"; } + +.fa-grin-hearts::before { + content: "\f584"; } + +.fa-face-grin-squint::before { + content: "\f585"; } + +.fa-grin-squint::before { + content: "\f585"; } + 
+.fa-face-grin-squint-tears::before { + content: "\f586"; } + +.fa-grin-squint-tears::before { + content: "\f586"; } + +.fa-face-grin-stars::before { + content: "\f587"; } + +.fa-grin-stars::before { + content: "\f587"; } + +.fa-face-grin-tears::before { + content: "\f588"; } + +.fa-grin-tears::before { + content: "\f588"; } + +.fa-face-grin-tongue::before { + content: "\f589"; } + +.fa-grin-tongue::before { + content: "\f589"; } + +.fa-face-grin-tongue-squint::before { + content: "\f58a"; } + +.fa-grin-tongue-squint::before { + content: "\f58a"; } + +.fa-face-grin-tongue-wink::before { + content: "\f58b"; } + +.fa-grin-tongue-wink::before { + content: "\f58b"; } + +.fa-face-grin-wide::before { + content: "\f581"; } + +.fa-grin-alt::before { + content: "\f581"; } + +.fa-face-grin-wink::before { + content: "\f58c"; } + +.fa-grin-wink::before { + content: "\f58c"; } + +.fa-face-kiss::before { + content: "\f596"; } + +.fa-kiss::before { + content: "\f596"; } + +.fa-face-kiss-beam::before { + content: "\f597"; } + +.fa-kiss-beam::before { + content: "\f597"; } + +.fa-face-kiss-wink-heart::before { + content: "\f598"; } + +.fa-kiss-wink-heart::before { + content: "\f598"; } + +.fa-face-laugh::before { + content: "\f599"; } + +.fa-laugh::before { + content: "\f599"; } + +.fa-face-laugh-beam::before { + content: "\f59a"; } + +.fa-laugh-beam::before { + content: "\f59a"; } + +.fa-face-laugh-squint::before { + content: "\f59b"; } + +.fa-laugh-squint::before { + content: "\f59b"; } + +.fa-face-laugh-wink::before { + content: "\f59c"; } + +.fa-laugh-wink::before { + content: "\f59c"; } + +.fa-face-meh::before { + content: "\f11a"; } + +.fa-meh::before { + content: "\f11a"; } + +.fa-face-meh-blank::before { + content: "\f5a4"; } + +.fa-meh-blank::before { + content: "\f5a4"; } + +.fa-face-rolling-eyes::before { + content: "\f5a5"; } + +.fa-meh-rolling-eyes::before { + content: "\f5a5"; } + +.fa-face-sad-cry::before { + content: "\f5b3"; } + +.fa-sad-cry::before { + content: 
"\f5b3"; } + +.fa-face-sad-tear::before { + content: "\f5b4"; } + +.fa-sad-tear::before { + content: "\f5b4"; } + +.fa-face-smile::before { + content: "\f118"; } + +.fa-smile::before { + content: "\f118"; } + +.fa-face-smile-beam::before { + content: "\f5b8"; } + +.fa-smile-beam::before { + content: "\f5b8"; } + +.fa-face-smile-wink::before { + content: "\f4da"; } + +.fa-smile-wink::before { + content: "\f4da"; } + +.fa-face-surprise::before { + content: "\f5c2"; } + +.fa-surprise::before { + content: "\f5c2"; } + +.fa-face-tired::before { + content: "\f5c8"; } + +.fa-tired::before { + content: "\f5c8"; } + +.fa-fan::before { + content: "\f863"; } + +.fa-faucet::before { + content: "\e005"; } + +.fa-faucet-drip::before { + content: "\e006"; } + +.fa-fax::before { + content: "\f1ac"; } + +.fa-feather::before { + content: "\f52d"; } + +.fa-feather-pointed::before { + content: "\f56b"; } + +.fa-feather-alt::before { + content: "\f56b"; } + +.fa-ferry::before { + content: "\e4ea"; } + +.fa-file::before { + content: "\f15b"; } + +.fa-file-arrow-down::before { + content: "\f56d"; } + +.fa-file-download::before { + content: "\f56d"; } + +.fa-file-arrow-up::before { + content: "\f574"; } + +.fa-file-upload::before { + content: "\f574"; } + +.fa-file-audio::before { + content: "\f1c7"; } + +.fa-file-circle-check::before { + content: "\e493"; } + +.fa-file-circle-exclamation::before { + content: "\e4eb"; } + +.fa-file-circle-minus::before { + content: "\e4ed"; } + +.fa-file-circle-plus::before { + content: "\e4ee"; } + +.fa-file-circle-question::before { + content: "\e4ef"; } + +.fa-file-circle-xmark::before { + content: "\e494"; } + +.fa-file-code::before { + content: "\f1c9"; } + +.fa-file-contract::before { + content: "\f56c"; } + +.fa-file-csv::before { + content: "\f6dd"; } + +.fa-file-excel::before { + content: "\f1c3"; } + +.fa-file-export::before { + content: "\f56e"; } + +.fa-arrow-right-from-file::before { + content: "\f56e"; } + +.fa-file-image::before { + 
content: "\f1c5"; } + +.fa-file-import::before { + content: "\f56f"; } + +.fa-arrow-right-to-file::before { + content: "\f56f"; } + +.fa-file-invoice::before { + content: "\f570"; } + +.fa-file-invoice-dollar::before { + content: "\f571"; } + +.fa-file-lines::before { + content: "\f15c"; } + +.fa-file-alt::before { + content: "\f15c"; } + +.fa-file-text::before { + content: "\f15c"; } + +.fa-file-medical::before { + content: "\f477"; } + +.fa-file-pdf::before { + content: "\f1c1"; } + +.fa-file-pen::before { + content: "\f31c"; } + +.fa-file-edit::before { + content: "\f31c"; } + +.fa-file-powerpoint::before { + content: "\f1c4"; } + +.fa-file-prescription::before { + content: "\f572"; } + +.fa-file-shield::before { + content: "\e4f0"; } + +.fa-file-signature::before { + content: "\f573"; } + +.fa-file-video::before { + content: "\f1c8"; } + +.fa-file-waveform::before { + content: "\f478"; } + +.fa-file-medical-alt::before { + content: "\f478"; } + +.fa-file-word::before { + content: "\f1c2"; } + +.fa-file-zipper::before { + content: "\f1c6"; } + +.fa-file-archive::before { + content: "\f1c6"; } + +.fa-fill::before { + content: "\f575"; } + +.fa-fill-drip::before { + content: "\f576"; } + +.fa-film::before { + content: "\f008"; } + +.fa-filter::before { + content: "\f0b0"; } + +.fa-filter-circle-dollar::before { + content: "\f662"; } + +.fa-funnel-dollar::before { + content: "\f662"; } + +.fa-filter-circle-xmark::before { + content: "\e17b"; } + +.fa-fingerprint::before { + content: "\f577"; } + +.fa-fire::before { + content: "\f06d"; } + +.fa-fire-burner::before { + content: "\e4f1"; } + +.fa-fire-extinguisher::before { + content: "\f134"; } + +.fa-fire-flame-curved::before { + content: "\f7e4"; } + +.fa-fire-alt::before { + content: "\f7e4"; } + +.fa-fire-flame-simple::before { + content: "\f46a"; } + +.fa-burn::before { + content: "\f46a"; } + +.fa-fish::before { + content: "\f578"; } + +.fa-fish-fins::before { + content: "\e4f2"; } + +.fa-flag::before { + 
content: "\f024"; } + +.fa-flag-checkered::before { + content: "\f11e"; } + +.fa-flag-usa::before { + content: "\f74d"; } + +.fa-flask::before { + content: "\f0c3"; } + +.fa-flask-vial::before { + content: "\e4f3"; } + +.fa-floppy-disk::before { + content: "\f0c7"; } + +.fa-save::before { + content: "\f0c7"; } + +.fa-florin-sign::before { + content: "\e184"; } + +.fa-folder::before { + content: "\f07b"; } + +.fa-folder-blank::before { + content: "\f07b"; } + +.fa-folder-closed::before { + content: "\e185"; } + +.fa-folder-minus::before { + content: "\f65d"; } + +.fa-folder-open::before { + content: "\f07c"; } + +.fa-folder-plus::before { + content: "\f65e"; } + +.fa-folder-tree::before { + content: "\f802"; } + +.fa-font::before { + content: "\f031"; } + +.fa-football::before { + content: "\f44e"; } + +.fa-football-ball::before { + content: "\f44e"; } + +.fa-forward::before { + content: "\f04e"; } + +.fa-forward-fast::before { + content: "\f050"; } + +.fa-fast-forward::before { + content: "\f050"; } + +.fa-forward-step::before { + content: "\f051"; } + +.fa-step-forward::before { + content: "\f051"; } + +.fa-franc-sign::before { + content: "\e18f"; } + +.fa-frog::before { + content: "\f52e"; } + +.fa-futbol::before { + content: "\f1e3"; } + +.fa-futbol-ball::before { + content: "\f1e3"; } + +.fa-soccer-ball::before { + content: "\f1e3"; } + +.fa-g::before { + content: "\47"; } + +.fa-gamepad::before { + content: "\f11b"; } + +.fa-gas-pump::before { + content: "\f52f"; } + +.fa-gauge::before { + content: "\f624"; } + +.fa-dashboard::before { + content: "\f624"; } + +.fa-gauge-med::before { + content: "\f624"; } + +.fa-tachometer-alt-average::before { + content: "\f624"; } + +.fa-gauge-high::before { + content: "\f625"; } + +.fa-tachometer-alt::before { + content: "\f625"; } + +.fa-tachometer-alt-fast::before { + content: "\f625"; } + +.fa-gauge-simple::before { + content: "\f629"; } + +.fa-gauge-simple-med::before { + content: "\f629"; } + 
+.fa-tachometer-average::before { + content: "\f629"; } + +.fa-gauge-simple-high::before { + content: "\f62a"; } + +.fa-tachometer::before { + content: "\f62a"; } + +.fa-tachometer-fast::before { + content: "\f62a"; } + +.fa-gavel::before { + content: "\f0e3"; } + +.fa-legal::before { + content: "\f0e3"; } + +.fa-gear::before { + content: "\f013"; } + +.fa-cog::before { + content: "\f013"; } + +.fa-gears::before { + content: "\f085"; } + +.fa-cogs::before { + content: "\f085"; } + +.fa-gem::before { + content: "\f3a5"; } + +.fa-genderless::before { + content: "\f22d"; } + +.fa-ghost::before { + content: "\f6e2"; } + +.fa-gift::before { + content: "\f06b"; } + +.fa-gifts::before { + content: "\f79c"; } + +.fa-glass-water::before { + content: "\e4f4"; } + +.fa-glass-water-droplet::before { + content: "\e4f5"; } + +.fa-glasses::before { + content: "\f530"; } + +.fa-globe::before { + content: "\f0ac"; } + +.fa-golf-ball-tee::before { + content: "\f450"; } + +.fa-golf-ball::before { + content: "\f450"; } + +.fa-gopuram::before { + content: "\f664"; } + +.fa-graduation-cap::before { + content: "\f19d"; } + +.fa-mortar-board::before { + content: "\f19d"; } + +.fa-greater-than::before { + content: "\3e"; } + +.fa-greater-than-equal::before { + content: "\f532"; } + +.fa-grip::before { + content: "\f58d"; } + +.fa-grip-horizontal::before { + content: "\f58d"; } + +.fa-grip-lines::before { + content: "\f7a4"; } + +.fa-grip-lines-vertical::before { + content: "\f7a5"; } + +.fa-grip-vertical::before { + content: "\f58e"; } + +.fa-group-arrows-rotate::before { + content: "\e4f6"; } + +.fa-guarani-sign::before { + content: "\e19a"; } + +.fa-guitar::before { + content: "\f7a6"; } + +.fa-gun::before { + content: "\e19b"; } + +.fa-h::before { + content: "\48"; } + +.fa-hammer::before { + content: "\f6e3"; } + +.fa-hamsa::before { + content: "\f665"; } + +.fa-hand::before { + content: "\f256"; } + +.fa-hand-paper::before { + content: "\f256"; } + +.fa-hand-back-fist::before { + 
content: "\f255"; } + +.fa-hand-rock::before { + content: "\f255"; } + +.fa-hand-dots::before { + content: "\f461"; } + +.fa-allergies::before { + content: "\f461"; } + +.fa-hand-fist::before { + content: "\f6de"; } + +.fa-fist-raised::before { + content: "\f6de"; } + +.fa-hand-holding::before { + content: "\f4bd"; } + +.fa-hand-holding-dollar::before { + content: "\f4c0"; } + +.fa-hand-holding-usd::before { + content: "\f4c0"; } + +.fa-hand-holding-droplet::before { + content: "\f4c1"; } + +.fa-hand-holding-water::before { + content: "\f4c1"; } + +.fa-hand-holding-hand::before { + content: "\e4f7"; } + +.fa-hand-holding-heart::before { + content: "\f4be"; } + +.fa-hand-holding-medical::before { + content: "\e05c"; } + +.fa-hand-lizard::before { + content: "\f258"; } + +.fa-hand-middle-finger::before { + content: "\f806"; } + +.fa-hand-peace::before { + content: "\f25b"; } + +.fa-hand-point-down::before { + content: "\f0a7"; } + +.fa-hand-point-left::before { + content: "\f0a5"; } + +.fa-hand-point-right::before { + content: "\f0a4"; } + +.fa-hand-point-up::before { + content: "\f0a6"; } + +.fa-hand-pointer::before { + content: "\f25a"; } + +.fa-hand-scissors::before { + content: "\f257"; } + +.fa-hand-sparkles::before { + content: "\e05d"; } + +.fa-hand-spock::before { + content: "\f259"; } + +.fa-handcuffs::before { + content: "\e4f8"; } + +.fa-hands::before { + content: "\f2a7"; } + +.fa-sign-language::before { + content: "\f2a7"; } + +.fa-signing::before { + content: "\f2a7"; } + +.fa-hands-asl-interpreting::before { + content: "\f2a3"; } + +.fa-american-sign-language-interpreting::before { + content: "\f2a3"; } + +.fa-asl-interpreting::before { + content: "\f2a3"; } + +.fa-hands-american-sign-language-interpreting::before { + content: "\f2a3"; } + +.fa-hands-bound::before { + content: "\e4f9"; } + +.fa-hands-bubbles::before { + content: "\e05e"; } + +.fa-hands-wash::before { + content: "\e05e"; } + +.fa-hands-clapping::before { + content: "\e1a8"; } + 
+.fa-hands-holding::before { + content: "\f4c2"; } + +.fa-hands-holding-child::before { + content: "\e4fa"; } + +.fa-hands-holding-circle::before { + content: "\e4fb"; } + +.fa-hands-praying::before { + content: "\f684"; } + +.fa-praying-hands::before { + content: "\f684"; } + +.fa-handshake::before { + content: "\f2b5"; } + +.fa-handshake-angle::before { + content: "\f4c4"; } + +.fa-hands-helping::before { + content: "\f4c4"; } + +.fa-handshake-simple::before { + content: "\f4c6"; } + +.fa-handshake-alt::before { + content: "\f4c6"; } + +.fa-handshake-simple-slash::before { + content: "\e05f"; } + +.fa-handshake-alt-slash::before { + content: "\e05f"; } + +.fa-handshake-slash::before { + content: "\e060"; } + +.fa-hanukiah::before { + content: "\f6e6"; } + +.fa-hard-drive::before { + content: "\f0a0"; } + +.fa-hdd::before { + content: "\f0a0"; } + +.fa-hashtag::before { + content: "\23"; } + +.fa-hat-cowboy::before { + content: "\f8c0"; } + +.fa-hat-cowboy-side::before { + content: "\f8c1"; } + +.fa-hat-wizard::before { + content: "\f6e8"; } + +.fa-head-side-cough::before { + content: "\e061"; } + +.fa-head-side-cough-slash::before { + content: "\e062"; } + +.fa-head-side-mask::before { + content: "\e063"; } + +.fa-head-side-virus::before { + content: "\e064"; } + +.fa-heading::before { + content: "\f1dc"; } + +.fa-header::before { + content: "\f1dc"; } + +.fa-headphones::before { + content: "\f025"; } + +.fa-headphones-simple::before { + content: "\f58f"; } + +.fa-headphones-alt::before { + content: "\f58f"; } + +.fa-headset::before { + content: "\f590"; } + +.fa-heart::before { + content: "\f004"; } + +.fa-heart-circle-bolt::before { + content: "\e4fc"; } + +.fa-heart-circle-check::before { + content: "\e4fd"; } + +.fa-heart-circle-exclamation::before { + content: "\e4fe"; } + +.fa-heart-circle-minus::before { + content: "\e4ff"; } + +.fa-heart-circle-plus::before { + content: "\e500"; } + +.fa-heart-circle-xmark::before { + content: "\e501"; } + 
+.fa-heart-crack::before { + content: "\f7a9"; } + +.fa-heart-broken::before { + content: "\f7a9"; } + +.fa-heart-pulse::before { + content: "\f21e"; } + +.fa-heartbeat::before { + content: "\f21e"; } + +.fa-helicopter::before { + content: "\f533"; } + +.fa-helicopter-symbol::before { + content: "\e502"; } + +.fa-helmet-safety::before { + content: "\f807"; } + +.fa-hard-hat::before { + content: "\f807"; } + +.fa-hat-hard::before { + content: "\f807"; } + +.fa-helmet-un::before { + content: "\e503"; } + +.fa-highlighter::before { + content: "\f591"; } + +.fa-hill-avalanche::before { + content: "\e507"; } + +.fa-hill-rockslide::before { + content: "\e508"; } + +.fa-hippo::before { + content: "\f6ed"; } + +.fa-hockey-puck::before { + content: "\f453"; } + +.fa-holly-berry::before { + content: "\f7aa"; } + +.fa-horse::before { + content: "\f6f0"; } + +.fa-horse-head::before { + content: "\f7ab"; } + +.fa-hospital::before { + content: "\f0f8"; } + +.fa-hospital-alt::before { + content: "\f0f8"; } + +.fa-hospital-wide::before { + content: "\f0f8"; } + +.fa-hospital-user::before { + content: "\f80d"; } + +.fa-hot-tub-person::before { + content: "\f593"; } + +.fa-hot-tub::before { + content: "\f593"; } + +.fa-hotdog::before { + content: "\f80f"; } + +.fa-hotel::before { + content: "\f594"; } + +.fa-hourglass::before { + content: "\f254"; } + +.fa-hourglass-2::before { + content: "\f254"; } + +.fa-hourglass-half::before { + content: "\f254"; } + +.fa-hourglass-empty::before { + content: "\f252"; } + +.fa-hourglass-end::before { + content: "\f253"; } + +.fa-hourglass-3::before { + content: "\f253"; } + +.fa-hourglass-start::before { + content: "\f251"; } + +.fa-hourglass-1::before { + content: "\f251"; } + +.fa-house::before { + content: "\f015"; } + +.fa-home::before { + content: "\f015"; } + +.fa-home-alt::before { + content: "\f015"; } + +.fa-home-lg-alt::before { + content: "\f015"; } + +.fa-house-chimney::before { + content: "\e3af"; } + +.fa-home-lg::before { + 
content: "\e3af"; } + +.fa-house-chimney-crack::before { + content: "\f6f1"; } + +.fa-house-damage::before { + content: "\f6f1"; } + +.fa-house-chimney-medical::before { + content: "\f7f2"; } + +.fa-clinic-medical::before { + content: "\f7f2"; } + +.fa-house-chimney-user::before { + content: "\e065"; } + +.fa-house-chimney-window::before { + content: "\e00d"; } + +.fa-house-circle-check::before { + content: "\e509"; } + +.fa-house-circle-exclamation::before { + content: "\e50a"; } + +.fa-house-circle-xmark::before { + content: "\e50b"; } + +.fa-house-crack::before { + content: "\e3b1"; } + +.fa-house-fire::before { + content: "\e50c"; } + +.fa-house-flag::before { + content: "\e50d"; } + +.fa-house-flood-water::before { + content: "\e50e"; } + +.fa-house-flood-water-circle-arrow-right::before { + content: "\e50f"; } + +.fa-house-laptop::before { + content: "\e066"; } + +.fa-laptop-house::before { + content: "\e066"; } + +.fa-house-lock::before { + content: "\e510"; } + +.fa-house-medical::before { + content: "\e3b2"; } + +.fa-house-medical-circle-check::before { + content: "\e511"; } + +.fa-house-medical-circle-exclamation::before { + content: "\e512"; } + +.fa-house-medical-circle-xmark::before { + content: "\e513"; } + +.fa-house-medical-flag::before { + content: "\e514"; } + +.fa-house-signal::before { + content: "\e012"; } + +.fa-house-tsunami::before { + content: "\e515"; } + +.fa-house-user::before { + content: "\e1b0"; } + +.fa-home-user::before { + content: "\e1b0"; } + +.fa-hryvnia-sign::before { + content: "\f6f2"; } + +.fa-hryvnia::before { + content: "\f6f2"; } + +.fa-hurricane::before { + content: "\f751"; } + +.fa-i::before { + content: "\49"; } + +.fa-i-cursor::before { + content: "\f246"; } + +.fa-ice-cream::before { + content: "\f810"; } + +.fa-icicles::before { + content: "\f7ad"; } + +.fa-icons::before { + content: "\f86d"; } + +.fa-heart-music-camera-bolt::before { + content: "\f86d"; } + +.fa-id-badge::before { + content: "\f2c1"; } + 
+.fa-id-card::before { + content: "\f2c2"; } + +.fa-drivers-license::before { + content: "\f2c2"; } + +.fa-id-card-clip::before { + content: "\f47f"; } + +.fa-id-card-alt::before { + content: "\f47f"; } + +.fa-igloo::before { + content: "\f7ae"; } + +.fa-image::before { + content: "\f03e"; } + +.fa-image-portrait::before { + content: "\f3e0"; } + +.fa-portrait::before { + content: "\f3e0"; } + +.fa-images::before { + content: "\f302"; } + +.fa-inbox::before { + content: "\f01c"; } + +.fa-indent::before { + content: "\f03c"; } + +.fa-indian-rupee-sign::before { + content: "\e1bc"; } + +.fa-indian-rupee::before { + content: "\e1bc"; } + +.fa-inr::before { + content: "\e1bc"; } + +.fa-industry::before { + content: "\f275"; } + +.fa-infinity::before { + content: "\f534"; } + +.fa-info::before { + content: "\f129"; } + +.fa-italic::before { + content: "\f033"; } + +.fa-j::before { + content: "\4a"; } + +.fa-jar::before { + content: "\e516"; } + +.fa-jar-wheat::before { + content: "\e517"; } + +.fa-jedi::before { + content: "\f669"; } + +.fa-jet-fighter::before { + content: "\f0fb"; } + +.fa-fighter-jet::before { + content: "\f0fb"; } + +.fa-jet-fighter-up::before { + content: "\e518"; } + +.fa-joint::before { + content: "\f595"; } + +.fa-jug-detergent::before { + content: "\e519"; } + +.fa-k::before { + content: "\4b"; } + +.fa-kaaba::before { + content: "\f66b"; } + +.fa-key::before { + content: "\f084"; } + +.fa-keyboard::before { + content: "\f11c"; } + +.fa-khanda::before { + content: "\f66d"; } + +.fa-kip-sign::before { + content: "\e1c4"; } + +.fa-kit-medical::before { + content: "\f479"; } + +.fa-first-aid::before { + content: "\f479"; } + +.fa-kitchen-set::before { + content: "\e51a"; } + +.fa-kiwi-bird::before { + content: "\f535"; } + +.fa-l::before { + content: "\4c"; } + +.fa-land-mine-on::before { + content: "\e51b"; } + +.fa-landmark::before { + content: "\f66f"; } + +.fa-landmark-dome::before { + content: "\f752"; } + +.fa-landmark-alt::before { + 
content: "\f752"; } + +.fa-landmark-flag::before { + content: "\e51c"; } + +.fa-language::before { + content: "\f1ab"; } + +.fa-laptop::before { + content: "\f109"; } + +.fa-laptop-code::before { + content: "\f5fc"; } + +.fa-laptop-file::before { + content: "\e51d"; } + +.fa-laptop-medical::before { + content: "\f812"; } + +.fa-lari-sign::before { + content: "\e1c8"; } + +.fa-layer-group::before { + content: "\f5fd"; } + +.fa-leaf::before { + content: "\f06c"; } + +.fa-left-long::before { + content: "\f30a"; } + +.fa-long-arrow-alt-left::before { + content: "\f30a"; } + +.fa-left-right::before { + content: "\f337"; } + +.fa-arrows-alt-h::before { + content: "\f337"; } + +.fa-lemon::before { + content: "\f094"; } + +.fa-less-than::before { + content: "\3c"; } + +.fa-less-than-equal::before { + content: "\f537"; } + +.fa-life-ring::before { + content: "\f1cd"; } + +.fa-lightbulb::before { + content: "\f0eb"; } + +.fa-lines-leaning::before { + content: "\e51e"; } + +.fa-link::before { + content: "\f0c1"; } + +.fa-chain::before { + content: "\f0c1"; } + +.fa-link-slash::before { + content: "\f127"; } + +.fa-chain-broken::before { + content: "\f127"; } + +.fa-chain-slash::before { + content: "\f127"; } + +.fa-unlink::before { + content: "\f127"; } + +.fa-lira-sign::before { + content: "\f195"; } + +.fa-list::before { + content: "\f03a"; } + +.fa-list-squares::before { + content: "\f03a"; } + +.fa-list-check::before { + content: "\f0ae"; } + +.fa-tasks::before { + content: "\f0ae"; } + +.fa-list-ol::before { + content: "\f0cb"; } + +.fa-list-1-2::before { + content: "\f0cb"; } + +.fa-list-numeric::before { + content: "\f0cb"; } + +.fa-list-ul::before { + content: "\f0ca"; } + +.fa-list-dots::before { + content: "\f0ca"; } + +.fa-litecoin-sign::before { + content: "\e1d3"; } + +.fa-location-arrow::before { + content: "\f124"; } + +.fa-location-crosshairs::before { + content: "\f601"; } + +.fa-location::before { + content: "\f601"; } + +.fa-location-dot::before { + 
content: "\f3c5"; } + +.fa-map-marker-alt::before { + content: "\f3c5"; } + +.fa-location-pin::before { + content: "\f041"; } + +.fa-map-marker::before { + content: "\f041"; } + +.fa-location-pin-lock::before { + content: "\e51f"; } + +.fa-lock::before { + content: "\f023"; } + +.fa-lock-open::before { + content: "\f3c1"; } + +.fa-locust::before { + content: "\e520"; } + +.fa-lungs::before { + content: "\f604"; } + +.fa-lungs-virus::before { + content: "\e067"; } + +.fa-m::before { + content: "\4d"; } + +.fa-magnet::before { + content: "\f076"; } + +.fa-magnifying-glass::before { + content: "\f002"; } + +.fa-search::before { + content: "\f002"; } + +.fa-magnifying-glass-arrow-right::before { + content: "\e521"; } + +.fa-magnifying-glass-chart::before { + content: "\e522"; } + +.fa-magnifying-glass-dollar::before { + content: "\f688"; } + +.fa-search-dollar::before { + content: "\f688"; } + +.fa-magnifying-glass-location::before { + content: "\f689"; } + +.fa-search-location::before { + content: "\f689"; } + +.fa-magnifying-glass-minus::before { + content: "\f010"; } + +.fa-search-minus::before { + content: "\f010"; } + +.fa-magnifying-glass-plus::before { + content: "\f00e"; } + +.fa-search-plus::before { + content: "\f00e"; } + +.fa-manat-sign::before { + content: "\e1d5"; } + +.fa-map::before { + content: "\f279"; } + +.fa-map-location::before { + content: "\f59f"; } + +.fa-map-marked::before { + content: "\f59f"; } + +.fa-map-location-dot::before { + content: "\f5a0"; } + +.fa-map-marked-alt::before { + content: "\f5a0"; } + +.fa-map-pin::before { + content: "\f276"; } + +.fa-marker::before { + content: "\f5a1"; } + +.fa-mars::before { + content: "\f222"; } + +.fa-mars-and-venus::before { + content: "\f224"; } + +.fa-mars-and-venus-burst::before { + content: "\e523"; } + +.fa-mars-double::before { + content: "\f227"; } + +.fa-mars-stroke::before { + content: "\f229"; } + +.fa-mars-stroke-right::before { + content: "\f22b"; } + +.fa-mars-stroke-h::before { + 
content: "\f22b"; } + +.fa-mars-stroke-up::before { + content: "\f22a"; } + +.fa-mars-stroke-v::before { + content: "\f22a"; } + +.fa-martini-glass::before { + content: "\f57b"; } + +.fa-glass-martini-alt::before { + content: "\f57b"; } + +.fa-martini-glass-citrus::before { + content: "\f561"; } + +.fa-cocktail::before { + content: "\f561"; } + +.fa-martini-glass-empty::before { + content: "\f000"; } + +.fa-glass-martini::before { + content: "\f000"; } + +.fa-mask::before { + content: "\f6fa"; } + +.fa-mask-face::before { + content: "\e1d7"; } + +.fa-mask-ventilator::before { + content: "\e524"; } + +.fa-masks-theater::before { + content: "\f630"; } + +.fa-theater-masks::before { + content: "\f630"; } + +.fa-mattress-pillow::before { + content: "\e525"; } + +.fa-maximize::before { + content: "\f31e"; } + +.fa-expand-arrows-alt::before { + content: "\f31e"; } + +.fa-medal::before { + content: "\f5a2"; } + +.fa-memory::before { + content: "\f538"; } + +.fa-menorah::before { + content: "\f676"; } + +.fa-mercury::before { + content: "\f223"; } + +.fa-message::before { + content: "\f27a"; } + +.fa-comment-alt::before { + content: "\f27a"; } + +.fa-meteor::before { + content: "\f753"; } + +.fa-microchip::before { + content: "\f2db"; } + +.fa-microphone::before { + content: "\f130"; } + +.fa-microphone-lines::before { + content: "\f3c9"; } + +.fa-microphone-alt::before { + content: "\f3c9"; } + +.fa-microphone-lines-slash::before { + content: "\f539"; } + +.fa-microphone-alt-slash::before { + content: "\f539"; } + +.fa-microphone-slash::before { + content: "\f131"; } + +.fa-microscope::before { + content: "\f610"; } + +.fa-mill-sign::before { + content: "\e1ed"; } + +.fa-minimize::before { + content: "\f78c"; } + +.fa-compress-arrows-alt::before { + content: "\f78c"; } + +.fa-minus::before { + content: "\f068"; } + +.fa-subtract::before { + content: "\f068"; } + +.fa-mitten::before { + content: "\f7b5"; } + +.fa-mobile::before { + content: "\f3ce"; } + 
+.fa-mobile-android::before { + content: "\f3ce"; } + +.fa-mobile-phone::before { + content: "\f3ce"; } + +.fa-mobile-button::before { + content: "\f10b"; } + +.fa-mobile-retro::before { + content: "\e527"; } + +.fa-mobile-screen::before { + content: "\f3cf"; } + +.fa-mobile-android-alt::before { + content: "\f3cf"; } + +.fa-mobile-screen-button::before { + content: "\f3cd"; } + +.fa-mobile-alt::before { + content: "\f3cd"; } + +.fa-money-bill::before { + content: "\f0d6"; } + +.fa-money-bill-1::before { + content: "\f3d1"; } + +.fa-money-bill-alt::before { + content: "\f3d1"; } + +.fa-money-bill-1-wave::before { + content: "\f53b"; } + +.fa-money-bill-wave-alt::before { + content: "\f53b"; } + +.fa-money-bill-transfer::before { + content: "\e528"; } + +.fa-money-bill-trend-up::before { + content: "\e529"; } + +.fa-money-bill-wave::before { + content: "\f53a"; } + +.fa-money-bill-wheat::before { + content: "\e52a"; } + +.fa-money-bills::before { + content: "\e1f3"; } + +.fa-money-check::before { + content: "\f53c"; } + +.fa-money-check-dollar::before { + content: "\f53d"; } + +.fa-money-check-alt::before { + content: "\f53d"; } + +.fa-monument::before { + content: "\f5a6"; } + +.fa-moon::before { + content: "\f186"; } + +.fa-mortar-pestle::before { + content: "\f5a7"; } + +.fa-mosque::before { + content: "\f678"; } + +.fa-mosquito::before { + content: "\e52b"; } + +.fa-mosquito-net::before { + content: "\e52c"; } + +.fa-motorcycle::before { + content: "\f21c"; } + +.fa-mound::before { + content: "\e52d"; } + +.fa-mountain::before { + content: "\f6fc"; } + +.fa-mountain-city::before { + content: "\e52e"; } + +.fa-mountain-sun::before { + content: "\e52f"; } + +.fa-mug-hot::before { + content: "\f7b6"; } + +.fa-mug-saucer::before { + content: "\f0f4"; } + +.fa-coffee::before { + content: "\f0f4"; } + +.fa-music::before { + content: "\f001"; } + +.fa-n::before { + content: "\4e"; } + +.fa-naira-sign::before { + content: "\e1f6"; } + +.fa-network-wired::before { + 
content: "\f6ff"; } + +.fa-neuter::before { + content: "\f22c"; } + +.fa-newspaper::before { + content: "\f1ea"; } + +.fa-not-equal::before { + content: "\f53e"; } + +.fa-note-sticky::before { + content: "\f249"; } + +.fa-sticky-note::before { + content: "\f249"; } + +.fa-notes-medical::before { + content: "\f481"; } + +.fa-o::before { + content: "\4f"; } + +.fa-object-group::before { + content: "\f247"; } + +.fa-object-ungroup::before { + content: "\f248"; } + +.fa-oil-can::before { + content: "\f613"; } + +.fa-oil-well::before { + content: "\e532"; } + +.fa-om::before { + content: "\f679"; } + +.fa-otter::before { + content: "\f700"; } + +.fa-outdent::before { + content: "\f03b"; } + +.fa-dedent::before { + content: "\f03b"; } + +.fa-p::before { + content: "\50"; } + +.fa-pager::before { + content: "\f815"; } + +.fa-paint-roller::before { + content: "\f5aa"; } + +.fa-paintbrush::before { + content: "\f1fc"; } + +.fa-paint-brush::before { + content: "\f1fc"; } + +.fa-palette::before { + content: "\f53f"; } + +.fa-pallet::before { + content: "\f482"; } + +.fa-panorama::before { + content: "\e209"; } + +.fa-paper-plane::before { + content: "\f1d8"; } + +.fa-paperclip::before { + content: "\f0c6"; } + +.fa-parachute-box::before { + content: "\f4cd"; } + +.fa-paragraph::before { + content: "\f1dd"; } + +.fa-passport::before { + content: "\f5ab"; } + +.fa-paste::before { + content: "\f0ea"; } + +.fa-file-clipboard::before { + content: "\f0ea"; } + +.fa-pause::before { + content: "\f04c"; } + +.fa-paw::before { + content: "\f1b0"; } + +.fa-peace::before { + content: "\f67c"; } + +.fa-pen::before { + content: "\f304"; } + +.fa-pen-clip::before { + content: "\f305"; } + +.fa-pen-alt::before { + content: "\f305"; } + +.fa-pen-fancy::before { + content: "\f5ac"; } + +.fa-pen-nib::before { + content: "\f5ad"; } + +.fa-pen-ruler::before { + content: "\f5ae"; } + +.fa-pencil-ruler::before { + content: "\f5ae"; } + +.fa-pen-to-square::before { + content: "\f044"; } + 
+.fa-edit::before { + content: "\f044"; } + +.fa-pencil::before { + content: "\f303"; } + +.fa-pencil-alt::before { + content: "\f303"; } + +.fa-people-arrows-left-right::before { + content: "\e068"; } + +.fa-people-arrows::before { + content: "\e068"; } + +.fa-people-carry-box::before { + content: "\f4ce"; } + +.fa-people-carry::before { + content: "\f4ce"; } + +.fa-people-group::before { + content: "\e533"; } + +.fa-people-line::before { + content: "\e534"; } + +.fa-people-pulling::before { + content: "\e535"; } + +.fa-people-robbery::before { + content: "\e536"; } + +.fa-people-roof::before { + content: "\e537"; } + +.fa-pepper-hot::before { + content: "\f816"; } + +.fa-percent::before { + content: "\25"; } + +.fa-percentage::before { + content: "\25"; } + +.fa-person::before { + content: "\f183"; } + +.fa-male::before { + content: "\f183"; } + +.fa-person-arrow-down-to-line::before { + content: "\e538"; } + +.fa-person-arrow-up-from-line::before { + content: "\e539"; } + +.fa-person-biking::before { + content: "\f84a"; } + +.fa-biking::before { + content: "\f84a"; } + +.fa-person-booth::before { + content: "\f756"; } + +.fa-person-breastfeeding::before { + content: "\e53a"; } + +.fa-person-burst::before { + content: "\e53b"; } + +.fa-person-cane::before { + content: "\e53c"; } + +.fa-person-chalkboard::before { + content: "\e53d"; } + +.fa-person-circle-check::before { + content: "\e53e"; } + +.fa-person-circle-exclamation::before { + content: "\e53f"; } + +.fa-person-circle-minus::before { + content: "\e540"; } + +.fa-person-circle-plus::before { + content: "\e541"; } + +.fa-person-circle-question::before { + content: "\e542"; } + +.fa-person-circle-xmark::before { + content: "\e543"; } + +.fa-person-digging::before { + content: "\f85e"; } + +.fa-digging::before { + content: "\f85e"; } + +.fa-person-dots-from-line::before { + content: "\f470"; } + +.fa-diagnoses::before { + content: "\f470"; } + +.fa-person-dress::before { + content: "\f182"; } + 
+.fa-female::before { + content: "\f182"; } + +.fa-person-dress-burst::before { + content: "\e544"; } + +.fa-person-drowning::before { + content: "\e545"; } + +.fa-person-falling::before { + content: "\e546"; } + +.fa-person-falling-burst::before { + content: "\e547"; } + +.fa-person-half-dress::before { + content: "\e548"; } + +.fa-person-harassing::before { + content: "\e549"; } + +.fa-person-hiking::before { + content: "\f6ec"; } + +.fa-hiking::before { + content: "\f6ec"; } + +.fa-person-military-pointing::before { + content: "\e54a"; } + +.fa-person-military-rifle::before { + content: "\e54b"; } + +.fa-person-military-to-person::before { + content: "\e54c"; } + +.fa-person-praying::before { + content: "\f683"; } + +.fa-pray::before { + content: "\f683"; } + +.fa-person-pregnant::before { + content: "\e31e"; } + +.fa-person-rays::before { + content: "\e54d"; } + +.fa-person-rifle::before { + content: "\e54e"; } + +.fa-person-running::before { + content: "\f70c"; } + +.fa-running::before { + content: "\f70c"; } + +.fa-person-shelter::before { + content: "\e54f"; } + +.fa-person-skating::before { + content: "\f7c5"; } + +.fa-skating::before { + content: "\f7c5"; } + +.fa-person-skiing::before { + content: "\f7c9"; } + +.fa-skiing::before { + content: "\f7c9"; } + +.fa-person-skiing-nordic::before { + content: "\f7ca"; } + +.fa-skiing-nordic::before { + content: "\f7ca"; } + +.fa-person-snowboarding::before { + content: "\f7ce"; } + +.fa-snowboarding::before { + content: "\f7ce"; } + +.fa-person-swimming::before { + content: "\f5c4"; } + +.fa-swimmer::before { + content: "\f5c4"; } + +.fa-person-through-window::before { + content: "\e433"; } + +.fa-person-walking::before { + content: "\f554"; } + +.fa-walking::before { + content: "\f554"; } + +.fa-person-walking-arrow-loop-left::before { + content: "\e551"; } + +.fa-person-walking-arrow-right::before { + content: "\e552"; } + +.fa-person-walking-dashed-line-arrow-right::before { + content: "\e553"; } + 
+.fa-person-walking-luggage::before { + content: "\e554"; } + +.fa-person-walking-with-cane::before { + content: "\f29d"; } + +.fa-blind::before { + content: "\f29d"; } + +.fa-peseta-sign::before { + content: "\e221"; } + +.fa-peso-sign::before { + content: "\e222"; } + +.fa-phone::before { + content: "\f095"; } + +.fa-phone-flip::before { + content: "\f879"; } + +.fa-phone-alt::before { + content: "\f879"; } + +.fa-phone-slash::before { + content: "\f3dd"; } + +.fa-phone-volume::before { + content: "\f2a0"; } + +.fa-volume-control-phone::before { + content: "\f2a0"; } + +.fa-photo-film::before { + content: "\f87c"; } + +.fa-photo-video::before { + content: "\f87c"; } + +.fa-piggy-bank::before { + content: "\f4d3"; } + +.fa-pills::before { + content: "\f484"; } + +.fa-pizza-slice::before { + content: "\f818"; } + +.fa-place-of-worship::before { + content: "\f67f"; } + +.fa-plane::before { + content: "\f072"; } + +.fa-plane-arrival::before { + content: "\f5af"; } + +.fa-plane-circle-check::before { + content: "\e555"; } + +.fa-plane-circle-exclamation::before { + content: "\e556"; } + +.fa-plane-circle-xmark::before { + content: "\e557"; } + +.fa-plane-departure::before { + content: "\f5b0"; } + +.fa-plane-lock::before { + content: "\e558"; } + +.fa-plane-slash::before { + content: "\e069"; } + +.fa-plane-up::before { + content: "\e22d"; } + +.fa-plant-wilt::before { + content: "\e43b"; } + +.fa-plate-wheat::before { + content: "\e55a"; } + +.fa-play::before { + content: "\f04b"; } + +.fa-plug::before { + content: "\f1e6"; } + +.fa-plug-circle-bolt::before { + content: "\e55b"; } + +.fa-plug-circle-check::before { + content: "\e55c"; } + +.fa-plug-circle-exclamation::before { + content: "\e55d"; } + +.fa-plug-circle-minus::before { + content: "\e55e"; } + +.fa-plug-circle-plus::before { + content: "\e55f"; } + +.fa-plug-circle-xmark::before { + content: "\e560"; } + +.fa-plus::before { + content: "\2b"; } + +.fa-add::before { + content: "\2b"; } + 
+.fa-plus-minus::before { + content: "\e43c"; } + +.fa-podcast::before { + content: "\f2ce"; } + +.fa-poo::before { + content: "\f2fe"; } + +.fa-poo-storm::before { + content: "\f75a"; } + +.fa-poo-bolt::before { + content: "\f75a"; } + +.fa-poop::before { + content: "\f619"; } + +.fa-power-off::before { + content: "\f011"; } + +.fa-prescription::before { + content: "\f5b1"; } + +.fa-prescription-bottle::before { + content: "\f485"; } + +.fa-prescription-bottle-medical::before { + content: "\f486"; } + +.fa-prescription-bottle-alt::before { + content: "\f486"; } + +.fa-print::before { + content: "\f02f"; } + +.fa-pump-medical::before { + content: "\e06a"; } + +.fa-pump-soap::before { + content: "\e06b"; } + +.fa-puzzle-piece::before { + content: "\f12e"; } + +.fa-q::before { + content: "\51"; } + +.fa-qrcode::before { + content: "\f029"; } + +.fa-question::before { + content: "\3f"; } + +.fa-quote-left::before { + content: "\f10d"; } + +.fa-quote-left-alt::before { + content: "\f10d"; } + +.fa-quote-right::before { + content: "\f10e"; } + +.fa-quote-right-alt::before { + content: "\f10e"; } + +.fa-r::before { + content: "\52"; } + +.fa-radiation::before { + content: "\f7b9"; } + +.fa-radio::before { + content: "\f8d7"; } + +.fa-rainbow::before { + content: "\f75b"; } + +.fa-ranking-star::before { + content: "\e561"; } + +.fa-receipt::before { + content: "\f543"; } + +.fa-record-vinyl::before { + content: "\f8d9"; } + +.fa-rectangle-ad::before { + content: "\f641"; } + +.fa-ad::before { + content: "\f641"; } + +.fa-rectangle-list::before { + content: "\f022"; } + +.fa-list-alt::before { + content: "\f022"; } + +.fa-rectangle-xmark::before { + content: "\f410"; } + +.fa-rectangle-times::before { + content: "\f410"; } + +.fa-times-rectangle::before { + content: "\f410"; } + +.fa-window-close::before { + content: "\f410"; } + +.fa-recycle::before { + content: "\f1b8"; } + +.fa-registered::before { + content: "\f25d"; } + +.fa-repeat::before { + content: "\f363"; } + 
+.fa-reply::before { + content: "\f3e5"; } + +.fa-mail-reply::before { + content: "\f3e5"; } + +.fa-reply-all::before { + content: "\f122"; } + +.fa-mail-reply-all::before { + content: "\f122"; } + +.fa-republican::before { + content: "\f75e"; } + +.fa-restroom::before { + content: "\f7bd"; } + +.fa-retweet::before { + content: "\f079"; } + +.fa-ribbon::before { + content: "\f4d6"; } + +.fa-right-from-bracket::before { + content: "\f2f5"; } + +.fa-sign-out-alt::before { + content: "\f2f5"; } + +.fa-right-left::before { + content: "\f362"; } + +.fa-exchange-alt::before { + content: "\f362"; } + +.fa-right-long::before { + content: "\f30b"; } + +.fa-long-arrow-alt-right::before { + content: "\f30b"; } + +.fa-right-to-bracket::before { + content: "\f2f6"; } + +.fa-sign-in-alt::before { + content: "\f2f6"; } + +.fa-ring::before { + content: "\f70b"; } + +.fa-road::before { + content: "\f018"; } + +.fa-road-barrier::before { + content: "\e562"; } + +.fa-road-bridge::before { + content: "\e563"; } + +.fa-road-circle-check::before { + content: "\e564"; } + +.fa-road-circle-exclamation::before { + content: "\e565"; } + +.fa-road-circle-xmark::before { + content: "\e566"; } + +.fa-road-lock::before { + content: "\e567"; } + +.fa-road-spikes::before { + content: "\e568"; } + +.fa-robot::before { + content: "\f544"; } + +.fa-rocket::before { + content: "\f135"; } + +.fa-rotate::before { + content: "\f2f1"; } + +.fa-sync-alt::before { + content: "\f2f1"; } + +.fa-rotate-left::before { + content: "\f2ea"; } + +.fa-rotate-back::before { + content: "\f2ea"; } + +.fa-rotate-backward::before { + content: "\f2ea"; } + +.fa-undo-alt::before { + content: "\f2ea"; } + +.fa-rotate-right::before { + content: "\f2f9"; } + +.fa-redo-alt::before { + content: "\f2f9"; } + +.fa-rotate-forward::before { + content: "\f2f9"; } + +.fa-route::before { + content: "\f4d7"; } + +.fa-rss::before { + content: "\f09e"; } + +.fa-feed::before { + content: "\f09e"; } + +.fa-ruble-sign::before { + content: 
"\f158"; } + +.fa-rouble::before { + content: "\f158"; } + +.fa-rub::before { + content: "\f158"; } + +.fa-ruble::before { + content: "\f158"; } + +.fa-rug::before { + content: "\e569"; } + +.fa-ruler::before { + content: "\f545"; } + +.fa-ruler-combined::before { + content: "\f546"; } + +.fa-ruler-horizontal::before { + content: "\f547"; } + +.fa-ruler-vertical::before { + content: "\f548"; } + +.fa-rupee-sign::before { + content: "\f156"; } + +.fa-rupee::before { + content: "\f156"; } + +.fa-rupiah-sign::before { + content: "\e23d"; } + +.fa-s::before { + content: "\53"; } + +.fa-sack-dollar::before { + content: "\f81d"; } + +.fa-sack-xmark::before { + content: "\e56a"; } + +.fa-sailboat::before { + content: "\e445"; } + +.fa-satellite::before { + content: "\f7bf"; } + +.fa-satellite-dish::before { + content: "\f7c0"; } + +.fa-scale-balanced::before { + content: "\f24e"; } + +.fa-balance-scale::before { + content: "\f24e"; } + +.fa-scale-unbalanced::before { + content: "\f515"; } + +.fa-balance-scale-left::before { + content: "\f515"; } + +.fa-scale-unbalanced-flip::before { + content: "\f516"; } + +.fa-balance-scale-right::before { + content: "\f516"; } + +.fa-school::before { + content: "\f549"; } + +.fa-school-circle-check::before { + content: "\e56b"; } + +.fa-school-circle-exclamation::before { + content: "\e56c"; } + +.fa-school-circle-xmark::before { + content: "\e56d"; } + +.fa-school-flag::before { + content: "\e56e"; } + +.fa-school-lock::before { + content: "\e56f"; } + +.fa-scissors::before { + content: "\f0c4"; } + +.fa-cut::before { + content: "\f0c4"; } + +.fa-screwdriver::before { + content: "\f54a"; } + +.fa-screwdriver-wrench::before { + content: "\f7d9"; } + +.fa-tools::before { + content: "\f7d9"; } + +.fa-scroll::before { + content: "\f70e"; } + +.fa-scroll-torah::before { + content: "\f6a0"; } + +.fa-torah::before { + content: "\f6a0"; } + +.fa-sd-card::before { + content: "\f7c2"; } + +.fa-section::before { + content: "\e447"; } + 
+.fa-seedling::before { + content: "\f4d8"; } + +.fa-sprout::before { + content: "\f4d8"; } + +.fa-server::before { + content: "\f233"; } + +.fa-shapes::before { + content: "\f61f"; } + +.fa-triangle-circle-square::before { + content: "\f61f"; } + +.fa-share::before { + content: "\f064"; } + +.fa-arrow-turn-right::before { + content: "\f064"; } + +.fa-mail-forward::before { + content: "\f064"; } + +.fa-share-from-square::before { + content: "\f14d"; } + +.fa-share-square::before { + content: "\f14d"; } + +.fa-share-nodes::before { + content: "\f1e0"; } + +.fa-share-alt::before { + content: "\f1e0"; } + +.fa-sheet-plastic::before { + content: "\e571"; } + +.fa-shekel-sign::before { + content: "\f20b"; } + +.fa-ils::before { + content: "\f20b"; } + +.fa-shekel::before { + content: "\f20b"; } + +.fa-sheqel::before { + content: "\f20b"; } + +.fa-sheqel-sign::before { + content: "\f20b"; } + +.fa-shield::before { + content: "\f132"; } + +.fa-shield-blank::before { + content: "\f132"; } + +.fa-shield-cat::before { + content: "\e572"; } + +.fa-shield-dog::before { + content: "\e573"; } + +.fa-shield-halved::before { + content: "\f3ed"; } + +.fa-shield-alt::before { + content: "\f3ed"; } + +.fa-shield-heart::before { + content: "\e574"; } + +.fa-shield-virus::before { + content: "\e06c"; } + +.fa-ship::before { + content: "\f21a"; } + +.fa-shirt::before { + content: "\f553"; } + +.fa-t-shirt::before { + content: "\f553"; } + +.fa-tshirt::before { + content: "\f553"; } + +.fa-shoe-prints::before { + content: "\f54b"; } + +.fa-shop::before { + content: "\f54f"; } + +.fa-store-alt::before { + content: "\f54f"; } + +.fa-shop-lock::before { + content: "\e4a5"; } + +.fa-shop-slash::before { + content: "\e070"; } + +.fa-store-alt-slash::before { + content: "\e070"; } + +.fa-shower::before { + content: "\f2cc"; } + +.fa-shrimp::before { + content: "\e448"; } + +.fa-shuffle::before { + content: "\f074"; } + +.fa-random::before { + content: "\f074"; } + +.fa-shuttle-space::before { 
+ content: "\f197"; } + +.fa-space-shuttle::before { + content: "\f197"; } + +.fa-sign-hanging::before { + content: "\f4d9"; } + +.fa-sign::before { + content: "\f4d9"; } + +.fa-signal::before { + content: "\f012"; } + +.fa-signal-5::before { + content: "\f012"; } + +.fa-signal-perfect::before { + content: "\f012"; } + +.fa-signature::before { + content: "\f5b7"; } + +.fa-signs-post::before { + content: "\f277"; } + +.fa-map-signs::before { + content: "\f277"; } + +.fa-sim-card::before { + content: "\f7c4"; } + +.fa-sink::before { + content: "\e06d"; } + +.fa-sitemap::before { + content: "\f0e8"; } + +.fa-skull::before { + content: "\f54c"; } + +.fa-skull-crossbones::before { + content: "\f714"; } + +.fa-slash::before { + content: "\f715"; } + +.fa-sleigh::before { + content: "\f7cc"; } + +.fa-sliders::before { + content: "\f1de"; } + +.fa-sliders-h::before { + content: "\f1de"; } + +.fa-smog::before { + content: "\f75f"; } + +.fa-smoking::before { + content: "\f48d"; } + +.fa-snowflake::before { + content: "\f2dc"; } + +.fa-snowman::before { + content: "\f7d0"; } + +.fa-snowplow::before { + content: "\f7d2"; } + +.fa-soap::before { + content: "\e06e"; } + +.fa-socks::before { + content: "\f696"; } + +.fa-solar-panel::before { + content: "\f5ba"; } + +.fa-sort::before { + content: "\f0dc"; } + +.fa-unsorted::before { + content: "\f0dc"; } + +.fa-sort-down::before { + content: "\f0dd"; } + +.fa-sort-desc::before { + content: "\f0dd"; } + +.fa-sort-up::before { + content: "\f0de"; } + +.fa-sort-asc::before { + content: "\f0de"; } + +.fa-spa::before { + content: "\f5bb"; } + +.fa-spaghetti-monster-flying::before { + content: "\f67b"; } + +.fa-pastafarianism::before { + content: "\f67b"; } + +.fa-spell-check::before { + content: "\f891"; } + +.fa-spider::before { + content: "\f717"; } + +.fa-spinner::before { + content: "\f110"; } + +.fa-splotch::before { + content: "\f5bc"; } + +.fa-spoon::before { + content: "\f2e5"; } + +.fa-utensil-spoon::before { + content: 
"\f2e5"; } + +.fa-spray-can::before { + content: "\f5bd"; } + +.fa-spray-can-sparkles::before { + content: "\f5d0"; } + +.fa-air-freshener::before { + content: "\f5d0"; } + +.fa-square::before { + content: "\f0c8"; } + +.fa-square-arrow-up-right::before { + content: "\f14c"; } + +.fa-external-link-square::before { + content: "\f14c"; } + +.fa-square-caret-down::before { + content: "\f150"; } + +.fa-caret-square-down::before { + content: "\f150"; } + +.fa-square-caret-left::before { + content: "\f191"; } + +.fa-caret-square-left::before { + content: "\f191"; } + +.fa-square-caret-right::before { + content: "\f152"; } + +.fa-caret-square-right::before { + content: "\f152"; } + +.fa-square-caret-up::before { + content: "\f151"; } + +.fa-caret-square-up::before { + content: "\f151"; } + +.fa-square-check::before { + content: "\f14a"; } + +.fa-check-square::before { + content: "\f14a"; } + +.fa-square-envelope::before { + content: "\f199"; } + +.fa-envelope-square::before { + content: "\f199"; } + +.fa-square-full::before { + content: "\f45c"; } + +.fa-square-h::before { + content: "\f0fd"; } + +.fa-h-square::before { + content: "\f0fd"; } + +.fa-square-minus::before { + content: "\f146"; } + +.fa-minus-square::before { + content: "\f146"; } + +.fa-square-nfi::before { + content: "\e576"; } + +.fa-square-parking::before { + content: "\f540"; } + +.fa-parking::before { + content: "\f540"; } + +.fa-square-pen::before { + content: "\f14b"; } + +.fa-pen-square::before { + content: "\f14b"; } + +.fa-pencil-square::before { + content: "\f14b"; } + +.fa-square-person-confined::before { + content: "\e577"; } + +.fa-square-phone::before { + content: "\f098"; } + +.fa-phone-square::before { + content: "\f098"; } + +.fa-square-phone-flip::before { + content: "\f87b"; } + +.fa-phone-square-alt::before { + content: "\f87b"; } + +.fa-square-plus::before { + content: "\f0fe"; } + +.fa-plus-square::before { + content: "\f0fe"; } + +.fa-square-poll-horizontal::before { + content: 
"\f682"; } + +.fa-poll-h::before { + content: "\f682"; } + +.fa-square-poll-vertical::before { + content: "\f681"; } + +.fa-poll::before { + content: "\f681"; } + +.fa-square-root-variable::before { + content: "\f698"; } + +.fa-square-root-alt::before { + content: "\f698"; } + +.fa-square-rss::before { + content: "\f143"; } + +.fa-rss-square::before { + content: "\f143"; } + +.fa-square-share-nodes::before { + content: "\f1e1"; } + +.fa-share-alt-square::before { + content: "\f1e1"; } + +.fa-square-up-right::before { + content: "\f360"; } + +.fa-external-link-square-alt::before { + content: "\f360"; } + +.fa-square-virus::before { + content: "\e578"; } + +.fa-square-xmark::before { + content: "\f2d3"; } + +.fa-times-square::before { + content: "\f2d3"; } + +.fa-xmark-square::before { + content: "\f2d3"; } + +.fa-staff-aesculapius::before { + content: "\e579"; } + +.fa-rod-asclepius::before { + content: "\e579"; } + +.fa-rod-snake::before { + content: "\e579"; } + +.fa-staff-snake::before { + content: "\e579"; } + +.fa-stairs::before { + content: "\e289"; } + +.fa-stamp::before { + content: "\f5bf"; } + +.fa-star::before { + content: "\f005"; } + +.fa-star-and-crescent::before { + content: "\f699"; } + +.fa-star-half::before { + content: "\f089"; } + +.fa-star-half-stroke::before { + content: "\f5c0"; } + +.fa-star-half-alt::before { + content: "\f5c0"; } + +.fa-star-of-david::before { + content: "\f69a"; } + +.fa-star-of-life::before { + content: "\f621"; } + +.fa-sterling-sign::before { + content: "\f154"; } + +.fa-gbp::before { + content: "\f154"; } + +.fa-pound-sign::before { + content: "\f154"; } + +.fa-stethoscope::before { + content: "\f0f1"; } + +.fa-stop::before { + content: "\f04d"; } + +.fa-stopwatch::before { + content: "\f2f2"; } + +.fa-stopwatch-20::before { + content: "\e06f"; } + +.fa-store::before { + content: "\f54e"; } + +.fa-store-slash::before { + content: "\e071"; } + +.fa-street-view::before { + content: "\f21d"; } + +.fa-strikethrough::before 
{ + content: "\f0cc"; } + +.fa-stroopwafel::before { + content: "\f551"; } + +.fa-subscript::before { + content: "\f12c"; } + +.fa-suitcase::before { + content: "\f0f2"; } + +.fa-suitcase-medical::before { + content: "\f0fa"; } + +.fa-medkit::before { + content: "\f0fa"; } + +.fa-suitcase-rolling::before { + content: "\f5c1"; } + +.fa-sun::before { + content: "\f185"; } + +.fa-sun-plant-wilt::before { + content: "\e57a"; } + +.fa-superscript::before { + content: "\f12b"; } + +.fa-swatchbook::before { + content: "\f5c3"; } + +.fa-synagogue::before { + content: "\f69b"; } + +.fa-syringe::before { + content: "\f48e"; } + +.fa-t::before { + content: "\54"; } + +.fa-table::before { + content: "\f0ce"; } + +.fa-table-cells::before { + content: "\f00a"; } + +.fa-th::before { + content: "\f00a"; } + +.fa-table-cells-large::before { + content: "\f009"; } + +.fa-th-large::before { + content: "\f009"; } + +.fa-table-columns::before { + content: "\f0db"; } + +.fa-columns::before { + content: "\f0db"; } + +.fa-table-list::before { + content: "\f00b"; } + +.fa-th-list::before { + content: "\f00b"; } + +.fa-table-tennis-paddle-ball::before { + content: "\f45d"; } + +.fa-ping-pong-paddle-ball::before { + content: "\f45d"; } + +.fa-table-tennis::before { + content: "\f45d"; } + +.fa-tablet::before { + content: "\f3fb"; } + +.fa-tablet-android::before { + content: "\f3fb"; } + +.fa-tablet-button::before { + content: "\f10a"; } + +.fa-tablet-screen-button::before { + content: "\f3fa"; } + +.fa-tablet-alt::before { + content: "\f3fa"; } + +.fa-tablets::before { + content: "\f490"; } + +.fa-tachograph-digital::before { + content: "\f566"; } + +.fa-digital-tachograph::before { + content: "\f566"; } + +.fa-tag::before { + content: "\f02b"; } + +.fa-tags::before { + content: "\f02c"; } + +.fa-tape::before { + content: "\f4db"; } + +.fa-tarp::before { + content: "\e57b"; } + +.fa-tarp-droplet::before { + content: "\e57c"; } + +.fa-taxi::before { + content: "\f1ba"; } + +.fa-cab::before { + 
content: "\f1ba"; } + +.fa-teeth::before { + content: "\f62e"; } + +.fa-teeth-open::before { + content: "\f62f"; } + +.fa-temperature-arrow-down::before { + content: "\e03f"; } + +.fa-temperature-down::before { + content: "\e03f"; } + +.fa-temperature-arrow-up::before { + content: "\e040"; } + +.fa-temperature-up::before { + content: "\e040"; } + +.fa-temperature-empty::before { + content: "\f2cb"; } + +.fa-temperature-0::before { + content: "\f2cb"; } + +.fa-thermometer-0::before { + content: "\f2cb"; } + +.fa-thermometer-empty::before { + content: "\f2cb"; } + +.fa-temperature-full::before { + content: "\f2c7"; } + +.fa-temperature-4::before { + content: "\f2c7"; } + +.fa-thermometer-4::before { + content: "\f2c7"; } + +.fa-thermometer-full::before { + content: "\f2c7"; } + +.fa-temperature-half::before { + content: "\f2c9"; } + +.fa-temperature-2::before { + content: "\f2c9"; } + +.fa-thermometer-2::before { + content: "\f2c9"; } + +.fa-thermometer-half::before { + content: "\f2c9"; } + +.fa-temperature-high::before { + content: "\f769"; } + +.fa-temperature-low::before { + content: "\f76b"; } + +.fa-temperature-quarter::before { + content: "\f2ca"; } + +.fa-temperature-1::before { + content: "\f2ca"; } + +.fa-thermometer-1::before { + content: "\f2ca"; } + +.fa-thermometer-quarter::before { + content: "\f2ca"; } + +.fa-temperature-three-quarters::before { + content: "\f2c8"; } + +.fa-temperature-3::before { + content: "\f2c8"; } + +.fa-thermometer-3::before { + content: "\f2c8"; } + +.fa-thermometer-three-quarters::before { + content: "\f2c8"; } + +.fa-tenge-sign::before { + content: "\f7d7"; } + +.fa-tenge::before { + content: "\f7d7"; } + +.fa-tent::before { + content: "\e57d"; } + +.fa-tent-arrow-down-to-line::before { + content: "\e57e"; } + +.fa-tent-arrow-left-right::before { + content: "\e57f"; } + +.fa-tent-arrow-turn-left::before { + content: "\e580"; } + +.fa-tent-arrows-down::before { + content: "\e581"; } + +.fa-tents::before { + content: "\e582"; } 
+ +.fa-terminal::before { + content: "\f120"; } + +.fa-text-height::before { + content: "\f034"; } + +.fa-text-slash::before { + content: "\f87d"; } + +.fa-remove-format::before { + content: "\f87d"; } + +.fa-text-width::before { + content: "\f035"; } + +.fa-thermometer::before { + content: "\f491"; } + +.fa-thumbs-down::before { + content: "\f165"; } + +.fa-thumbs-up::before { + content: "\f164"; } + +.fa-thumbtack::before { + content: "\f08d"; } + +.fa-thumb-tack::before { + content: "\f08d"; } + +.fa-ticket::before { + content: "\f145"; } + +.fa-ticket-simple::before { + content: "\f3ff"; } + +.fa-ticket-alt::before { + content: "\f3ff"; } + +.fa-timeline::before { + content: "\e29c"; } + +.fa-toggle-off::before { + content: "\f204"; } + +.fa-toggle-on::before { + content: "\f205"; } + +.fa-toilet::before { + content: "\f7d8"; } + +.fa-toilet-paper::before { + content: "\f71e"; } + +.fa-toilet-paper-slash::before { + content: "\e072"; } + +.fa-toilet-portable::before { + content: "\e583"; } + +.fa-toilets-portable::before { + content: "\e584"; } + +.fa-toolbox::before { + content: "\f552"; } + +.fa-tooth::before { + content: "\f5c9"; } + +.fa-torii-gate::before { + content: "\f6a1"; } + +.fa-tornado::before { + content: "\f76f"; } + +.fa-tower-broadcast::before { + content: "\f519"; } + +.fa-broadcast-tower::before { + content: "\f519"; } + +.fa-tower-cell::before { + content: "\e585"; } + +.fa-tower-observation::before { + content: "\e586"; } + +.fa-tractor::before { + content: "\f722"; } + +.fa-trademark::before { + content: "\f25c"; } + +.fa-traffic-light::before { + content: "\f637"; } + +.fa-trailer::before { + content: "\e041"; } + +.fa-train::before { + content: "\f238"; } + +.fa-train-subway::before { + content: "\f239"; } + +.fa-subway::before { + content: "\f239"; } + +.fa-train-tram::before { + content: "\f7da"; } + +.fa-tram::before { + content: "\f7da"; } + +.fa-transgender::before { + content: "\f225"; } + +.fa-transgender-alt::before { + content: 
"\f225"; } + +.fa-trash::before { + content: "\f1f8"; } + +.fa-trash-arrow-up::before { + content: "\f829"; } + +.fa-trash-restore::before { + content: "\f829"; } + +.fa-trash-can::before { + content: "\f2ed"; } + +.fa-trash-alt::before { + content: "\f2ed"; } + +.fa-trash-can-arrow-up::before { + content: "\f82a"; } + +.fa-trash-restore-alt::before { + content: "\f82a"; } + +.fa-tree::before { + content: "\f1bb"; } + +.fa-tree-city::before { + content: "\e587"; } + +.fa-triangle-exclamation::before { + content: "\f071"; } + +.fa-exclamation-triangle::before { + content: "\f071"; } + +.fa-warning::before { + content: "\f071"; } + +.fa-trophy::before { + content: "\f091"; } + +.fa-trowel::before { + content: "\e589"; } + +.fa-trowel-bricks::before { + content: "\e58a"; } + +.fa-truck::before { + content: "\f0d1"; } + +.fa-truck-arrow-right::before { + content: "\e58b"; } + +.fa-truck-droplet::before { + content: "\e58c"; } + +.fa-truck-fast::before { + content: "\f48b"; } + +.fa-shipping-fast::before { + content: "\f48b"; } + +.fa-truck-field::before { + content: "\e58d"; } + +.fa-truck-field-un::before { + content: "\e58e"; } + +.fa-truck-front::before { + content: "\e2b7"; } + +.fa-truck-medical::before { + content: "\f0f9"; } + +.fa-ambulance::before { + content: "\f0f9"; } + +.fa-truck-monster::before { + content: "\f63b"; } + +.fa-truck-moving::before { + content: "\f4df"; } + +.fa-truck-pickup::before { + content: "\f63c"; } + +.fa-truck-plane::before { + content: "\e58f"; } + +.fa-truck-ramp-box::before { + content: "\f4de"; } + +.fa-truck-loading::before { + content: "\f4de"; } + +.fa-tty::before { + content: "\f1e4"; } + +.fa-teletype::before { + content: "\f1e4"; } + +.fa-turkish-lira-sign::before { + content: "\e2bb"; } + +.fa-try::before { + content: "\e2bb"; } + +.fa-turkish-lira::before { + content: "\e2bb"; } + +.fa-turn-down::before { + content: "\f3be"; } + +.fa-level-down-alt::before { + content: "\f3be"; } + +.fa-turn-up::before { + content: 
"\f3bf"; } + +.fa-level-up-alt::before { + content: "\f3bf"; } + +.fa-tv::before { + content: "\f26c"; } + +.fa-television::before { + content: "\f26c"; } + +.fa-tv-alt::before { + content: "\f26c"; } + +.fa-u::before { + content: "\55"; } + +.fa-umbrella::before { + content: "\f0e9"; } + +.fa-umbrella-beach::before { + content: "\f5ca"; } + +.fa-underline::before { + content: "\f0cd"; } + +.fa-universal-access::before { + content: "\f29a"; } + +.fa-unlock::before { + content: "\f09c"; } + +.fa-unlock-keyhole::before { + content: "\f13e"; } + +.fa-unlock-alt::before { + content: "\f13e"; } + +.fa-up-down::before { + content: "\f338"; } + +.fa-arrows-alt-v::before { + content: "\f338"; } + +.fa-up-down-left-right::before { + content: "\f0b2"; } + +.fa-arrows-alt::before { + content: "\f0b2"; } + +.fa-up-long::before { + content: "\f30c"; } + +.fa-long-arrow-alt-up::before { + content: "\f30c"; } + +.fa-up-right-and-down-left-from-center::before { + content: "\f424"; } + +.fa-expand-alt::before { + content: "\f424"; } + +.fa-up-right-from-square::before { + content: "\f35d"; } + +.fa-external-link-alt::before { + content: "\f35d"; } + +.fa-upload::before { + content: "\f093"; } + +.fa-user::before { + content: "\f007"; } + +.fa-user-astronaut::before { + content: "\f4fb"; } + +.fa-user-check::before { + content: "\f4fc"; } + +.fa-user-clock::before { + content: "\f4fd"; } + +.fa-user-doctor::before { + content: "\f0f0"; } + +.fa-user-md::before { + content: "\f0f0"; } + +.fa-user-gear::before { + content: "\f4fe"; } + +.fa-user-cog::before { + content: "\f4fe"; } + +.fa-user-graduate::before { + content: "\f501"; } + +.fa-user-group::before { + content: "\f500"; } + +.fa-user-friends::before { + content: "\f500"; } + +.fa-user-injured::before { + content: "\f728"; } + +.fa-user-large::before { + content: "\f406"; } + +.fa-user-alt::before { + content: "\f406"; } + +.fa-user-large-slash::before { + content: "\f4fa"; } + +.fa-user-alt-slash::before { + content: 
"\f4fa"; } + +.fa-user-lock::before { + content: "\f502"; } + +.fa-user-minus::before { + content: "\f503"; } + +.fa-user-ninja::before { + content: "\f504"; } + +.fa-user-nurse::before { + content: "\f82f"; } + +.fa-user-pen::before { + content: "\f4ff"; } + +.fa-user-edit::before { + content: "\f4ff"; } + +.fa-user-plus::before { + content: "\f234"; } + +.fa-user-secret::before { + content: "\f21b"; } + +.fa-user-shield::before { + content: "\f505"; } + +.fa-user-slash::before { + content: "\f506"; } + +.fa-user-tag::before { + content: "\f507"; } + +.fa-user-tie::before { + content: "\f508"; } + +.fa-user-xmark::before { + content: "\f235"; } + +.fa-user-times::before { + content: "\f235"; } + +.fa-users::before { + content: "\f0c0"; } + +.fa-users-between-lines::before { + content: "\e591"; } + +.fa-users-gear::before { + content: "\f509"; } + +.fa-users-cog::before { + content: "\f509"; } + +.fa-users-line::before { + content: "\e592"; } + +.fa-users-rays::before { + content: "\e593"; } + +.fa-users-rectangle::before { + content: "\e594"; } + +.fa-users-slash::before { + content: "\e073"; } + +.fa-users-viewfinder::before { + content: "\e595"; } + +.fa-utensils::before { + content: "\f2e7"; } + +.fa-cutlery::before { + content: "\f2e7"; } + +.fa-v::before { + content: "\56"; } + +.fa-van-shuttle::before { + content: "\f5b6"; } + +.fa-shuttle-van::before { + content: "\f5b6"; } + +.fa-vault::before { + content: "\e2c5"; } + +.fa-vector-square::before { + content: "\f5cb"; } + +.fa-venus::before { + content: "\f221"; } + +.fa-venus-double::before { + content: "\f226"; } + +.fa-venus-mars::before { + content: "\f228"; } + +.fa-vest::before { + content: "\e085"; } + +.fa-vest-patches::before { + content: "\e086"; } + +.fa-vial::before { + content: "\f492"; } + +.fa-vial-circle-check::before { + content: "\e596"; } + +.fa-vial-virus::before { + content: "\e597"; } + +.fa-vials::before { + content: "\f493"; } + +.fa-video::before { + content: "\f03d"; } + 
+.fa-video-camera::before { + content: "\f03d"; } + +.fa-video-slash::before { + content: "\f4e2"; } + +.fa-vihara::before { + content: "\f6a7"; } + +.fa-virus::before { + content: "\e074"; } + +.fa-virus-covid::before { + content: "\e4a8"; } + +.fa-virus-covid-slash::before { + content: "\e4a9"; } + +.fa-virus-slash::before { + content: "\e075"; } + +.fa-viruses::before { + content: "\e076"; } + +.fa-voicemail::before { + content: "\f897"; } + +.fa-volcano::before { + content: "\f770"; } + +.fa-volleyball::before { + content: "\f45f"; } + +.fa-volleyball-ball::before { + content: "\f45f"; } + +.fa-volume-high::before { + content: "\f028"; } + +.fa-volume-up::before { + content: "\f028"; } + +.fa-volume-low::before { + content: "\f027"; } + +.fa-volume-down::before { + content: "\f027"; } + +.fa-volume-off::before { + content: "\f026"; } + +.fa-volume-xmark::before { + content: "\f6a9"; } + +.fa-volume-mute::before { + content: "\f6a9"; } + +.fa-volume-times::before { + content: "\f6a9"; } + +.fa-vr-cardboard::before { + content: "\f729"; } + +.fa-w::before { + content: "\57"; } + +.fa-walkie-talkie::before { + content: "\f8ef"; } + +.fa-wallet::before { + content: "\f555"; } + +.fa-wand-magic::before { + content: "\f0d0"; } + +.fa-magic::before { + content: "\f0d0"; } + +.fa-wand-magic-sparkles::before { + content: "\e2ca"; } + +.fa-magic-wand-sparkles::before { + content: "\e2ca"; } + +.fa-wand-sparkles::before { + content: "\f72b"; } + +.fa-warehouse::before { + content: "\f494"; } + +.fa-water::before { + content: "\f773"; } + +.fa-water-ladder::before { + content: "\f5c5"; } + +.fa-ladder-water::before { + content: "\f5c5"; } + +.fa-swimming-pool::before { + content: "\f5c5"; } + +.fa-wave-square::before { + content: "\f83e"; } + +.fa-weight-hanging::before { + content: "\f5cd"; } + +.fa-weight-scale::before { + content: "\f496"; } + +.fa-weight::before { + content: "\f496"; } + +.fa-wheat-awn::before { + content: "\e2cd"; } + +.fa-wheat-alt::before { + 
content: "\e2cd"; } + +.fa-wheat-awn-circle-exclamation::before { + content: "\e598"; } + +.fa-wheelchair::before { + content: "\f193"; } + +.fa-wheelchair-move::before { + content: "\e2ce"; } + +.fa-wheelchair-alt::before { + content: "\e2ce"; } + +.fa-whiskey-glass::before { + content: "\f7a0"; } + +.fa-glass-whiskey::before { + content: "\f7a0"; } + +.fa-wifi::before { + content: "\f1eb"; } + +.fa-wifi-3::before { + content: "\f1eb"; } + +.fa-wifi-strong::before { + content: "\f1eb"; } + +.fa-wind::before { + content: "\f72e"; } + +.fa-window-maximize::before { + content: "\f2d0"; } + +.fa-window-minimize::before { + content: "\f2d1"; } + +.fa-window-restore::before { + content: "\f2d2"; } + +.fa-wine-bottle::before { + content: "\f72f"; } + +.fa-wine-glass::before { + content: "\f4e3"; } + +.fa-wine-glass-empty::before { + content: "\f5ce"; } + +.fa-wine-glass-alt::before { + content: "\f5ce"; } + +.fa-won-sign::before { + content: "\f159"; } + +.fa-krw::before { + content: "\f159"; } + +.fa-won::before { + content: "\f159"; } + +.fa-worm::before { + content: "\e599"; } + +.fa-wrench::before { + content: "\f0ad"; } + +.fa-x::before { + content: "\58"; } + +.fa-x-ray::before { + content: "\f497"; } + +.fa-xmark::before { + content: "\f00d"; } + +.fa-close::before { + content: "\f00d"; } + +.fa-multiply::before { + content: "\f00d"; } + +.fa-remove::before { + content: "\f00d"; } + +.fa-times::before { + content: "\f00d"; } + +.fa-xmarks-lines::before { + content: "\e59a"; } + +.fa-y::before { + content: "\59"; } + +.fa-yen-sign::before { + content: "\f157"; } + +.fa-cny::before { + content: "\f157"; } + +.fa-jpy::before { + content: "\f157"; } + +.fa-rmb::before { + content: "\f157"; } + +.fa-yen::before { + content: "\f157"; } + +.fa-yin-yang::before { + content: "\f6ad"; } + +.fa-z::before { + content: "\5a"; } + +.sr-only, +.fa-sr-only { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 
0); + white-space: nowrap; + border-width: 0; } + +.sr-only-focusable:not(:focus), +.fa-sr-only-focusable:not(:focus) { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + margin: -1px; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border-width: 0; } +:root, :host { + --fa-font-brands: normal 400 1em/1 "Font Awesome 6 Brands"; } + +@font-face { + font-family: 'Font Awesome 6 Brands'; + font-style: normal; + font-weight: 400; + font-display: block; + src: url("1e21o67/fa-brands-400.woff2") format("woff2"), url("1e21o67/fa-brands-400.ttf") format("truetype"); } + +.fab, +.fa-brands { + font-family: 'Font Awesome 6 Brands'; + font-weight: 400; } + +.fa-42-group:before { + content: "\e080"; } + +.fa-innosoft:before { + content: "\e080"; } + +.fa-500px:before { + content: "\f26e"; } + +.fa-accessible-icon:before { + content: "\f368"; } + +.fa-accusoft:before { + content: "\f369"; } + +.fa-adn:before { + content: "\f170"; } + +.fa-adversal:before { + content: "\f36a"; } + +.fa-affiliatetheme:before { + content: "\f36b"; } + +.fa-airbnb:before { + content: "\f834"; } + +.fa-algolia:before { + content: "\f36c"; } + +.fa-alipay:before { + content: "\f642"; } + +.fa-amazon:before { + content: "\f270"; } + +.fa-amazon-pay:before { + content: "\f42c"; } + +.fa-amilia:before { + content: "\f36d"; } + +.fa-android:before { + content: "\f17b"; } + +.fa-angellist:before { + content: "\f209"; } + +.fa-angrycreative:before { + content: "\f36e"; } + +.fa-angular:before { + content: "\f420"; } + +.fa-app-store:before { + content: "\f36f"; } + +.fa-app-store-ios:before { + content: "\f370"; } + +.fa-apper:before { + content: "\f371"; } + +.fa-apple:before { + content: "\f179"; } + +.fa-apple-pay:before { + content: "\f415"; } + +.fa-artstation:before { + content: "\f77a"; } + +.fa-asymmetrik:before { + content: "\f372"; } + +.fa-atlassian:before { + content: "\f77b"; } + +.fa-audible:before { + content: "\f373"; } + +.fa-autoprefixer:before { + 
content: "\f41c"; } + +.fa-avianex:before { + content: "\f374"; } + +.fa-aviato:before { + content: "\f421"; } + +.fa-aws:before { + content: "\f375"; } + +.fa-bandcamp:before { + content: "\f2d5"; } + +.fa-battle-net:before { + content: "\f835"; } + +.fa-behance:before { + content: "\f1b4"; } + +.fa-behance-square:before { + content: "\f1b5"; } + +.fa-bilibili:before { + content: "\e3d9"; } + +.fa-bimobject:before { + content: "\f378"; } + +.fa-bitbucket:before { + content: "\f171"; } + +.fa-bitcoin:before { + content: "\f379"; } + +.fa-bity:before { + content: "\f37a"; } + +.fa-black-tie:before { + content: "\f27e"; } + +.fa-blackberry:before { + content: "\f37b"; } + +.fa-blogger:before { + content: "\f37c"; } + +.fa-blogger-b:before { + content: "\f37d"; } + +.fa-bluetooth:before { + content: "\f293"; } + +.fa-bluetooth-b:before { + content: "\f294"; } + +.fa-bootstrap:before { + content: "\f836"; } + +.fa-bots:before { + content: "\e340"; } + +.fa-btc:before { + content: "\f15a"; } + +.fa-buffer:before { + content: "\f837"; } + +.fa-buromobelexperte:before { + content: "\f37f"; } + +.fa-buy-n-large:before { + content: "\f8a6"; } + +.fa-buysellads:before { + content: "\f20d"; } + +.fa-canadian-maple-leaf:before { + content: "\f785"; } + +.fa-cc-amazon-pay:before { + content: "\f42d"; } + +.fa-cc-amex:before { + content: "\f1f3"; } + +.fa-cc-apple-pay:before { + content: "\f416"; } + +.fa-cc-diners-club:before { + content: "\f24c"; } + +.fa-cc-discover:before { + content: "\f1f2"; } + +.fa-cc-jcb:before { + content: "\f24b"; } + +.fa-cc-mastercard:before { + content: "\f1f1"; } + +.fa-cc-paypal:before { + content: "\f1f4"; } + +.fa-cc-stripe:before { + content: "\f1f5"; } + +.fa-cc-visa:before { + content: "\f1f0"; } + +.fa-centercode:before { + content: "\f380"; } + +.fa-centos:before { + content: "\f789"; } + +.fa-chrome:before { + content: "\f268"; } + +.fa-chromecast:before { + content: "\f838"; } + +.fa-cloudflare:before { + content: "\e07d"; } + 
+.fa-cloudscale:before { + content: "\f383"; } + +.fa-cloudsmith:before { + content: "\f384"; } + +.fa-cloudversify:before { + content: "\f385"; } + +.fa-cmplid:before { + content: "\e360"; } + +.fa-codepen:before { + content: "\f1cb"; } + +.fa-codiepie:before { + content: "\f284"; } + +.fa-confluence:before { + content: "\f78d"; } + +.fa-connectdevelop:before { + content: "\f20e"; } + +.fa-contao:before { + content: "\f26d"; } + +.fa-cotton-bureau:before { + content: "\f89e"; } + +.fa-cpanel:before { + content: "\f388"; } + +.fa-creative-commons:before { + content: "\f25e"; } + +.fa-creative-commons-by:before { + content: "\f4e7"; } + +.fa-creative-commons-nc:before { + content: "\f4e8"; } + +.fa-creative-commons-nc-eu:before { + content: "\f4e9"; } + +.fa-creative-commons-nc-jp:before { + content: "\f4ea"; } + +.fa-creative-commons-nd:before { + content: "\f4eb"; } + +.fa-creative-commons-pd:before { + content: "\f4ec"; } + +.fa-creative-commons-pd-alt:before { + content: "\f4ed"; } + +.fa-creative-commons-remix:before { + content: "\f4ee"; } + +.fa-creative-commons-sa:before { + content: "\f4ef"; } + +.fa-creative-commons-sampling:before { + content: "\f4f0"; } + +.fa-creative-commons-sampling-plus:before { + content: "\f4f1"; } + +.fa-creative-commons-share:before { + content: "\f4f2"; } + +.fa-creative-commons-zero:before { + content: "\f4f3"; } + +.fa-critical-role:before { + content: "\f6c9"; } + +.fa-css3:before { + content: "\f13c"; } + +.fa-css3-alt:before { + content: "\f38b"; } + +.fa-cuttlefish:before { + content: "\f38c"; } + +.fa-d-and-d:before { + content: "\f38d"; } + +.fa-d-and-d-beyond:before { + content: "\f6ca"; } + +.fa-dailymotion:before { + content: "\e052"; } + +.fa-dashcube:before { + content: "\f210"; } + +.fa-deezer:before { + content: "\e077"; } + +.fa-delicious:before { + content: "\f1a5"; } + +.fa-deploydog:before { + content: "\f38e"; } + +.fa-deskpro:before { + content: "\f38f"; } + +.fa-dev:before { + content: "\f6cc"; } + 
+.fa-deviantart:before { + content: "\f1bd"; } + +.fa-dhl:before { + content: "\f790"; } + +.fa-diaspora:before { + content: "\f791"; } + +.fa-digg:before { + content: "\f1a6"; } + +.fa-digital-ocean:before { + content: "\f391"; } + +.fa-discord:before { + content: "\f392"; } + +.fa-discourse:before { + content: "\f393"; } + +.fa-dochub:before { + content: "\f394"; } + +.fa-docker:before { + content: "\f395"; } + +.fa-draft2digital:before { + content: "\f396"; } + +.fa-dribbble:before { + content: "\f17d"; } + +.fa-dribbble-square:before { + content: "\f397"; } + +.fa-dropbox:before { + content: "\f16b"; } + +.fa-drupal:before { + content: "\f1a9"; } + +.fa-dyalog:before { + content: "\f399"; } + +.fa-earlybirds:before { + content: "\f39a"; } + +.fa-ebay:before { + content: "\f4f4"; } + +.fa-edge:before { + content: "\f282"; } + +.fa-edge-legacy:before { + content: "\e078"; } + +.fa-elementor:before { + content: "\f430"; } + +.fa-ello:before { + content: "\f5f1"; } + +.fa-ember:before { + content: "\f423"; } + +.fa-empire:before { + content: "\f1d1"; } + +.fa-envira:before { + content: "\f299"; } + +.fa-erlang:before { + content: "\f39d"; } + +.fa-ethereum:before { + content: "\f42e"; } + +.fa-etsy:before { + content: "\f2d7"; } + +.fa-evernote:before { + content: "\f839"; } + +.fa-expeditedssl:before { + content: "\f23e"; } + +.fa-facebook:before { + content: "\f09a"; } + +.fa-facebook-f:before { + content: "\f39e"; } + +.fa-facebook-messenger:before { + content: "\f39f"; } + +.fa-facebook-square:before { + content: "\f082"; } + +.fa-fantasy-flight-games:before { + content: "\f6dc"; } + +.fa-fedex:before { + content: "\f797"; } + +.fa-fedora:before { + content: "\f798"; } + +.fa-figma:before { + content: "\f799"; } + +.fa-firefox:before { + content: "\f269"; } + +.fa-firefox-browser:before { + content: "\e007"; } + +.fa-first-order:before { + content: "\f2b0"; } + +.fa-first-order-alt:before { + content: "\f50a"; } + +.fa-firstdraft:before { + content: "\f3a1"; } 
+ +.fa-flickr:before { + content: "\f16e"; } + +.fa-flipboard:before { + content: "\f44d"; } + +.fa-fly:before { + content: "\f417"; } + +.fa-font-awesome:before { + content: "\f2b4"; } + +.fa-font-awesome-flag:before { + content: "\f2b4"; } + +.fa-font-awesome-logo-full:before { + content: "\f2b4"; } + +.fa-fonticons:before { + content: "\f280"; } + +.fa-fonticons-fi:before { + content: "\f3a2"; } + +.fa-fort-awesome:before { + content: "\f286"; } + +.fa-fort-awesome-alt:before { + content: "\f3a3"; } + +.fa-forumbee:before { + content: "\f211"; } + +.fa-foursquare:before { + content: "\f180"; } + +.fa-free-code-camp:before { + content: "\f2c5"; } + +.fa-freebsd:before { + content: "\f3a4"; } + +.fa-fulcrum:before { + content: "\f50b"; } + +.fa-galactic-republic:before { + content: "\f50c"; } + +.fa-galactic-senate:before { + content: "\f50d"; } + +.fa-get-pocket:before { + content: "\f265"; } + +.fa-gg:before { + content: "\f260"; } + +.fa-gg-circle:before { + content: "\f261"; } + +.fa-git:before { + content: "\f1d3"; } + +.fa-git-alt:before { + content: "\f841"; } + +.fa-git-square:before { + content: "\f1d2"; } + +.fa-github:before { + content: "\f09b"; } + +.fa-github-alt:before { + content: "\f113"; } + +.fa-github-square:before { + content: "\f092"; } + +.fa-gitkraken:before { + content: "\f3a6"; } + +.fa-gitlab:before { + content: "\f296"; } + +.fa-gitter:before { + content: "\f426"; } + +.fa-glide:before { + content: "\f2a5"; } + +.fa-glide-g:before { + content: "\f2a6"; } + +.fa-gofore:before { + content: "\f3a7"; } + +.fa-golang:before { + content: "\e40f"; } + +.fa-goodreads:before { + content: "\f3a8"; } + +.fa-goodreads-g:before { + content: "\f3a9"; } + +.fa-google:before { + content: "\f1a0"; } + +.fa-google-drive:before { + content: "\f3aa"; } + +.fa-google-pay:before { + content: "\e079"; } + +.fa-google-play:before { + content: "\f3ab"; } + +.fa-google-plus:before { + content: "\f2b3"; } + +.fa-google-plus-g:before { + content: "\f0d5"; } + 
+.fa-google-plus-square:before { + content: "\f0d4"; } + +.fa-google-wallet:before { + content: "\f1ee"; } + +.fa-gratipay:before { + content: "\f184"; } + +.fa-grav:before { + content: "\f2d6"; } + +.fa-gripfire:before { + content: "\f3ac"; } + +.fa-grunt:before { + content: "\f3ad"; } + +.fa-guilded:before { + content: "\e07e"; } + +.fa-gulp:before { + content: "\f3ae"; } + +.fa-hacker-news:before { + content: "\f1d4"; } + +.fa-hacker-news-square:before { + content: "\f3af"; } + +.fa-hackerrank:before { + content: "\f5f7"; } + +.fa-hashnode:before { + content: "\e499"; } + +.fa-hips:before { + content: "\f452"; } + +.fa-hire-a-helper:before { + content: "\f3b0"; } + +.fa-hive:before { + content: "\e07f"; } + +.fa-hooli:before { + content: "\f427"; } + +.fa-hornbill:before { + content: "\f592"; } + +.fa-hotjar:before { + content: "\f3b1"; } + +.fa-houzz:before { + content: "\f27c"; } + +.fa-html5:before { + content: "\f13b"; } + +.fa-hubspot:before { + content: "\f3b2"; } + +.fa-ideal:before { + content: "\e013"; } + +.fa-imdb:before { + content: "\f2d8"; } + +.fa-instagram:before { + content: "\f16d"; } + +.fa-instagram-square:before { + content: "\e055"; } + +.fa-instalod:before { + content: "\e081"; } + +.fa-intercom:before { + content: "\f7af"; } + +.fa-internet-explorer:before { + content: "\f26b"; } + +.fa-invision:before { + content: "\f7b0"; } + +.fa-ioxhost:before { + content: "\f208"; } + +.fa-itch-io:before { + content: "\f83a"; } + +.fa-itunes:before { + content: "\f3b4"; } + +.fa-itunes-note:before { + content: "\f3b5"; } + +.fa-java:before { + content: "\f4e4"; } + +.fa-jedi-order:before { + content: "\f50e"; } + +.fa-jenkins:before { + content: "\f3b6"; } + +.fa-jira:before { + content: "\f7b1"; } + +.fa-joget:before { + content: "\f3b7"; } + +.fa-joomla:before { + content: "\f1aa"; } + +.fa-js:before { + content: "\f3b8"; } + +.fa-js-square:before { + content: "\f3b9"; } + +.fa-jsfiddle:before { + content: "\f1cc"; } + +.fa-kaggle:before { + 
content: "\f5fa"; } + +.fa-keybase:before { + content: "\f4f5"; } + +.fa-keycdn:before { + content: "\f3ba"; } + +.fa-kickstarter:before { + content: "\f3bb"; } + +.fa-kickstarter-k:before { + content: "\f3bc"; } + +.fa-korvue:before { + content: "\f42f"; } + +.fa-laravel:before { + content: "\f3bd"; } + +.fa-lastfm:before { + content: "\f202"; } + +.fa-lastfm-square:before { + content: "\f203"; } + +.fa-leanpub:before { + content: "\f212"; } + +.fa-less:before { + content: "\f41d"; } + +.fa-line:before { + content: "\f3c0"; } + +.fa-linkedin:before { + content: "\f08c"; } + +.fa-linkedin-in:before { + content: "\f0e1"; } + +.fa-linode:before { + content: "\f2b8"; } + +.fa-linux:before { + content: "\f17c"; } + +.fa-lyft:before { + content: "\f3c3"; } + +.fa-magento:before { + content: "\f3c4"; } + +.fa-mailchimp:before { + content: "\f59e"; } + +.fa-mandalorian:before { + content: "\f50f"; } + +.fa-markdown:before { + content: "\f60f"; } + +.fa-mastodon:before { + content: "\f4f6"; } + +.fa-maxcdn:before { + content: "\f136"; } + +.fa-mdb:before { + content: "\f8ca"; } + +.fa-medapps:before { + content: "\f3c6"; } + +.fa-medium:before { + content: "\f23a"; } + +.fa-medium-m:before { + content: "\f23a"; } + +.fa-medrt:before { + content: "\f3c8"; } + +.fa-meetup:before { + content: "\f2e0"; } + +.fa-megaport:before { + content: "\f5a3"; } + +.fa-mendeley:before { + content: "\f7b3"; } + +.fa-microblog:before { + content: "\e01a"; } + +.fa-microsoft:before { + content: "\f3ca"; } + +.fa-mix:before { + content: "\f3cb"; } + +.fa-mixcloud:before { + content: "\f289"; } + +.fa-mixer:before { + content: "\e056"; } + +.fa-mizuni:before { + content: "\f3cc"; } + +.fa-modx:before { + content: "\f285"; } + +.fa-monero:before { + content: "\f3d0"; } + +.fa-napster:before { + content: "\f3d2"; } + +.fa-neos:before { + content: "\f612"; } + +.fa-nfc-directional:before { + content: "\e530"; } + +.fa-nfc-symbol:before { + content: "\e531"; } + +.fa-nimblr:before { + content: 
"\f5a8"; } + +.fa-node:before { + content: "\f419"; } + +.fa-node-js:before { + content: "\f3d3"; } + +.fa-npm:before { + content: "\f3d4"; } + +.fa-ns8:before { + content: "\f3d5"; } + +.fa-nutritionix:before { + content: "\f3d6"; } + +.fa-octopus-deploy:before { + content: "\e082"; } + +.fa-odnoklassniki:before { + content: "\f263"; } + +.fa-odnoklassniki-square:before { + content: "\f264"; } + +.fa-old-republic:before { + content: "\f510"; } + +.fa-opencart:before { + content: "\f23d"; } + +.fa-openid:before { + content: "\f19b"; } + +.fa-opera:before { + content: "\f26a"; } + +.fa-optin-monster:before { + content: "\f23c"; } + +.fa-orcid:before { + content: "\f8d2"; } + +.fa-osi:before { + content: "\f41a"; } + +.fa-padlet:before { + content: "\e4a0"; } + +.fa-page4:before { + content: "\f3d7"; } + +.fa-pagelines:before { + content: "\f18c"; } + +.fa-palfed:before { + content: "\f3d8"; } + +.fa-patreon:before { + content: "\f3d9"; } + +.fa-paypal:before { + content: "\f1ed"; } + +.fa-perbyte:before { + content: "\e083"; } + +.fa-periscope:before { + content: "\f3da"; } + +.fa-phabricator:before { + content: "\f3db"; } + +.fa-phoenix-framework:before { + content: "\f3dc"; } + +.fa-phoenix-squadron:before { + content: "\f511"; } + +.fa-php:before { + content: "\f457"; } + +.fa-pied-piper:before { + content: "\f2ae"; } + +.fa-pied-piper-alt:before { + content: "\f1a8"; } + +.fa-pied-piper-hat:before { + content: "\f4e5"; } + +.fa-pied-piper-pp:before { + content: "\f1a7"; } + +.fa-pied-piper-square:before { + content: "\e01e"; } + +.fa-pinterest:before { + content: "\f0d2"; } + +.fa-pinterest-p:before { + content: "\f231"; } + +.fa-pinterest-square:before { + content: "\f0d3"; } + +.fa-pix:before { + content: "\e43a"; } + +.fa-playstation:before { + content: "\f3df"; } + +.fa-product-hunt:before { + content: "\f288"; } + +.fa-pushed:before { + content: "\f3e1"; } + +.fa-python:before { + content: "\f3e2"; } + +.fa-qq:before { + content: "\f1d6"; } + 
+.fa-quinscape:before { + content: "\f459"; } + +.fa-quora:before { + content: "\f2c4"; } + +.fa-r-project:before { + content: "\f4f7"; } + +.fa-raspberry-pi:before { + content: "\f7bb"; } + +.fa-ravelry:before { + content: "\f2d9"; } + +.fa-react:before { + content: "\f41b"; } + +.fa-reacteurope:before { + content: "\f75d"; } + +.fa-readme:before { + content: "\f4d5"; } + +.fa-rebel:before { + content: "\f1d0"; } + +.fa-red-river:before { + content: "\f3e3"; } + +.fa-reddit:before { + content: "\f1a1"; } + +.fa-reddit-alien:before { + content: "\f281"; } + +.fa-reddit-square:before { + content: "\f1a2"; } + +.fa-redhat:before { + content: "\f7bc"; } + +.fa-renren:before { + content: "\f18b"; } + +.fa-replyd:before { + content: "\f3e6"; } + +.fa-researchgate:before { + content: "\f4f8"; } + +.fa-resolving:before { + content: "\f3e7"; } + +.fa-rev:before { + content: "\f5b2"; } + +.fa-rocketchat:before { + content: "\f3e8"; } + +.fa-rockrms:before { + content: "\f3e9"; } + +.fa-rust:before { + content: "\e07a"; } + +.fa-safari:before { + content: "\f267"; } + +.fa-salesforce:before { + content: "\f83b"; } + +.fa-sass:before { + content: "\f41e"; } + +.fa-schlix:before { + content: "\f3ea"; } + +.fa-screenpal:before { + content: "\e570"; } + +.fa-scribd:before { + content: "\f28a"; } + +.fa-searchengin:before { + content: "\f3eb"; } + +.fa-sellcast:before { + content: "\f2da"; } + +.fa-sellsy:before { + content: "\f213"; } + +.fa-servicestack:before { + content: "\f3ec"; } + +.fa-shirtsinbulk:before { + content: "\f214"; } + +.fa-shopify:before { + content: "\e057"; } + +.fa-shopware:before { + content: "\f5b5"; } + +.fa-simplybuilt:before { + content: "\f215"; } + +.fa-sistrix:before { + content: "\f3ee"; } + +.fa-sith:before { + content: "\f512"; } + +.fa-sitrox:before { + content: "\e44a"; } + +.fa-sketch:before { + content: "\f7c6"; } + +.fa-skyatlas:before { + content: "\f216"; } + +.fa-skype:before { + content: "\f17e"; } + +.fa-slack:before { + content: 
"\f198"; } + +.fa-slack-hash:before { + content: "\f198"; } + +.fa-slideshare:before { + content: "\f1e7"; } + +.fa-snapchat:before { + content: "\f2ab"; } + +.fa-snapchat-ghost:before { + content: "\f2ab"; } + +.fa-snapchat-square:before { + content: "\f2ad"; } + +.fa-soundcloud:before { + content: "\f1be"; } + +.fa-sourcetree:before { + content: "\f7d3"; } + +.fa-speakap:before { + content: "\f3f3"; } + +.fa-speaker-deck:before { + content: "\f83c"; } + +.fa-spotify:before { + content: "\f1bc"; } + +.fa-square-font-awesome:before { + content: "\f425"; } + +.fa-square-font-awesome-stroke:before { + content: "\f35c"; } + +.fa-font-awesome-alt:before { + content: "\f35c"; } + +.fa-squarespace:before { + content: "\f5be"; } + +.fa-stack-exchange:before { + content: "\f18d"; } + +.fa-stack-overflow:before { + content: "\f16c"; } + +.fa-stackpath:before { + content: "\f842"; } + +.fa-staylinked:before { + content: "\f3f5"; } + +.fa-steam:before { + content: "\f1b6"; } + +.fa-steam-square:before { + content: "\f1b7"; } + +.fa-steam-symbol:before { + content: "\f3f6"; } + +.fa-sticker-mule:before { + content: "\f3f7"; } + +.fa-strava:before { + content: "\f428"; } + +.fa-stripe:before { + content: "\f429"; } + +.fa-stripe-s:before { + content: "\f42a"; } + +.fa-studiovinari:before { + content: "\f3f8"; } + +.fa-stumbleupon:before { + content: "\f1a4"; } + +.fa-stumbleupon-circle:before { + content: "\f1a3"; } + +.fa-superpowers:before { + content: "\f2dd"; } + +.fa-supple:before { + content: "\f3f9"; } + +.fa-suse:before { + content: "\f7d6"; } + +.fa-swift:before { + content: "\f8e1"; } + +.fa-symfony:before { + content: "\f83d"; } + +.fa-teamspeak:before { + content: "\f4f9"; } + +.fa-telegram:before { + content: "\f2c6"; } + +.fa-telegram-plane:before { + content: "\f2c6"; } + +.fa-tencent-weibo:before { + content: "\f1d5"; } + +.fa-the-red-yeti:before { + content: "\f69d"; } + +.fa-themeco:before { + content: "\f5c6"; } + +.fa-themeisle:before { + content: "\f2b2"; } 
+ +.fa-think-peaks:before { + content: "\f731"; } + +.fa-tiktok:before { + content: "\e07b"; } + +.fa-trade-federation:before { + content: "\f513"; } + +.fa-trello:before { + content: "\f181"; } + +.fa-tumblr:before { + content: "\f173"; } + +.fa-tumblr-square:before { + content: "\f174"; } + +.fa-twitch:before { + content: "\f1e8"; } + +.fa-twitter:before { + content: "\f099"; } + +.fa-twitter-square:before { + content: "\f081"; } + +.fa-typo3:before { + content: "\f42b"; } + +.fa-uber:before { + content: "\f402"; } + +.fa-ubuntu:before { + content: "\f7df"; } + +.fa-uikit:before { + content: "\f403"; } + +.fa-umbraco:before { + content: "\f8e8"; } + +.fa-uncharted:before { + content: "\e084"; } + +.fa-uniregistry:before { + content: "\f404"; } + +.fa-unity:before { + content: "\e049"; } + +.fa-unsplash:before { + content: "\e07c"; } + +.fa-untappd:before { + content: "\f405"; } + +.fa-ups:before { + content: "\f7e0"; } + +.fa-usb:before { + content: "\f287"; } + +.fa-usps:before { + content: "\f7e1"; } + +.fa-ussunnah:before { + content: "\f407"; } + +.fa-vaadin:before { + content: "\f408"; } + +.fa-viacoin:before { + content: "\f237"; } + +.fa-viadeo:before { + content: "\f2a9"; } + +.fa-viadeo-square:before { + content: "\f2aa"; } + +.fa-viber:before { + content: "\f409"; } + +.fa-vimeo:before { + content: "\f40a"; } + +.fa-vimeo-square:before { + content: "\f194"; } + +.fa-vimeo-v:before { + content: "\f27d"; } + +.fa-vine:before { + content: "\f1ca"; } + +.fa-vk:before { + content: "\f189"; } + +.fa-vnv:before { + content: "\f40b"; } + +.fa-vuejs:before { + content: "\f41f"; } + +.fa-watchman-monitoring:before { + content: "\e087"; } + +.fa-waze:before { + content: "\f83f"; } + +.fa-weebly:before { + content: "\f5cc"; } + +.fa-weibo:before { + content: "\f18a"; } + +.fa-weixin:before { + content: "\f1d7"; } + +.fa-whatsapp:before { + content: "\f232"; } + +.fa-whatsapp-square:before { + content: "\f40c"; } + +.fa-whmcs:before { + content: "\f40d"; } + 
+.fa-wikipedia-w:before { + content: "\f266"; } + +.fa-windows:before { + content: "\f17a"; } + +.fa-wirsindhandwerk:before { + content: "\e2d0"; } + +.fa-wsh:before { + content: "\e2d0"; } + +.fa-wix:before { + content: "\f5cf"; } + +.fa-wizards-of-the-coast:before { + content: "\f730"; } + +.fa-wodu:before { + content: "\e088"; } + +.fa-wolf-pack-battalion:before { + content: "\f514"; } + +.fa-wordpress:before { + content: "\f19a"; } + +.fa-wordpress-simple:before { + content: "\f411"; } + +.fa-wpbeginner:before { + content: "\f297"; } + +.fa-wpexplorer:before { + content: "\f2de"; } + +.fa-wpforms:before { + content: "\f298"; } + +.fa-wpressr:before { + content: "\f3e4"; } + +.fa-xbox:before { + content: "\f412"; } + +.fa-xing:before { + content: "\f168"; } + +.fa-xing-square:before { + content: "\f169"; } + +.fa-y-combinator:before { + content: "\f23b"; } + +.fa-yahoo:before { + content: "\f19e"; } + +.fa-yammer:before { + content: "\f840"; } + +.fa-yandex:before { + content: "\f413"; } + +.fa-yandex-international:before { + content: "\f414"; } + +.fa-yarn:before { + content: "\f7e3"; } + +.fa-yelp:before { + content: "\f1e9"; } + +.fa-yoast:before { + content: "\f2b1"; } + +.fa-youtube:before { + content: "\f167"; } + +.fa-youtube-square:before { + content: "\f431"; } + +.fa-zhihu:before { + content: "\f63f"; } +:root, :host { + --fa-font-regular: normal 400 1em/1 "Font Awesome 6 Free"; } + +@font-face { + font-family: 'Font Awesome 6 Free'; + font-style: normal; + font-weight: 400; + font-display: block; + src: url("1e21o67/fa-regular-400.woff2") format("woff2"), url("1e21o67/fa-regular-400.ttf") format("truetype"); } + +.far, +.fa-regular { + font-family: 'Font Awesome 6 Free'; + font-weight: 400; } +:root, :host { + --fa-font-solid: normal 900 1em/1 "Font Awesome 6 Free"; } + +@font-face { + font-family: 'Font Awesome 6 Free'; + font-style: normal; + font-weight: 900; + font-display: block; + src: url("1e21o67/fa-solid-900.woff2") format("woff2"), 
url("1e21o67/fa-solid-900.ttf") format("truetype"); } + +.fas, +.fa-solid { + font-family: 'Font Awesome 6 Free'; + font-weight: 900; } +@font-face { + font-family: "Font Awesome 5 Brands"; + font-display: block; + font-weight: 400; + src: url("1e21o67/fa-brands-400.woff2") format("woff2"), url("1e21o67/fa-brands-400.ttf") format("truetype"); } + +@font-face { + font-family: "Font Awesome 5 Free"; + font-display: block; + font-weight: 900; + src: url("1e21o67/fa-solid-900.woff2") format("woff2"), url("1e21o67/fa-solid-900.ttf") format("truetype"); } + +@font-face { + font-family: "Font Awesome 5 Free"; + font-display: block; + font-weight: 400; + src: url("1e21o67/fa-regular-400.woff2") format("woff2"), url("1e21o67/fa-regular-400.ttf") format("truetype"); } +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("1e21o67/fa-solid-900.woff2") format("woff2"), url("1e21o67/fa-solid-900.ttf") format("truetype"); } + +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("1e21o67/fa-brands-400.woff2") format("woff2"), url("1e21o67/fa-brands-400.ttf") format("truetype"); } + +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: url("1e21o67/fa-regular-400.woff2") format("woff2"), url("1e21o67/fa-regular-400.ttf") format("truetype"); + unicode-range: U+F003,U+F006,U+F014,U+F016-F017,U+F01A-F01B,U+F01D,U+F022,U+F03E,U+F044,U+F046,U+F05C-F05D,U+F06E,U+F070,U+F087-F088,U+F08A,U+F094,U+F096-F097,U+F09D,U+F0A0,U+F0A2,U+F0A4-F0A7,U+F0C5,U+F0C7,U+F0E5-F0E6,U+F0EB,U+F0F6-F0F8,U+F10C,U+F114-F115,U+F118-F11A,U+F11C-F11D,U+F133,U+F147,U+F14E,U+F150-F152,U+F185-F186,U+F18E,U+F190-F192,U+F196,U+F1C1-F1C9,U+F1D9,U+F1DB,U+F1E3,U+F1EA,U+F1F7,U+F1F9,U+F20A,U+F247-F248,U+F24A,U+F24D,U+F255-F25B,U+F25D,U+F271-F274,U+F278,U+F27B,U+F28C,U+F28E,U+F29C,U+F2B5,U+F2B7,U+F2BA,U+F2BC,U+F2BE,U+F2C0-F2C1,U+F2C3,U+F2D0,U+F2D2,U+F2D4,U+F2DC; } + +@font-face { + font-family: "FontAwesome"; + font-display: block; + src: 
url("1e21o67/fa-v4compatibility.woff2") format("woff2"), url("1e21o67/fa-v4compatibility.ttf") format("truetype"); + unicode-range: U+F041,U+F047,U+F065-F066,U+F07D-F07E,U+F080,U+F08B,U+F08E,U+F090,U+F09A,U+F0AC,U+F0AE,U+F0B2,U+F0D0,U+F0D6,U+F0E4,U+F0EC,U+F10A-F10B,U+F123,U+F13E,U+F148-F149,U+F14C,U+F156,U+F15E,U+F160-F161,U+F163,U+F175-F178,U+F195,U+F1F8,U+F219,U+F250,U+F252,U+F27A; } diff --git a/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/latex-fontsize.css b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/latex-fontsize.css new file mode 100644 index 00000000..45545ecf --- /dev/null +++ b/docs/site_libs/quarto-contrib/fontawesome6-0.1.0/latex-fontsize.css @@ -0,0 +1,30 @@ +.fa-tiny { + font-size: 0.5em; +} +.fa-scriptsize { + font-size: 0.7em; +} +.fa-footnotesize { + font-size: 0.8em; +} +.fa-small { + font-size: 0.9em; +} +.fa-normalsize { + font-size: 1em; +} +.fa-large { + font-size: 1.2em; +} +.fa-Large { + font-size: 1.5em; +} +.fa-LARGE { + font-size: 1.75em; +} +.fa-huge { + font-size: 2em; +} +.fa-Huge { + font-size: 2.5em; +} diff --git a/docs/site_libs/quarto-html/anchor.min.js b/docs/site_libs/quarto-html/anchor.min.js new file mode 100644 index 00000000..1c2b86fa --- /dev/null +++ b/docs/site_libs/quarto-html/anchor.min.js @@ -0,0 +1,9 @@ +// @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat +// +// AnchorJS - v4.3.1 - 2021-04-17 +// https://www.bryanbraun.com/anchorjs/ +// Copyright (c) 2021 Bryan Braun; Licensed MIT +// +// @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt Expat +!function(A,e){"use strict";"function"==typeof define&&define.amd?define([],e):"object"==typeof module&&module.exports?module.exports=e():(A.AnchorJS=e(),A.anchors=new A.AnchorJS)}(this,function(){"use strict";return function(A){function 
d(A){A.icon=Object.prototype.hasOwnProperty.call(A,"icon")?A.icon:"",A.visible=Object.prototype.hasOwnProperty.call(A,"visible")?A.visible:"hover",A.placement=Object.prototype.hasOwnProperty.call(A,"placement")?A.placement:"right",A.ariaLabel=Object.prototype.hasOwnProperty.call(A,"ariaLabel")?A.ariaLabel:"Anchor",A.class=Object.prototype.hasOwnProperty.call(A,"class")?A.class:"",A.base=Object.prototype.hasOwnProperty.call(A,"base")?A.base:"",A.truncate=Object.prototype.hasOwnProperty.call(A,"truncate")?Math.floor(A.truncate):64,A.titleText=Object.prototype.hasOwnProperty.call(A,"titleText")?A.titleText:""}function w(A){var e;if("string"==typeof A||A instanceof String)e=[].slice.call(document.querySelectorAll(A));else{if(!(Array.isArray(A)||A instanceof NodeList))throw new TypeError("The selector provided to AnchorJS was invalid.");e=[].slice.call(A)}return e}this.options=A||{},this.elements=[],d(this.options),this.isTouchDevice=function(){return Boolean("ontouchstart"in window||window.TouchEvent||window.DocumentTouch&&document instanceof DocumentTouch)},this.add=function(A){var e,t,o,i,n,s,a,c,r,l,h,u,p=[];if(d(this.options),"touch"===(l=this.options.visible)&&(l=this.isTouchDevice()?"always":"hover"),0===(e=w(A=A||"h2, h3, h4, h5, h6")).length)return this;for(null===document.head.querySelector("style.anchorjs")&&((u=document.createElement("style")).className="anchorjs",u.appendChild(document.createTextNode("")),void 
0===(A=document.head.querySelector('[rel="stylesheet"],style'))?document.head.appendChild(u):document.head.insertBefore(u,A),u.sheet.insertRule(".anchorjs-link{opacity:0;text-decoration:none;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}",u.sheet.cssRules.length),u.sheet.insertRule(":hover>.anchorjs-link,.anchorjs-link:focus{opacity:1}",u.sheet.cssRules.length),u.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}",u.sheet.cssRules.length),u.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) 
format("truetype")}',u.sheet.cssRules.length)),u=document.querySelectorAll("[id]"),t=[].map.call(u,function(A){return A.id}),i=0;i\]./()*\\\n\t\b\v\u00A0]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),A=A.lastChild&&-1<(" "+A.lastChild.className+" ").indexOf(" anchorjs-link ");return e||A||!1}}}); +// @license-end \ No newline at end of file diff --git a/docs/site_libs/quarto-html/popper.min.js b/docs/site_libs/quarto-html/popper.min.js new file mode 100644 index 00000000..2269d669 --- /dev/null +++ b/docs/site_libs/quarto-html/popper.min.js @@ -0,0 +1,6 @@ +/** + * @popperjs/core v2.11.4 - MIT License + */ + +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self).Popper={})}(this,(function(e){"use strict";function t(e){if(null==e)return window;if("[object Window]"!==e.toString()){var t=e.ownerDocument;return t&&t.defaultView||window}return e}function n(e){return e instanceof t(e).Element||e instanceof Element}function r(e){return e instanceof t(e).HTMLElement||e instanceof HTMLElement}function o(e){return"undefined"!=typeof ShadowRoot&&(e instanceof t(e).ShadowRoot||e instanceof ShadowRoot)}var i=Math.max,a=Math.min,s=Math.round;function f(e,t){void 0===t&&(t=!1);var n=e.getBoundingClientRect(),o=1,i=1;if(r(e)&&t){var a=e.offsetHeight,f=e.offsetWidth;f>0&&(o=s(n.width)/f||1),a>0&&(i=s(n.height)/a||1)}return{width:n.width/o,height:n.height/i,top:n.top/i,right:n.right/o,bottom:n.bottom/i,left:n.left/o,x:n.left/o,y:n.top/i}}function c(e){var n=t(e);return{scrollLeft:n.pageXOffset,scrollTop:n.pageYOffset}}function p(e){return e?(e.nodeName||"").toLowerCase():null}function u(e){return((n(e)?e.ownerDocument:e.document)||window.document).documentElement}function 
l(e){return f(u(e)).left+c(e).scrollLeft}function d(e){return t(e).getComputedStyle(e)}function h(e){var t=d(e),n=t.overflow,r=t.overflowX,o=t.overflowY;return/auto|scroll|overlay|hidden/.test(n+o+r)}function m(e,n,o){void 0===o&&(o=!1);var i,a,d=r(n),m=r(n)&&function(e){var t=e.getBoundingClientRect(),n=s(t.width)/e.offsetWidth||1,r=s(t.height)/e.offsetHeight||1;return 1!==n||1!==r}(n),v=u(n),g=f(e,m),y={scrollLeft:0,scrollTop:0},b={x:0,y:0};return(d||!d&&!o)&&(("body"!==p(n)||h(v))&&(y=(i=n)!==t(i)&&r(i)?{scrollLeft:(a=i).scrollLeft,scrollTop:a.scrollTop}:c(i)),r(n)?((b=f(n,!0)).x+=n.clientLeft,b.y+=n.clientTop):v&&(b.x=l(v))),{x:g.left+y.scrollLeft-b.x,y:g.top+y.scrollTop-b.y,width:g.width,height:g.height}}function v(e){var t=f(e),n=e.offsetWidth,r=e.offsetHeight;return Math.abs(t.width-n)<=1&&(n=t.width),Math.abs(t.height-r)<=1&&(r=t.height),{x:e.offsetLeft,y:e.offsetTop,width:n,height:r}}function g(e){return"html"===p(e)?e:e.assignedSlot||e.parentNode||(o(e)?e.host:null)||u(e)}function y(e){return["html","body","#document"].indexOf(p(e))>=0?e.ownerDocument.body:r(e)&&h(e)?e:y(g(e))}function b(e,n){var r;void 0===n&&(n=[]);var o=y(e),i=o===(null==(r=e.ownerDocument)?void 0:r.body),a=t(o),s=i?[a].concat(a.visualViewport||[],h(o)?o:[]):o,f=n.concat(s);return i?f:f.concat(b(g(s)))}function x(e){return["table","td","th"].indexOf(p(e))>=0}function w(e){return r(e)&&"fixed"!==d(e).position?e.offsetParent:null}function O(e){for(var n=t(e),i=w(e);i&&x(i)&&"static"===d(i).position;)i=w(i);return i&&("html"===p(i)||"body"===p(i)&&"static"===d(i).position)?n:i||function(e){var t=-1!==navigator.userAgent.toLowerCase().indexOf("firefox");if(-1!==navigator.userAgent.indexOf("Trident")&&r(e)&&"fixed"===d(e).position)return null;var n=g(e);for(o(n)&&(n=n.host);r(n)&&["html","body"].indexOf(p(n))<0;){var 
i=d(n);if("none"!==i.transform||"none"!==i.perspective||"paint"===i.contain||-1!==["transform","perspective"].indexOf(i.willChange)||t&&"filter"===i.willChange||t&&i.filter&&"none"!==i.filter)return n;n=n.parentNode}return null}(e)||n}var j="top",E="bottom",D="right",A="left",L="auto",P=[j,E,D,A],M="start",k="end",W="viewport",B="popper",H=P.reduce((function(e,t){return e.concat([t+"-"+M,t+"-"+k])}),[]),T=[].concat(P,[L]).reduce((function(e,t){return e.concat([t,t+"-"+M,t+"-"+k])}),[]),R=["beforeRead","read","afterRead","beforeMain","main","afterMain","beforeWrite","write","afterWrite"];function S(e){var t=new Map,n=new Set,r=[];function o(e){n.add(e.name),[].concat(e.requires||[],e.requiresIfExists||[]).forEach((function(e){if(!n.has(e)){var r=t.get(e);r&&o(r)}})),r.push(e)}return e.forEach((function(e){t.set(e.name,e)})),e.forEach((function(e){n.has(e.name)||o(e)})),r}function C(e){return e.split("-")[0]}function q(e,t){var n=t.getRootNode&&t.getRootNode();if(e.contains(t))return!0;if(n&&o(n)){var r=t;do{if(r&&e.isSameNode(r))return!0;r=r.parentNode||r.host}while(r)}return!1}function V(e){return Object.assign({},e,{left:e.x,top:e.y,right:e.x+e.width,bottom:e.y+e.height})}function N(e,r){return r===W?V(function(e){var n=t(e),r=u(e),o=n.visualViewport,i=r.clientWidth,a=r.clientHeight,s=0,f=0;return o&&(i=o.width,a=o.height,/^((?!chrome|android).)*safari/i.test(navigator.userAgent)||(s=o.offsetLeft,f=o.offsetTop)),{width:i,height:a,x:s+l(e),y:f}}(e)):n(r)?function(e){var t=f(e);return t.top=t.top+e.clientTop,t.left=t.left+e.clientLeft,t.bottom=t.top+e.clientHeight,t.right=t.left+e.clientWidth,t.width=e.clientWidth,t.height=e.clientHeight,t.x=t.left,t.y=t.top,t}(r):V(function(e){var t,n=u(e),r=c(e),o=null==(t=e.ownerDocument)?void 
0:t.body,a=i(n.scrollWidth,n.clientWidth,o?o.scrollWidth:0,o?o.clientWidth:0),s=i(n.scrollHeight,n.clientHeight,o?o.scrollHeight:0,o?o.clientHeight:0),f=-r.scrollLeft+l(e),p=-r.scrollTop;return"rtl"===d(o||n).direction&&(f+=i(n.clientWidth,o?o.clientWidth:0)-a),{width:a,height:s,x:f,y:p}}(u(e)))}function I(e,t,o){var s="clippingParents"===t?function(e){var t=b(g(e)),o=["absolute","fixed"].indexOf(d(e).position)>=0&&r(e)?O(e):e;return n(o)?t.filter((function(e){return n(e)&&q(e,o)&&"body"!==p(e)})):[]}(e):[].concat(t),f=[].concat(s,[o]),c=f[0],u=f.reduce((function(t,n){var r=N(e,n);return t.top=i(r.top,t.top),t.right=a(r.right,t.right),t.bottom=a(r.bottom,t.bottom),t.left=i(r.left,t.left),t}),N(e,c));return u.width=u.right-u.left,u.height=u.bottom-u.top,u.x=u.left,u.y=u.top,u}function _(e){return e.split("-")[1]}function F(e){return["top","bottom"].indexOf(e)>=0?"x":"y"}function U(e){var t,n=e.reference,r=e.element,o=e.placement,i=o?C(o):null,a=o?_(o):null,s=n.x+n.width/2-r.width/2,f=n.y+n.height/2-r.height/2;switch(i){case j:t={x:s,y:n.y-r.height};break;case E:t={x:s,y:n.y+n.height};break;case D:t={x:n.x+n.width,y:f};break;case A:t={x:n.x-r.width,y:f};break;default:t={x:n.x,y:n.y}}var c=i?F(i):null;if(null!=c){var p="y"===c?"height":"width";switch(a){case M:t[c]=t[c]-(n[p]/2-r[p]/2);break;case k:t[c]=t[c]+(n[p]/2-r[p]/2)}}return t}function z(e){return Object.assign({},{top:0,right:0,bottom:0,left:0},e)}function X(e,t){return t.reduce((function(t,n){return t[n]=e,t}),{})}function Y(e,t){void 0===t&&(t={});var r=t,o=r.placement,i=void 0===o?e.placement:o,a=r.boundary,s=void 0===a?"clippingParents":a,c=r.rootBoundary,p=void 0===c?W:c,l=r.elementContext,d=void 0===l?B:l,h=r.altBoundary,m=void 0!==h&&h,v=r.padding,g=void 0===v?0:v,y=z("number"!=typeof 
g?g:X(g,P)),b=d===B?"reference":B,x=e.rects.popper,w=e.elements[m?b:d],O=I(n(w)?w:w.contextElement||u(e.elements.popper),s,p),A=f(e.elements.reference),L=U({reference:A,element:x,strategy:"absolute",placement:i}),M=V(Object.assign({},x,L)),k=d===B?M:A,H={top:O.top-k.top+y.top,bottom:k.bottom-O.bottom+y.bottom,left:O.left-k.left+y.left,right:k.right-O.right+y.right},T=e.modifiersData.offset;if(d===B&&T){var R=T[i];Object.keys(H).forEach((function(e){var t=[D,E].indexOf(e)>=0?1:-1,n=[j,E].indexOf(e)>=0?"y":"x";H[e]+=R[n]*t}))}return H}var G={placement:"bottom",modifiers:[],strategy:"absolute"};function J(){for(var e=arguments.length,t=new Array(e),n=0;n=0?-1:1,i="function"==typeof n?n(Object.assign({},t,{placement:e})):n,a=i[0],s=i[1];return a=a||0,s=(s||0)*o,[A,D].indexOf(r)>=0?{x:s,y:a}:{x:a,y:s}}(n,t.rects,i),e}),{}),s=a[t.placement],f=s.x,c=s.y;null!=t.modifiersData.popperOffsets&&(t.modifiersData.popperOffsets.x+=f,t.modifiersData.popperOffsets.y+=c),t.modifiersData[r]=a}},ie={left:"right",right:"left",bottom:"top",top:"bottom"};function ae(e){return e.replace(/left|right|bottom|top/g,(function(e){return ie[e]}))}var se={start:"end",end:"start"};function fe(e){return e.replace(/start|end/g,(function(e){return se[e]}))}function ce(e,t){void 0===t&&(t={});var n=t,r=n.placement,o=n.boundary,i=n.rootBoundary,a=n.padding,s=n.flipVariations,f=n.allowedAutoPlacements,c=void 0===f?T:f,p=_(r),u=p?s?H:H.filter((function(e){return _(e)===p})):P,l=u.filter((function(e){return c.indexOf(e)>=0}));0===l.length&&(l=u);var d=l.reduce((function(t,n){return t[n]=Y(e,{placement:n,boundary:o,rootBoundary:i,padding:a})[C(n)],t}),{});return Object.keys(d).sort((function(e,t){return d[e]-d[t]}))}var pe={name:"flip",enabled:!0,phase:"main",fn:function(e){var t=e.state,n=e.options,r=e.name;if(!t.modifiersData[r]._skip){for(var o=n.mainAxis,i=void 0===o||o,a=n.altAxis,s=void 0===a||a,f=n.fallbackPlacements,c=n.padding,p=n.boundary,u=n.rootBoundary,l=n.altBoundary,d=n.flipVariations,h=void 
0===d||d,m=n.allowedAutoPlacements,v=t.options.placement,g=C(v),y=f||(g===v||!h?[ae(v)]:function(e){if(C(e)===L)return[];var t=ae(e);return[fe(e),t,fe(t)]}(v)),b=[v].concat(y).reduce((function(e,n){return e.concat(C(n)===L?ce(t,{placement:n,boundary:p,rootBoundary:u,padding:c,flipVariations:h,allowedAutoPlacements:m}):n)}),[]),x=t.rects.reference,w=t.rects.popper,O=new Map,P=!0,k=b[0],W=0;W=0,S=R?"width":"height",q=Y(t,{placement:B,boundary:p,rootBoundary:u,altBoundary:l,padding:c}),V=R?T?D:A:T?E:j;x[S]>w[S]&&(V=ae(V));var N=ae(V),I=[];if(i&&I.push(q[H]<=0),s&&I.push(q[V]<=0,q[N]<=0),I.every((function(e){return e}))){k=B,P=!1;break}O.set(B,I)}if(P)for(var F=function(e){var t=b.find((function(t){var n=O.get(t);if(n)return n.slice(0,e).every((function(e){return e}))}));if(t)return k=t,"break"},U=h?3:1;U>0;U--){if("break"===F(U))break}t.placement!==k&&(t.modifiersData[r]._skip=!0,t.placement=k,t.reset=!0)}},requiresIfExists:["offset"],data:{_skip:!1}};function ue(e,t,n){return i(e,a(t,n))}var le={name:"preventOverflow",enabled:!0,phase:"main",fn:function(e){var t=e.state,n=e.options,r=e.name,o=n.mainAxis,s=void 0===o||o,f=n.altAxis,c=void 0!==f&&f,p=n.boundary,u=n.rootBoundary,l=n.altBoundary,d=n.padding,h=n.tether,m=void 0===h||h,g=n.tetherOffset,y=void 0===g?0:g,b=Y(t,{boundary:p,rootBoundary:u,padding:d,altBoundary:l}),x=C(t.placement),w=_(t.placement),L=!w,P=F(x),k="x"===P?"y":"x",W=t.modifiersData.popperOffsets,B=t.rects.reference,H=t.rects.popper,T="function"==typeof y?y(Object.assign({},t.rects,{placement:t.placement})):y,R="number"==typeof T?{mainAxis:T,altAxis:T}:Object.assign({mainAxis:0,altAxis:0},T),S=t.modifiersData.offset?t.modifiersData.offset[t.placement]:null,q={x:0,y:0};if(W){if(s){var 
V,N="y"===P?j:A,I="y"===P?E:D,U="y"===P?"height":"width",z=W[P],X=z+b[N],G=z-b[I],J=m?-H[U]/2:0,K=w===M?B[U]:H[U],Q=w===M?-H[U]:-B[U],Z=t.elements.arrow,$=m&&Z?v(Z):{width:0,height:0},ee=t.modifiersData["arrow#persistent"]?t.modifiersData["arrow#persistent"].padding:{top:0,right:0,bottom:0,left:0},te=ee[N],ne=ee[I],re=ue(0,B[U],$[U]),oe=L?B[U]/2-J-re-te-R.mainAxis:K-re-te-R.mainAxis,ie=L?-B[U]/2+J+re+ne+R.mainAxis:Q+re+ne+R.mainAxis,ae=t.elements.arrow&&O(t.elements.arrow),se=ae?"y"===P?ae.clientTop||0:ae.clientLeft||0:0,fe=null!=(V=null==S?void 0:S[P])?V:0,ce=z+ie-fe,pe=ue(m?a(X,z+oe-fe-se):X,z,m?i(G,ce):G);W[P]=pe,q[P]=pe-z}if(c){var le,de="x"===P?j:A,he="x"===P?E:D,me=W[k],ve="y"===k?"height":"width",ge=me+b[de],ye=me-b[he],be=-1!==[j,A].indexOf(x),xe=null!=(le=null==S?void 0:S[k])?le:0,we=be?ge:me-B[ve]-H[ve]-xe+R.altAxis,Oe=be?me+B[ve]+H[ve]-xe-R.altAxis:ye,je=m&&be?function(e,t,n){var r=ue(e,t,n);return r>n?n:r}(we,me,Oe):ue(m?we:ge,me,m?Oe:ye);W[k]=je,q[k]=je-me}t.modifiersData[r]=q}},requiresIfExists:["offset"]};var de={name:"arrow",enabled:!0,phase:"main",fn:function(e){var t,n=e.state,r=e.name,o=e.options,i=n.elements.arrow,a=n.modifiersData.popperOffsets,s=C(n.placement),f=F(s),c=[A,D].indexOf(s)>=0?"height":"width";if(i&&a){var p=function(e,t){return z("number"!=typeof(e="function"==typeof e?e(Object.assign({},t.rects,{placement:t.placement})):e)?e:X(e,P))}(o.padding,n),u=v(i),l="y"===f?j:A,d="y"===f?E:D,h=n.rects.reference[c]+n.rects.reference[f]-a[f]-n.rects.popper[c],m=a[f]-n.rects.reference[f],g=O(i),y=g?"y"===f?g.clientHeight||0:g.clientWidth||0:0,b=h/2-m/2,x=p[l],w=y-u[c]-p[d],L=y/2-u[c]/2+b,M=ue(x,L,w),k=f;n.modifiersData[r]=((t={})[k]=M,t.centerOffset=M-L,t)}},effect:function(e){var t=e.state,n=e.options.element,r=void 0===n?"[data-popper-arrow]":n;null!=r&&("string"!=typeof r||(r=t.elements.popper.querySelector(r)))&&q(t.elements.popper,r)&&(t.elements.arrow=r)},requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function 
he(e,t,n){return void 0===n&&(n={x:0,y:0}),{top:e.top-t.height-n.y,right:e.right-t.width+n.x,bottom:e.bottom-t.height+n.y,left:e.left-t.width-n.x}}function me(e){return[j,D,E,A].some((function(t){return e[t]>=0}))}var ve={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:function(e){var t=e.state,n=e.name,r=t.rects.reference,o=t.rects.popper,i=t.modifiersData.preventOverflow,a=Y(t,{elementContext:"reference"}),s=Y(t,{altBoundary:!0}),f=he(a,r),c=he(s,o,i),p=me(f),u=me(c);t.modifiersData[n]={referenceClippingOffsets:f,popperEscapeOffsets:c,isReferenceHidden:p,hasPopperEscaped:u},t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-reference-hidden":p,"data-popper-escaped":u})}},ge=K({defaultModifiers:[Z,$,ne,re]}),ye=[Z,$,ne,re,oe,pe,le,de,ve],be=K({defaultModifiers:ye});e.applyStyles=re,e.arrow=de,e.computeStyles=ne,e.createPopper=be,e.createPopperLite=ge,e.defaultModifiers=ye,e.detectOverflow=Y,e.eventListeners=Z,e.flip=pe,e.hide=ve,e.offset=oe,e.popperGenerator=K,e.popperOffsets=$,e.preventOverflow=le,Object.defineProperty(e,"__esModule",{value:!0})})); + diff --git a/docs/site_libs/quarto-html/quarto-syntax-highlighting.css b/docs/site_libs/quarto-html/quarto-syntax-highlighting.css new file mode 100644 index 00000000..711fa298 --- /dev/null +++ b/docs/site_libs/quarto-html/quarto-syntax-highlighting.css @@ -0,0 +1,203 @@ +/* quarto syntax highlight colors */ +:root { + --quarto-hl-ot-color: #003B4F; + --quarto-hl-at-color: #657422; + --quarto-hl-ss-color: #20794D; + --quarto-hl-an-color: #5E5E5E; + --quarto-hl-fu-color: #4758AB; + --quarto-hl-st-color: #20794D; + --quarto-hl-cf-color: #003B4F; + --quarto-hl-op-color: #5E5E5E; + --quarto-hl-er-color: #AD0000; + --quarto-hl-bn-color: #AD0000; + --quarto-hl-al-color: #AD0000; + --quarto-hl-va-color: #111111; + --quarto-hl-bu-color: inherit; + --quarto-hl-ex-color: inherit; + --quarto-hl-pp-color: #AD0000; + --quarto-hl-in-color: #5E5E5E; + --quarto-hl-vs-color: #20794D; 
+ --quarto-hl-wa-color: #5E5E5E; + --quarto-hl-do-color: #5E5E5E; + --quarto-hl-im-color: #00769E; + --quarto-hl-ch-color: #20794D; + --quarto-hl-dt-color: #AD0000; + --quarto-hl-fl-color: #AD0000; + --quarto-hl-co-color: #5E5E5E; + --quarto-hl-cv-color: #5E5E5E; + --quarto-hl-cn-color: #8f5902; + --quarto-hl-sc-color: #5E5E5E; + --quarto-hl-dv-color: #AD0000; + --quarto-hl-kw-color: #003B4F; +} + +/* other quarto variables */ +:root { + --quarto-font-monospace: "Source Code Pro", monospace; +} + +pre > code.sourceCode > span { + color: #003B4F; +} + +code span { + color: #003B4F; +} + +code.sourceCode > span { + color: #003B4F; +} + +div.sourceCode, +div.sourceCode pre.sourceCode { + color: #003B4F; +} + +code span.ot { + color: #003B4F; + font-style: inherit; +} + +code span.at { + color: #657422; + font-style: inherit; +} + +code span.ss { + color: #20794D; + font-style: inherit; +} + +code span.an { + color: #5E5E5E; + font-style: inherit; +} + +code span.fu { + color: #4758AB; + font-style: inherit; +} + +code span.st { + color: #20794D; + font-style: inherit; +} + +code span.cf { + color: #003B4F; + font-style: inherit; +} + +code span.op { + color: #5E5E5E; + font-style: inherit; +} + +code span.er { + color: #AD0000; + font-style: inherit; +} + +code span.bn { + color: #AD0000; + font-style: inherit; +} + +code span.al { + color: #AD0000; + font-style: inherit; +} + +code span.va { + color: #111111; + font-style: inherit; +} + +code span.bu { + font-style: inherit; +} + +code span.ex { + font-style: inherit; +} + +code span.pp { + color: #AD0000; + font-style: inherit; +} + +code span.in { + color: #5E5E5E; + font-style: inherit; +} + +code span.vs { + color: #20794D; + font-style: inherit; +} + +code span.wa { + color: #5E5E5E; + font-style: italic; +} + +code span.do { + color: #5E5E5E; + font-style: italic; +} + +code span.im { + color: #00769E; + font-style: inherit; +} + +code span.ch { + color: #20794D; + font-style: inherit; +} + +code span.dt { + 
color: #AD0000; + font-style: inherit; +} + +code span.fl { + color: #AD0000; + font-style: inherit; +} + +code span.co { + color: #5E5E5E; + font-style: inherit; +} + +code span.cv { + color: #5E5E5E; + font-style: italic; +} + +code span.cn { + color: #8f5902; + font-style: inherit; +} + +code span.sc { + color: #5E5E5E; + font-style: inherit; +} + +code span.dv { + color: #AD0000; + font-style: inherit; +} + +code span.kw { + color: #003B4F; + font-style: inherit; +} + +.prevent-inlining { + content: " { + // Find any conflicting margin elements and add margins to the + // top to prevent overlap + const marginChildren = window.document.querySelectorAll( + ".column-margin.column-container > * " + ); + + let lastBottom = 0; + for (const marginChild of marginChildren) { + if (marginChild.offsetParent !== null) { + // clear the top margin so we recompute it + marginChild.style.marginTop = null; + const top = marginChild.getBoundingClientRect().top + window.scrollY; + console.log({ + childtop: marginChild.getBoundingClientRect().top, + scroll: window.scrollY, + top, + lastBottom, + }); + if (top < lastBottom) { + const margin = lastBottom - top; + marginChild.style.marginTop = `${margin}px`; + } + const styles = window.getComputedStyle(marginChild); + const marginTop = parseFloat(styles["marginTop"]); + + console.log({ + top, + height: marginChild.getBoundingClientRect().height, + marginTop, + total: top + marginChild.getBoundingClientRect().height + marginTop, + }); + lastBottom = top + marginChild.getBoundingClientRect().height + marginTop; + } + } +}; + +window.document.addEventListener("DOMContentLoaded", function (_event) { + // Recompute the position of margin elements anytime the body size changes + if (window.ResizeObserver) { + const resizeObserver = new window.ResizeObserver( + throttle(layoutMarginEls, 50) + ); + resizeObserver.observe(window.document.body); + } + + const tocEl = window.document.querySelector('nav.toc-active[role="doc-toc"]'); + const 
sidebarEl = window.document.getElementById("quarto-sidebar"); + const leftTocEl = window.document.getElementById("quarto-sidebar-toc-left"); + const marginSidebarEl = window.document.getElementById( + "quarto-margin-sidebar" + ); + // function to determine whether the element has a previous sibling that is active + const prevSiblingIsActiveLink = (el) => { + const sibling = el.previousElementSibling; + if (sibling && sibling.tagName === "A") { + return sibling.classList.contains("active"); + } else { + return false; + } + }; + + // fire slideEnter for bootstrap tab activations (for htmlwidget resize behavior) + function fireSlideEnter(e) { + const event = window.document.createEvent("Event"); + event.initEvent("slideenter", true, true); + window.document.dispatchEvent(event); + } + const tabs = window.document.querySelectorAll('a[data-bs-toggle="tab"]'); + tabs.forEach((tab) => { + tab.addEventListener("shown.bs.tab", fireSlideEnter); + }); + + // fire slideEnter for tabby tab activations (for htmlwidget resize behavior) + document.addEventListener("tabby", fireSlideEnter, false); + + // Track scrolling and mark TOC links as active + // get table of contents and sidebar (bail if we don't have at least one) + const tocLinks = tocEl + ? 
[...tocEl.querySelectorAll("a[data-scroll-target]")] + : []; + const makeActive = (link) => tocLinks[link].classList.add("active"); + const removeActive = (link) => tocLinks[link].classList.remove("active"); + const removeAllActive = () => + [...Array(tocLinks.length).keys()].forEach((link) => removeActive(link)); + + // activate the anchor for a section associated with this TOC entry + tocLinks.forEach((link) => { + link.addEventListener("click", () => { + if (link.href.indexOf("#") !== -1) { + const anchor = link.href.split("#")[1]; + const heading = window.document.querySelector( + `[data-anchor-id=${anchor}]` + ); + if (heading) { + // Add the class + heading.classList.add("reveal-anchorjs-link"); + + // function to show the anchor + const handleMouseout = () => { + heading.classList.remove("reveal-anchorjs-link"); + heading.removeEventListener("mouseout", handleMouseout); + }; + + // add a function to clear the anchor when the user mouses out of it + heading.addEventListener("mouseout", handleMouseout); + } + } + }); + }); + + const sections = tocLinks.map((link) => { + const target = link.getAttribute("data-scroll-target"); + if (target.startsWith("#")) { + return window.document.getElementById(decodeURI(`${target.slice(1)}`)); + } else { + return window.document.querySelector(decodeURI(`${target}`)); + } + }); + + const sectionMargin = 200; + let currentActive = 0; + // track whether we've initialized state the first time + let init = false; + + const updateActiveLink = () => { + // The index from bottom to top (e.g. 
reversed list) + let sectionIndex = -1; + if ( + window.innerHeight + window.pageYOffset >= + window.document.body.offsetHeight + ) { + sectionIndex = 0; + } else { + sectionIndex = [...sections].reverse().findIndex((section) => { + if (section) { + return window.pageYOffset >= section.offsetTop - sectionMargin; + } else { + return false; + } + }); + } + if (sectionIndex > -1) { + const current = sections.length - sectionIndex - 1; + if (current !== currentActive) { + removeAllActive(); + currentActive = current; + makeActive(current); + if (init) { + window.dispatchEvent(sectionChanged); + } + init = true; + } + } + }; + + const inHiddenRegion = (top, bottom, hiddenRegions) => { + for (const region of hiddenRegions) { + if (top <= region.bottom && bottom >= region.top) { + return true; + } + } + return false; + }; + + const categorySelector = "header.quarto-title-block .quarto-category"; + const activateCategories = (href) => { + // Find any categories + // Surround them with a link pointing back to: + // #category=Authoring + try { + const categoryEls = window.document.querySelectorAll(categorySelector); + for (const categoryEl of categoryEls) { + const categoryText = categoryEl.textContent; + if (categoryText) { + const link = `${href}#category=${encodeURIComponent(categoryText)}`; + const linkEl = window.document.createElement("a"); + linkEl.setAttribute("href", link); + for (const child of categoryEl.childNodes) { + linkEl.append(child); + } + categoryEl.appendChild(linkEl); + } + } + } catch { + // Ignore errors + } + }; + function hasTitleCategories() { + return window.document.querySelector(categorySelector) !== null; + } + + function offsetRelativeUrl(url) { + const offset = getMeta("quarto:offset"); + return offset ? 
offset + url : url; + } + + function offsetAbsoluteUrl(url) { + const offset = getMeta("quarto:offset"); + const baseUrl = new URL(offset, window.location); + + const projRelativeUrl = url.replace(baseUrl, ""); + if (projRelativeUrl.startsWith("/")) { + return projRelativeUrl; + } else { + return "/" + projRelativeUrl; + } + } + + // read a meta tag value + function getMeta(metaName) { + const metas = window.document.getElementsByTagName("meta"); + for (let i = 0; i < metas.length; i++) { + if (metas[i].getAttribute("name") === metaName) { + return metas[i].getAttribute("content"); + } + } + return ""; + } + + async function findAndActivateCategories() { + const currentPagePath = offsetAbsoluteUrl(window.location.href); + const response = await fetch(offsetRelativeUrl("listings.json")); + if (response.status == 200) { + return response.json().then(function (listingPaths) { + const listingHrefs = []; + for (const listingPath of listingPaths) { + const pathWithoutLeadingSlash = listingPath.listing.substring(1); + for (const item of listingPath.items) { + if ( + item === currentPagePath || + item === currentPagePath + "index.html" + ) { + // Resolve this path against the offset to be sure + // we already are using the correct path to the listing + // (this adjusts the listing urls to be rooted against + // whatever root the page is actually running against) + const relative = offsetRelativeUrl(pathWithoutLeadingSlash); + const baseUrl = window.location; + const resolvedPath = new URL(relative, baseUrl); + listingHrefs.push(resolvedPath.pathname); + break; + } + } + } + + // Look up the tree for a nearby linting and use that if we find one + const nearestListing = findNearestParentListing( + offsetAbsoluteUrl(window.location.pathname), + listingHrefs + ); + if (nearestListing) { + activateCategories(nearestListing); + } else { + // See if the referrer is a listing page for this item + const referredRelativePath = offsetAbsoluteUrl(document.referrer); + const 
referrerListing = listingHrefs.find((listingHref) => { + const isListingReferrer = + listingHref === referredRelativePath || + listingHref === referredRelativePath + "index.html"; + return isListingReferrer; + }); + + if (referrerListing) { + // Try to use the referrer if possible + activateCategories(referrerListing); + } else if (listingHrefs.length > 0) { + // Otherwise, just fall back to the first listing + activateCategories(listingHrefs[0]); + } + } + }); + } + } + if (hasTitleCategories()) { + findAndActivateCategories(); + } + + const findNearestParentListing = (href, listingHrefs) => { + if (!href || !listingHrefs) { + return undefined; + } + // Look up the tree for a nearby linting and use that if we find one + const relativeParts = href.substring(1).split("/"); + while (relativeParts.length > 0) { + const path = relativeParts.join("/"); + for (const listingHref of listingHrefs) { + if (listingHref.startsWith(path)) { + return listingHref; + } + } + relativeParts.pop(); + } + + return undefined; + }; + + const manageSidebarVisiblity = (el, placeholderDescriptor) => { + let isVisible = true; + let elRect; + + return (hiddenRegions) => { + if (el === null) { + return; + } + + // Find the last element of the TOC + const lastChildEl = el.lastElementChild; + + if (lastChildEl) { + // Converts the sidebar to a menu + const convertToMenu = () => { + for (const child of el.children) { + child.style.opacity = 0; + child.style.overflow = "hidden"; + } + + nexttick(() => { + const toggleContainer = window.document.createElement("div"); + toggleContainer.style.width = "100%"; + toggleContainer.classList.add("zindex-over-content"); + toggleContainer.classList.add("quarto-sidebar-toggle"); + toggleContainer.classList.add("headroom-target"); // Marks this to be managed by headeroom + toggleContainer.id = placeholderDescriptor.id; + toggleContainer.style.position = "fixed"; + + const toggleIcon = window.document.createElement("i"); + 
toggleIcon.classList.add("quarto-sidebar-toggle-icon"); + toggleIcon.classList.add("bi"); + toggleIcon.classList.add("bi-caret-down-fill"); + + const toggleTitle = window.document.createElement("div"); + const titleEl = window.document.body.querySelector( + placeholderDescriptor.titleSelector + ); + if (titleEl) { + toggleTitle.append( + titleEl.textContent || titleEl.innerText, + toggleIcon + ); + } + toggleTitle.classList.add("zindex-over-content"); + toggleTitle.classList.add("quarto-sidebar-toggle-title"); + toggleContainer.append(toggleTitle); + + const toggleContents = window.document.createElement("div"); + toggleContents.classList = el.classList; + toggleContents.classList.add("zindex-over-content"); + toggleContents.classList.add("quarto-sidebar-toggle-contents"); + for (const child of el.children) { + if (child.id === "toc-title") { + continue; + } + + const clone = child.cloneNode(true); + clone.style.opacity = 1; + clone.style.display = null; + toggleContents.append(clone); + } + toggleContents.style.height = "0px"; + const positionToggle = () => { + // position the element (top left of parent, same width as parent) + if (!elRect) { + elRect = el.getBoundingClientRect(); + } + toggleContainer.style.left = `${elRect.left}px`; + toggleContainer.style.top = `${elRect.top}px`; + toggleContainer.style.width = `${elRect.width}px`; + }; + positionToggle(); + + toggleContainer.append(toggleContents); + el.parentElement.prepend(toggleContainer); + + // Process clicks + let tocShowing = false; + // Allow the caller to control whether this is dismissed + // when it is clicked (e.g. sidebar navigation supports + // opening and closing the nav tree, so don't dismiss on click) + const clickEl = placeholderDescriptor.dismissOnClick + ? 
toggleContainer + : toggleTitle; + + const closeToggle = () => { + if (tocShowing) { + toggleContainer.classList.remove("expanded"); + toggleContents.style.height = "0px"; + tocShowing = false; + } + }; + + // Get rid of any expanded toggle if the user scrolls + window.document.addEventListener( + "scroll", + throttle(() => { + closeToggle(); + }, 50) + ); + + // Handle positioning of the toggle + window.addEventListener( + "resize", + throttle(() => { + elRect = undefined; + positionToggle(); + }, 50) + ); + + window.addEventListener("quarto-hrChanged", () => { + elRect = undefined; + }); + + // Process the click + clickEl.onclick = () => { + if (!tocShowing) { + toggleContainer.classList.add("expanded"); + toggleContents.style.height = null; + tocShowing = true; + } else { + closeToggle(); + } + }; + }); + }; + + // Converts a sidebar from a menu back to a sidebar + const convertToSidebar = () => { + for (const child of el.children) { + child.style.opacity = 1; + child.style.overflow = null; + } + + const placeholderEl = window.document.getElementById( + placeholderDescriptor.id + ); + if (placeholderEl) { + placeholderEl.remove(); + } + + el.classList.remove("rollup"); + }; + + if (isReaderMode()) { + convertToMenu(); + isVisible = false; + } else { + // Find the top and bottom o the element that is being managed + const elTop = el.offsetTop; + const elBottom = + elTop + lastChildEl.offsetTop + lastChildEl.offsetHeight; + + if (!isVisible) { + // If the element is current not visible reveal if there are + // no conflicts with overlay regions + if (!inHiddenRegion(elTop, elBottom, hiddenRegions)) { + convertToSidebar(); + isVisible = true; + } + } else { + // If the element is visible, hide it if it conflicts with overlay regions + // and insert a placeholder toggle (or if we're in reader mode) + if (inHiddenRegion(elTop, elBottom, hiddenRegions)) { + convertToMenu(); + isVisible = false; + } + } + } + } + }; + }; + + const tabEls = 
document.querySelectorAll('a[data-bs-toggle="tab"]'); + for (const tabEl of tabEls) { + const id = tabEl.getAttribute("data-bs-target"); + if (id) { + const columnEl = document.querySelector( + `${id} .column-margin, .tabset-margin-content` + ); + if (columnEl) + tabEl.addEventListener("shown.bs.tab", function (event) { + const el = event.srcElement; + if (el) { + const visibleCls = `${el.id}-margin-content`; + // walk up until we find a parent tabset + let panelTabsetEl = el.parentElement; + while (panelTabsetEl) { + if (panelTabsetEl.classList.contains("panel-tabset")) { + break; + } + panelTabsetEl = panelTabsetEl.parentElement; + } + + if (panelTabsetEl) { + const prevSib = panelTabsetEl.previousElementSibling; + if ( + prevSib && + prevSib.classList.contains("tabset-margin-container") + ) { + const childNodes = prevSib.querySelectorAll( + ".tabset-margin-content" + ); + for (const childEl of childNodes) { + if (childEl.classList.contains(visibleCls)) { + childEl.classList.remove("collapse"); + } else { + childEl.classList.add("collapse"); + } + } + } + } + } + + layoutMarginEls(); + }); + } + } + + // Manage the visibility of the toc and the sidebar + const marginScrollVisibility = manageSidebarVisiblity(marginSidebarEl, { + id: "quarto-toc-toggle", + titleSelector: "#toc-title", + dismissOnClick: true, + }); + const sidebarScrollVisiblity = manageSidebarVisiblity(sidebarEl, { + id: "quarto-sidebarnav-toggle", + titleSelector: ".title", + dismissOnClick: false, + }); + let tocLeftScrollVisibility; + if (leftTocEl) { + tocLeftScrollVisibility = manageSidebarVisiblity(leftTocEl, { + id: "quarto-lefttoc-toggle", + titleSelector: "#toc-title", + dismissOnClick: true, + }); + } + + // Find the first element that uses formatting in special columns + const conflictingEls = window.document.body.querySelectorAll( + '[class^="column-"], [class*=" column-"], aside, [class*="margin-caption"], [class*=" margin-caption"], [class*="margin-ref"], [class*=" margin-ref"]' + ); 
+ + // Filter all the possibly conflicting elements into ones + // the do conflict on the left or ride side + const arrConflictingEls = Array.from(conflictingEls); + const leftSideConflictEls = arrConflictingEls.filter((el) => { + if (el.tagName === "ASIDE") { + return false; + } + return Array.from(el.classList).find((className) => { + return ( + className !== "column-body" && + className.startsWith("column-") && + !className.endsWith("right") && + !className.endsWith("container") && + className !== "column-margin" + ); + }); + }); + const rightSideConflictEls = arrConflictingEls.filter((el) => { + if (el.tagName === "ASIDE") { + return true; + } + + const hasMarginCaption = Array.from(el.classList).find((className) => { + return className == "margin-caption"; + }); + if (hasMarginCaption) { + return true; + } + + return Array.from(el.classList).find((className) => { + return ( + className !== "column-body" && + !className.endsWith("container") && + className.startsWith("column-") && + !className.endsWith("left") + ); + }); + }); + + const kOverlapPaddingSize = 10; + function toRegions(els) { + return els.map((el) => { + const boundRect = el.getBoundingClientRect(); + const top = + boundRect.top + + document.documentElement.scrollTop - + kOverlapPaddingSize; + return { + top, + bottom: top + el.scrollHeight + 2 * kOverlapPaddingSize, + }; + }); + } + + let hasObserved = false; + const visibleItemObserver = (els) => { + let visibleElements = [...els]; + const intersectionObserver = new IntersectionObserver( + (entries, _observer) => { + entries.forEach((entry) => { + if (entry.isIntersecting) { + if (visibleElements.indexOf(entry.target) === -1) { + visibleElements.push(entry.target); + } + } else { + visibleElements = visibleElements.filter((visibleEntry) => { + return visibleEntry !== entry; + }); + } + }); + + if (!hasObserved) { + hideOverlappedSidebars(); + } + hasObserved = true; + }, + {} + ); + els.forEach((el) => { + intersectionObserver.observe(el); + }); 
+ + return { + getVisibleEntries: () => { + return visibleElements; + }, + }; + }; + + const rightElementObserver = visibleItemObserver(rightSideConflictEls); + const leftElementObserver = visibleItemObserver(leftSideConflictEls); + + const hideOverlappedSidebars = () => { + marginScrollVisibility(toRegions(rightElementObserver.getVisibleEntries())); + sidebarScrollVisiblity(toRegions(leftElementObserver.getVisibleEntries())); + if (tocLeftScrollVisibility) { + tocLeftScrollVisibility( + toRegions(leftElementObserver.getVisibleEntries()) + ); + } + }; + + window.quartoToggleReader = () => { + // Applies a slow class (or removes it) + // to update the transition speed + const slowTransition = (slow) => { + const manageTransition = (id, slow) => { + const el = document.getElementById(id); + if (el) { + if (slow) { + el.classList.add("slow"); + } else { + el.classList.remove("slow"); + } + } + }; + + manageTransition("TOC", slow); + manageTransition("quarto-sidebar", slow); + }; + const readerMode = !isReaderMode(); + setReaderModeValue(readerMode); + + // If we're entering reader mode, slow the transition + if (readerMode) { + slowTransition(readerMode); + } + highlightReaderToggle(readerMode); + hideOverlappedSidebars(); + + // If we're exiting reader mode, restore the non-slow transition + if (!readerMode) { + slowTransition(!readerMode); + } + }; + + const highlightReaderToggle = (readerMode) => { + const els = document.querySelectorAll(".quarto-reader-toggle"); + if (els) { + els.forEach((el) => { + if (readerMode) { + el.classList.add("reader"); + } else { + el.classList.remove("reader"); + } + }); + } + }; + + const setReaderModeValue = (val) => { + if (window.location.protocol !== "file:") { + window.localStorage.setItem("quarto-reader-mode", val); + } else { + localReaderMode = val; + } + }; + + const isReaderMode = () => { + if (window.location.protocol !== "file:") { + return window.localStorage.getItem("quarto-reader-mode") === "true"; + } else { + return 
localReaderMode; + } + }; + let localReaderMode = null; + + const tocOpenDepthStr = tocEl?.getAttribute("data-toc-expanded"); + const tocOpenDepth = tocOpenDepthStr ? Number(tocOpenDepthStr) : 1; + + // Walk the TOC and collapse/expand nodes + // Nodes are expanded if: + // - they are top level + // - they have children that are 'active' links + // - they are directly below an link that is 'active' + const walk = (el, depth) => { + // Tick depth when we enter a UL + if (el.tagName === "UL") { + depth = depth + 1; + } + + // It this is active link + let isActiveNode = false; + if (el.tagName === "A" && el.classList.contains("active")) { + isActiveNode = true; + } + + // See if there is an active child to this element + let hasActiveChild = false; + for (child of el.children) { + hasActiveChild = walk(child, depth) || hasActiveChild; + } + + // Process the collapse state if this is an UL + if (el.tagName === "UL") { + if (tocOpenDepth === -1 && depth > 1) { + el.classList.add("collapse"); + } else if ( + depth <= tocOpenDepth || + hasActiveChild || + prevSiblingIsActiveLink(el) + ) { + el.classList.remove("collapse"); + } else { + el.classList.add("collapse"); + } + + // untick depth when we leave a UL + depth = depth - 1; + } + return hasActiveChild || isActiveNode; + }; + + // walk the TOC and expand / collapse any items that should be shown + + if (tocEl) { + walk(tocEl, 0); + updateActiveLink(); + } + + // Throttle the scroll event and walk peridiocally + window.document.addEventListener( + "scroll", + throttle(() => { + if (tocEl) { + updateActiveLink(); + walk(tocEl, 0); + } + if (!isReaderMode()) { + hideOverlappedSidebars(); + } + }, 5) + ); + window.addEventListener( + "resize", + throttle(() => { + if (!isReaderMode()) { + hideOverlappedSidebars(); + } + }, 10) + ); + hideOverlappedSidebars(); + highlightReaderToggle(isReaderMode()); +}); + +// grouped tabsets +window.addEventListener("pageshow", (_event) => { + function getTabSettings() { + const data = 
localStorage.getItem("quarto-persistent-tabsets-data"); + if (!data) { + localStorage.setItem("quarto-persistent-tabsets-data", "{}"); + return {}; + } + if (data) { + return JSON.parse(data); + } + } + + function setTabSettings(data) { + localStorage.setItem( + "quarto-persistent-tabsets-data", + JSON.stringify(data) + ); + } + + function setTabState(groupName, groupValue) { + const data = getTabSettings(); + data[groupName] = groupValue; + setTabSettings(data); + } + + function toggleTab(tab, active) { + const tabPanelId = tab.getAttribute("aria-controls"); + const tabPanel = document.getElementById(tabPanelId); + if (active) { + tab.classList.add("active"); + tabPanel.classList.add("active"); + } else { + tab.classList.remove("active"); + tabPanel.classList.remove("active"); + } + } + + function toggleAll(selectedGroup, selectorsToSync) { + for (const [thisGroup, tabs] of Object.entries(selectorsToSync)) { + const active = selectedGroup === thisGroup; + for (const tab of tabs) { + toggleTab(tab, active); + } + } + } + + function findSelectorsToSyncByLanguage() { + const result = {}; + const tabs = Array.from( + document.querySelectorAll(`div[data-group] a[id^='tabset-']`) + ); + for (const item of tabs) { + const div = item.parentElement.parentElement.parentElement; + const group = div.getAttribute("data-group"); + if (!result[group]) { + result[group] = {}; + } + const selectorsToSync = result[group]; + const value = item.innerHTML; + if (!selectorsToSync[value]) { + selectorsToSync[value] = []; + } + selectorsToSync[value].push(item); + } + return result; + } + + function setupSelectorSync() { + const selectorsToSync = findSelectorsToSyncByLanguage(); + Object.entries(selectorsToSync).forEach(([group, tabSetsByValue]) => { + Object.entries(tabSetsByValue).forEach(([value, items]) => { + items.forEach((item) => { + item.addEventListener("click", (_event) => { + setTabState(group, value); + toggleAll(value, selectorsToSync[group]); + }); + }); + }); + }); + 
return selectorsToSync; + } + + const selectorsToSync = setupSelectorSync(); + for (const [group, selectedName] of Object.entries(getTabSettings())) { + const selectors = selectorsToSync[group]; + // it's possible that stale state gives us empty selections, so we explicitly check here. + if (selectors) { + toggleAll(selectedName, selectors); + } + } +}); + +function throttle(func, wait) { + let waiting = false; + return function () { + if (!waiting) { + func.apply(this, arguments); + waiting = true; + setTimeout(function () { + waiting = false; + }, wait); + } + }; +} + +function nexttick(func) { + return setTimeout(func, 0); +} diff --git a/docs/site_libs/quarto-html/tippy.css b/docs/site_libs/quarto-html/tippy.css new file mode 100644 index 00000000..e6ae635c --- /dev/null +++ b/docs/site_libs/quarto-html/tippy.css @@ -0,0 +1 @@ +.tippy-box[data-animation=fade][data-state=hidden]{opacity:0}[data-tippy-root]{max-width:calc(100vw - 10px)}.tippy-box{position:relative;background-color:#333;color:#fff;border-radius:4px;font-size:14px;line-height:1.4;white-space:normal;outline:0;transition-property:transform,visibility,opacity}.tippy-box[data-placement^=top]>.tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center 
right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1} \ No newline at end of file diff --git a/docs/site_libs/quarto-html/tippy.umd.min.js b/docs/site_libs/quarto-html/tippy.umd.min.js new file mode 100644 index 00000000..ca292be3 --- /dev/null +++ b/docs/site_libs/quarto-html/tippy.umd.min.js @@ -0,0 +1,2 @@ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t(require("@popperjs/core")):"function"==typeof define&&define.amd?define(["@popperjs/core"],t):(e=e||self).tippy=t(e.Popper)}(this,(function(e){"use strict";var t={passive:!0,capture:!0},n=function(){return document.body};function r(e,t,n){if(Array.isArray(e)){var r=e[t];return null==r?Array.isArray(n)?n[t]:n:r}return e}function o(e,t){var n={}.toString.call(e);return 0===n.indexOf("[object")&&n.indexOf(t+"]")>-1}function i(e,t){return"function"==typeof e?e.apply(void 0,t):e}function a(e,t){return 0===t?e:function(r){clearTimeout(n),n=setTimeout((function(){e(r)}),t)};var n}function s(e,t){var n=Object.assign({},e);return t.forEach((function(e){delete n[e]})),n}function u(e){return[].concat(e)}function c(e,t){-1===e.indexOf(t)&&e.push(t)}function p(e){return e.split("-")[0]}function f(e){return[].slice.call(e)}function l(e){return Object.keys(e).reduce((function(t,n){return void 0!==e[n]&&(t[n]=e[n]),t}),{})}function d(){return document.createElement("div")}function v(e){return["Element","Fragment"].some((function(t){return o(e,t)}))}function m(e){return o(e,"MouseEvent")}function g(e){return!(!e||!e._tippy||e._tippy.reference!==e)}function h(e){return v(e)?[e]:function(e){return o(e,"NodeList")}(e)?f(e):Array.isArray(e)?e:f(document.querySelectorAll(e))}function 
b(e,t){e.forEach((function(e){e&&(e.style.transitionDuration=t+"ms")}))}function y(e,t){e.forEach((function(e){e&&e.setAttribute("data-state",t)}))}function w(e){var t,n=u(e)[0];return null!=n&&null!=(t=n.ownerDocument)&&t.body?n.ownerDocument:document}function E(e,t,n){var r=t+"EventListener";["transitionend","webkitTransitionEnd"].forEach((function(t){e[r](t,n)}))}function O(e,t){for(var n=t;n;){var r;if(e.contains(n))return!0;n=null==n.getRootNode||null==(r=n.getRootNode())?void 0:r.host}return!1}var x={isTouch:!1},C=0;function T(){x.isTouch||(x.isTouch=!0,window.performance&&document.addEventListener("mousemove",A))}function A(){var e=performance.now();e-C<20&&(x.isTouch=!1,document.removeEventListener("mousemove",A)),C=e}function L(){var e=document.activeElement;if(g(e)){var t=e._tippy;e.blur&&!t.state.isVisible&&e.blur()}}var D=!!("undefined"!=typeof window&&"undefined"!=typeof document)&&!!window.msCrypto,R=Object.assign({appendTo:n,aria:{content:"auto",expanded:"auto"},delay:0,duration:[300,250],getReferenceClientRect:null,hideOnClick:!0,ignoreAttributes:!1,interactive:!1,interactiveBorder:2,interactiveDebounce:0,moveTransition:"",offset:[0,10],onAfterUpdate:function(){},onBeforeUpdate:function(){},onCreate:function(){},onDestroy:function(){},onHidden:function(){},onHide:function(){},onMount:function(){},onShow:function(){},onShown:function(){},onTrigger:function(){},onUntrigger:function(){},onClickOutside:function(){},placement:"top",plugins:[],popperOptions:{},render:null,showOnCreate:!1,touch:!0,trigger:"mouseenter focus",triggerTarget:null},{animateFill:!1,followCursor:!1,inlinePositioning:!1,sticky:!1},{allowHTML:!1,animation:"fade",arrow:!0,content:"",inertia:!1,maxWidth:350,role:"tooltip",theme:"",zIndex:9999}),k=Object.keys(R);function P(e){var t=(e.plugins||[]).reduce((function(t,n){var r,o=n.name,i=n.defaultValue;o&&(t[o]=void 0!==e[o]?e[o]:null!=(r=R[o])?r:i);return t}),{});return Object.assign({},e,t)}function j(e,t){var 
n=Object.assign({},t,{content:i(t.content,[e])},t.ignoreAttributes?{}:function(e,t){return(t?Object.keys(P(Object.assign({},R,{plugins:t}))):k).reduce((function(t,n){var r=(e.getAttribute("data-tippy-"+n)||"").trim();if(!r)return t;if("content"===n)t[n]=r;else try{t[n]=JSON.parse(r)}catch(e){t[n]=r}return t}),{})}(e,t.plugins));return n.aria=Object.assign({},R.aria,n.aria),n.aria={expanded:"auto"===n.aria.expanded?t.interactive:n.aria.expanded,content:"auto"===n.aria.content?t.interactive?null:"describedby":n.aria.content},n}function M(e,t){e.innerHTML=t}function V(e){var t=d();return!0===e?t.className="tippy-arrow":(t.className="tippy-svg-arrow",v(e)?t.appendChild(e):M(t,e)),t}function I(e,t){v(t.content)?(M(e,""),e.appendChild(t.content)):"function"!=typeof t.content&&(t.allowHTML?M(e,t.content):e.textContent=t.content)}function S(e){var t=e.firstElementChild,n=f(t.children);return{box:t,content:n.find((function(e){return e.classList.contains("tippy-content")})),arrow:n.find((function(e){return e.classList.contains("tippy-arrow")||e.classList.contains("tippy-svg-arrow")})),backdrop:n.find((function(e){return e.classList.contains("tippy-backdrop")}))}}function N(e){var t=d(),n=d();n.className="tippy-box",n.setAttribute("data-state","hidden"),n.setAttribute("tabindex","-1");var r=d();function o(n,r){var o=S(t),i=o.box,a=o.content,s=o.arrow;r.theme?i.setAttribute("data-theme",r.theme):i.removeAttribute("data-theme"),"string"==typeof r.animation?i.setAttribute("data-animation",r.animation):i.removeAttribute("data-animation"),r.inertia?i.setAttribute("data-inertia",""):i.removeAttribute("data-inertia"),i.style.maxWidth="number"==typeof r.maxWidth?r.maxWidth+"px":r.maxWidth,r.role?i.setAttribute("role",r.role):i.removeAttribute("role"),n.content===r.content&&n.allowHTML===r.allowHTML||I(a,e.props),r.arrow?s?n.arrow!==r.arrow&&(i.removeChild(s),i.appendChild(V(r.arrow))):i.appendChild(V(r.arrow)):s&&i.removeChild(s)}return 
r.className="tippy-content",r.setAttribute("data-state","hidden"),I(r,e.props),t.appendChild(n),n.appendChild(r),o(e.props,e.props),{popper:t,onUpdate:o}}N.$$tippy=!0;var B=1,H=[],U=[];function _(o,s){var v,g,h,C,T,A,L,k,M=j(o,Object.assign({},R,P(l(s)))),V=!1,I=!1,N=!1,_=!1,F=[],W=a(we,M.interactiveDebounce),X=B++,Y=(k=M.plugins).filter((function(e,t){return k.indexOf(e)===t})),$={id:X,reference:o,popper:d(),popperInstance:null,props:M,state:{isEnabled:!0,isVisible:!1,isDestroyed:!1,isMounted:!1,isShown:!1},plugins:Y,clearDelayTimeouts:function(){clearTimeout(v),clearTimeout(g),cancelAnimationFrame(h)},setProps:function(e){if($.state.isDestroyed)return;ae("onBeforeUpdate",[$,e]),be();var t=$.props,n=j(o,Object.assign({},t,l(e),{ignoreAttributes:!0}));$.props=n,he(),t.interactiveDebounce!==n.interactiveDebounce&&(ce(),W=a(we,n.interactiveDebounce));t.triggerTarget&&!n.triggerTarget?u(t.triggerTarget).forEach((function(e){e.removeAttribute("aria-expanded")})):n.triggerTarget&&o.removeAttribute("aria-expanded");ue(),ie(),J&&J(t,n);$.popperInstance&&(Ce(),Ae().forEach((function(e){requestAnimationFrame(e._tippy.popperInstance.forceUpdate)})));ae("onAfterUpdate",[$,e])},setContent:function(e){$.setProps({content:e})},show:function(){var e=$.state.isVisible,t=$.state.isDestroyed,o=!$.state.isEnabled,a=x.isTouch&&!$.props.touch,s=r($.props.duration,0,R.duration);if(e||t||o||a)return;if(te().hasAttribute("disabled"))return;if(ae("onShow",[$],!1),!1===$.props.onShow($))return;$.state.isVisible=!0,ee()&&(z.style.visibility="visible");ie(),de(),$.state.isMounted||(z.style.transition="none");if(ee()){var u=re(),p=u.box,f=u.content;b([p,f],0)}A=function(){var e;if($.state.isVisible&&!_){if(_=!0,z.offsetHeight,z.style.transition=$.props.moveTransition,ee()&&$.props.animation){var 
t=re(),n=t.box,r=t.content;b([n,r],s),y([n,r],"visible")}se(),ue(),c(U,$),null==(e=$.popperInstance)||e.forceUpdate(),ae("onMount",[$]),$.props.animation&&ee()&&function(e,t){me(e,t)}(s,(function(){$.state.isShown=!0,ae("onShown",[$])}))}},function(){var e,t=$.props.appendTo,r=te();e=$.props.interactive&&t===n||"parent"===t?r.parentNode:i(t,[r]);e.contains(z)||e.appendChild(z);$.state.isMounted=!0,Ce()}()},hide:function(){var e=!$.state.isVisible,t=$.state.isDestroyed,n=!$.state.isEnabled,o=r($.props.duration,1,R.duration);if(e||t||n)return;if(ae("onHide",[$],!1),!1===$.props.onHide($))return;$.state.isVisible=!1,$.state.isShown=!1,_=!1,V=!1,ee()&&(z.style.visibility="hidden");if(ce(),ve(),ie(!0),ee()){var i=re(),a=i.box,s=i.content;$.props.animation&&(b([a,s],o),y([a,s],"hidden"))}se(),ue(),$.props.animation?ee()&&function(e,t){me(e,(function(){!$.state.isVisible&&z.parentNode&&z.parentNode.contains(z)&&t()}))}(o,$.unmount):$.unmount()},hideWithInteractivity:function(e){ne().addEventListener("mousemove",W),c(H,W),W(e)},enable:function(){$.state.isEnabled=!0},disable:function(){$.hide(),$.state.isEnabled=!1},unmount:function(){$.state.isVisible&&$.hide();if(!$.state.isMounted)return;Te(),Ae().forEach((function(e){e._tippy.unmount()})),z.parentNode&&z.parentNode.removeChild(z);U=U.filter((function(e){return e!==$})),$.state.isMounted=!1,ae("onHidden",[$])},destroy:function(){if($.state.isDestroyed)return;$.clearDelayTimeouts(),$.unmount(),be(),delete o._tippy,$.state.isDestroyed=!0,ae("onDestroy",[$])}};if(!M.render)return $;var q=M.render($),z=q.popper,J=q.onUpdate;z.setAttribute("data-tippy-root",""),z.id="tippy-"+$.id,$.popper=z,o._tippy=$,z._tippy=$;var G=Y.map((function(e){return e.fn($)})),K=o.hasAttribute("aria-expanded");return 
he(),ue(),ie(),ae("onCreate",[$]),M.showOnCreate&&Le(),z.addEventListener("mouseenter",(function(){$.props.interactive&&$.state.isVisible&&$.clearDelayTimeouts()})),z.addEventListener("mouseleave",(function(){$.props.interactive&&$.props.trigger.indexOf("mouseenter")>=0&&ne().addEventListener("mousemove",W)})),$;function Q(){var e=$.props.touch;return Array.isArray(e)?e:[e,0]}function Z(){return"hold"===Q()[0]}function ee(){var e;return!(null==(e=$.props.render)||!e.$$tippy)}function te(){return L||o}function ne(){var e=te().parentNode;return e?w(e):document}function re(){return S(z)}function oe(e){return $.state.isMounted&&!$.state.isVisible||x.isTouch||C&&"focus"===C.type?0:r($.props.delay,e?0:1,R.delay)}function ie(e){void 0===e&&(e=!1),z.style.pointerEvents=$.props.interactive&&!e?"":"none",z.style.zIndex=""+$.props.zIndex}function ae(e,t,n){var r;(void 0===n&&(n=!0),G.forEach((function(n){n[e]&&n[e].apply(n,t)})),n)&&(r=$.props)[e].apply(r,t)}function se(){var e=$.props.aria;if(e.content){var t="aria-"+e.content,n=z.id;u($.props.triggerTarget||o).forEach((function(e){var r=e.getAttribute(t);if($.state.isVisible)e.setAttribute(t,r?r+" "+n:n);else{var o=r&&r.replace(n,"").trim();o?e.setAttribute(t,o):e.removeAttribute(t)}}))}}function ue(){!K&&$.props.aria.expanded&&u($.props.triggerTarget||o).forEach((function(e){$.props.interactive?e.setAttribute("aria-expanded",$.state.isVisible&&e===te()?"true":"false"):e.removeAttribute("aria-expanded")}))}function ce(){ne().removeEventListener("mousemove",W),H=H.filter((function(e){return e!==W}))}function pe(e){if(!x.isTouch||!N&&"mousedown"!==e.type){var t=e.composedPath&&e.composedPath()[0]||e.target;if(!$.props.interactive||!O(z,t)){if(u($.props.triggerTarget||o).some((function(e){return O(e,t)}))){if(x.isTouch)return;if($.state.isVisible&&$.props.trigger.indexOf("click")>=0)return}else 
ae("onClickOutside",[$,e]);!0===$.props.hideOnClick&&($.clearDelayTimeouts(),$.hide(),I=!0,setTimeout((function(){I=!1})),$.state.isMounted||ve())}}}function fe(){N=!0}function le(){N=!1}function de(){var e=ne();e.addEventListener("mousedown",pe,!0),e.addEventListener("touchend",pe,t),e.addEventListener("touchstart",le,t),e.addEventListener("touchmove",fe,t)}function ve(){var e=ne();e.removeEventListener("mousedown",pe,!0),e.removeEventListener("touchend",pe,t),e.removeEventListener("touchstart",le,t),e.removeEventListener("touchmove",fe,t)}function me(e,t){var n=re().box;function r(e){e.target===n&&(E(n,"remove",r),t())}if(0===e)return t();E(n,"remove",T),E(n,"add",r),T=r}function ge(e,t,n){void 0===n&&(n=!1),u($.props.triggerTarget||o).forEach((function(r){r.addEventListener(e,t,n),F.push({node:r,eventType:e,handler:t,options:n})}))}function he(){var e;Z()&&(ge("touchstart",ye,{passive:!0}),ge("touchend",Ee,{passive:!0})),(e=$.props.trigger,e.split(/\s+/).filter(Boolean)).forEach((function(e){if("manual"!==e)switch(ge(e,ye),e){case"mouseenter":ge("mouseleave",Ee);break;case"focus":ge(D?"focusout":"blur",Oe);break;case"focusin":ge("focusout",Oe)}}))}function be(){F.forEach((function(e){var t=e.node,n=e.eventType,r=e.handler,o=e.options;t.removeEventListener(n,r,o)})),F=[]}function ye(e){var t,n=!1;if($.state.isEnabled&&!xe(e)&&!I){var r="focus"===(null==(t=C)?void 0:t.type);C=e,L=e.currentTarget,ue(),!$.state.isVisible&&m(e)&&H.forEach((function(t){return t(e)})),"click"===e.type&&($.props.trigger.indexOf("mouseenter")<0||V)&&!1!==$.props.hideOnClick&&$.state.isVisible?n=!0:Le(e),"click"===e.type&&(V=!n),n&&!r&&De(e)}}function we(e){var t=e.target,n=te().contains(t)||z.contains(t);"mousemove"===e.type&&n||function(e,t){var n=t.clientX,r=t.clientY;return e.every((function(e){var t=e.popperRect,o=e.popperState,i=e.props.interactiveBorder,a=p(o.placement),s=o.modifiersData.offset;if(!s)return!0;var 
u="bottom"===a?s.top.y:0,c="top"===a?s.bottom.y:0,f="right"===a?s.left.x:0,l="left"===a?s.right.x:0,d=t.top-r+u>i,v=r-t.bottom-c>i,m=t.left-n+f>i,g=n-t.right-l>i;return d||v||m||g}))}(Ae().concat(z).map((function(e){var t,n=null==(t=e._tippy.popperInstance)?void 0:t.state;return n?{popperRect:e.getBoundingClientRect(),popperState:n,props:M}:null})).filter(Boolean),e)&&(ce(),De(e))}function Ee(e){xe(e)||$.props.trigger.indexOf("click")>=0&&V||($.props.interactive?$.hideWithInteractivity(e):De(e))}function Oe(e){$.props.trigger.indexOf("focusin")<0&&e.target!==te()||$.props.interactive&&e.relatedTarget&&z.contains(e.relatedTarget)||De(e)}function xe(e){return!!x.isTouch&&Z()!==e.type.indexOf("touch")>=0}function Ce(){Te();var t=$.props,n=t.popperOptions,r=t.placement,i=t.offset,a=t.getReferenceClientRect,s=t.moveTransition,u=ee()?S(z).arrow:null,c=a?{getBoundingClientRect:a,contextElement:a.contextElement||te()}:o,p=[{name:"offset",options:{offset:i}},{name:"preventOverflow",options:{padding:{top:2,bottom:2,left:5,right:5}}},{name:"flip",options:{padding:5}},{name:"computeStyles",options:{adaptive:!s}},{name:"$$tippy",enabled:!0,phase:"beforeWrite",requires:["computeStyles"],fn:function(e){var t=e.state;if(ee()){var n=re().box;["placement","reference-hidden","escaped"].forEach((function(e){"placement"===e?n.setAttribute("data-placement",t.placement):t.attributes.popper["data-popper-"+e]?n.setAttribute("data-"+e,""):n.removeAttribute("data-"+e)})),t.attributes.popper={}}}}];ee()&&u&&p.push({name:"arrow",options:{element:u,padding:3}}),p.push.apply(p,(null==n?void 0:n.modifiers)||[]),$.popperInstance=e.createPopper(c,z,Object.assign({},n,{placement:r,onFirstUpdate:A,modifiers:p}))}function Te(){$.popperInstance&&($.popperInstance.destroy(),$.popperInstance=null)}function Ae(){return f(z.querySelectorAll("[data-tippy-root]"))}function Le(e){$.clearDelayTimeouts(),e&&ae("onTrigger",[$,e]),de();var 
t=oe(!0),n=Q(),r=n[0],o=n[1];x.isTouch&&"hold"===r&&o&&(t=o),t?v=setTimeout((function(){$.show()}),t):$.show()}function De(e){if($.clearDelayTimeouts(),ae("onUntrigger",[$,e]),$.state.isVisible){if(!($.props.trigger.indexOf("mouseenter")>=0&&$.props.trigger.indexOf("click")>=0&&["mouseleave","mousemove"].indexOf(e.type)>=0&&V)){var t=oe(!1);t?g=setTimeout((function(){$.state.isVisible&&$.hide()}),t):h=requestAnimationFrame((function(){$.hide()}))}}else ve()}}function F(e,n){void 0===n&&(n={});var r=R.plugins.concat(n.plugins||[]);document.addEventListener("touchstart",T,t),window.addEventListener("blur",L);var o=Object.assign({},n,{plugins:r}),i=h(e).reduce((function(e,t){var n=t&&_(t,o);return n&&e.push(n),e}),[]);return v(e)?i[0]:i}F.defaultProps=R,F.setDefaultProps=function(e){Object.keys(e).forEach((function(t){R[t]=e[t]}))},F.currentInput=x;var W=Object.assign({},e.applyStyles,{effect:function(e){var t=e.state,n={popper:{position:t.options.strategy,left:"0",top:"0",margin:"0"},arrow:{position:"absolute"},reference:{}};Object.assign(t.elements.popper.style,n.popper),t.styles=n,t.elements.arrow&&Object.assign(t.elements.arrow.style,n.arrow)}}),X={mouseover:"mouseenter",focusin:"focus",click:"click"};var Y={name:"animateFill",defaultValue:!1,fn:function(e){var t;if(null==(t=e.props.render)||!t.$$tippy)return{};var n=S(e.popper),r=n.box,o=n.content,i=e.props.animateFill?function(){var e=d();return e.className="tippy-backdrop",y([e],"hidden"),e}():null;return{onCreate:function(){i&&(r.insertBefore(i,r.firstElementChild),r.setAttribute("data-animatefill",""),r.style.overflow="hidden",e.setProps({arrow:!1,animation:"shift-away"}))},onMount:function(){if(i){var e=r.style.transitionDuration,t=Number(e.replace("ms",""));o.style.transitionDelay=Math.round(t/10)+"ms",i.style.transitionDuration=e,y([i],"visible")}},onShow:function(){i&&(i.style.transitionDuration="0ms")},onHide:function(){i&&y([i],"hidden")}}}};var $={clientX:0,clientY:0},q=[];function z(e){var 
t=e.clientX,n=e.clientY;$={clientX:t,clientY:n}}var J={name:"followCursor",defaultValue:!1,fn:function(e){var t=e.reference,n=w(e.props.triggerTarget||t),r=!1,o=!1,i=!0,a=e.props;function s(){return"initial"===e.props.followCursor&&e.state.isVisible}function u(){n.addEventListener("mousemove",f)}function c(){n.removeEventListener("mousemove",f)}function p(){r=!0,e.setProps({getReferenceClientRect:null}),r=!1}function f(n){var r=!n.target||t.contains(n.target),o=e.props.followCursor,i=n.clientX,a=n.clientY,s=t.getBoundingClientRect(),u=i-s.left,c=a-s.top;!r&&e.props.interactive||e.setProps({getReferenceClientRect:function(){var e=t.getBoundingClientRect(),n=i,r=a;"initial"===o&&(n=e.left+u,r=e.top+c);var s="horizontal"===o?e.top:r,p="vertical"===o?e.right:n,f="horizontal"===o?e.bottom:r,l="vertical"===o?e.left:n;return{width:p-l,height:f-s,top:s,right:p,bottom:f,left:l}}})}function l(){e.props.followCursor&&(q.push({instance:e,doc:n}),function(e){e.addEventListener("mousemove",z)}(n))}function d(){0===(q=q.filter((function(t){return t.instance!==e}))).filter((function(e){return e.doc===n})).length&&function(e){e.removeEventListener("mousemove",z)}(n)}return{onCreate:l,onDestroy:d,onBeforeUpdate:function(){a=e.props},onAfterUpdate:function(t,n){var i=n.followCursor;r||void 0!==i&&a.followCursor!==i&&(d(),i?(l(),!e.state.isMounted||o||s()||u()):(c(),p()))},onMount:function(){e.props.followCursor&&!o&&(i&&(f($),i=!1),s()||u())},onTrigger:function(e,t){m(t)&&($={clientX:t.clientX,clientY:t.clientY}),o="focus"===t.type},onHidden:function(){e.props.followCursor&&(p(),c(),i=!0)}}}};var G={name:"inlinePositioning",defaultValue:!1,fn:function(e){var t,n=e.reference;var r=-1,o=!1,i=[],a={name:"tippyInlinePositioning",enabled:!0,phase:"afterWrite",fn:function(o){var a=o.state;e.props.inlinePositioning&&(-1!==i.indexOf(a.placement)&&(i=[]),t!==a.placement&&-1===i.indexOf(a.placement)&&(i.push(a.placement),e.setProps({getReferenceClientRect:function(){return function(e){return 
function(e,t,n,r){if(n.length<2||null===e)return t;if(2===n.length&&r>=0&&n[0].left>n[1].right)return n[r]||t;switch(e){case"top":case"bottom":var o=n[0],i=n[n.length-1],a="top"===e,s=o.top,u=i.bottom,c=a?o.left:i.left,p=a?o.right:i.right;return{top:s,bottom:u,left:c,right:p,width:p-c,height:u-s};case"left":case"right":var f=Math.min.apply(Math,n.map((function(e){return e.left}))),l=Math.max.apply(Math,n.map((function(e){return e.right}))),d=n.filter((function(t){return"left"===e?t.left===f:t.right===l})),v=d[0].top,m=d[d.length-1].bottom;return{top:v,bottom:m,left:f,right:l,width:l-f,height:m-v};default:return t}}(p(e),n.getBoundingClientRect(),f(n.getClientRects()),r)}(a.placement)}})),t=a.placement)}};function s(){var t;o||(t=function(e,t){var n;return{popperOptions:Object.assign({},e.popperOptions,{modifiers:[].concat(((null==(n=e.popperOptions)?void 0:n.modifiers)||[]).filter((function(e){return e.name!==t.name})),[t])})}}(e.props,a),o=!0,e.setProps(t),o=!1)}return{onCreate:s,onAfterUpdate:s,onTrigger:function(t,n){if(m(n)){var o=f(e.reference.getClientRects()),i=o.find((function(e){return e.left-2<=n.clientX&&e.right+2>=n.clientX&&e.top-2<=n.clientY&&e.bottom+2>=n.clientY})),a=o.indexOf(i);r=a>-1?a:r}},onHidden:function(){r=-1}}}};var K={name:"sticky",defaultValue:!1,fn:function(e){var t=e.reference,n=e.popper;function r(t){return!0===e.props.sticky||e.props.sticky===t}var o=null,i=null;function a(){var s=r("reference")?(e.popperInstance?e.popperInstance.state.elements.reference:t).getBoundingClientRect():null,u=r("popper")?n.getBoundingClientRect():null;(s&&Q(o,s)||u&&Q(i,u))&&e.popperInstance&&e.popperInstance.update(),o=s,i=u,e.state.isMounted&&requestAnimationFrame(a)}return{onMount:function(){e.props.sticky&&a()}}}};function Q(e,t){return!e||!t||(e.top!==t.top||e.right!==t.right||e.bottom!==t.bottom||e.left!==t.left)}return F.setDefaultProps({plugins:[Y,J,G,K],render:N}),F.createSingleton=function(e,t){var n;void 0===t&&(t={});var 
r,o=e,i=[],a=[],c=t.overrides,p=[],f=!1;function l(){a=o.map((function(e){return u(e.props.triggerTarget||e.reference)})).reduce((function(e,t){return e.concat(t)}),[])}function v(){i=o.map((function(e){return e.reference}))}function m(e){o.forEach((function(t){e?t.enable():t.disable()}))}function g(e){return o.map((function(t){var n=t.setProps;return t.setProps=function(o){n(o),t.reference===r&&e.setProps(o)},function(){t.setProps=n}}))}function h(e,t){var n=a.indexOf(t);if(t!==r){r=t;var s=(c||[]).concat("content").reduce((function(e,t){return e[t]=o[n].props[t],e}),{});e.setProps(Object.assign({},s,{getReferenceClientRect:"function"==typeof s.getReferenceClientRect?s.getReferenceClientRect:function(){var e;return null==(e=i[n])?void 0:e.getBoundingClientRect()}}))}}m(!1),v(),l();var b={fn:function(){return{onDestroy:function(){m(!0)},onHidden:function(){r=null},onClickOutside:function(e){e.props.showOnCreate&&!f&&(f=!0,r=null)},onShow:function(e){e.props.showOnCreate&&!f&&(f=!0,h(e,i[0]))},onTrigger:function(e,t){h(e,t.currentTarget)}}}},y=F(d(),Object.assign({},s(t,["overrides"]),{plugins:[b].concat(t.plugins||[]),triggerTarget:a,popperOptions:Object.assign({},t.popperOptions,{modifiers:[].concat((null==(n=t.popperOptions)?void 0:n.modifiers)||[],[W])})})),w=y.show;y.show=function(e){if(w(),!r&&null==e)return h(y,i[0]);if(!r||null!=e){if("number"==typeof e)return i[e]&&h(y,i[e]);if(o.indexOf(e)>=0){var t=e.reference;return h(y,t)}return i.indexOf(e)>=0?h(y,e):void 0}},y.showNext=function(){var e=i[0];if(!r)return y.show(0);var t=i.indexOf(r);y.show(i[t+1]||e)},y.showPrevious=function(){var e=i[i.length-1];if(!r)return y.show(e);var t=i.indexOf(r),n=i[t-1]||e;y.show(n)};var E=y.setProps;return y.setProps=function(e){c=e.overrides||c,E(e)},y.setInstances=function(e){m(!0),p.forEach((function(e){return e()})),o=e,m(!1),v(),l(),p=g(y),y.setProps({triggerTarget:a})},p=g(y),y},F.delegate=function(e,n){var 
r=[],o=[],i=!1,a=n.target,c=s(n,["target"]),p=Object.assign({},c,{trigger:"manual",touch:!1}),f=Object.assign({touch:R.touch},c,{showOnCreate:!0}),l=F(e,p);function d(e){if(e.target&&!i){var t=e.target.closest(a);if(t){var r=t.getAttribute("data-tippy-trigger")||n.trigger||R.trigger;if(!t._tippy&&!("touchstart"===e.type&&"boolean"==typeof f.touch||"touchstart"!==e.type&&r.indexOf(X[e.type])<0)){var s=F(t,f);s&&(o=o.concat(s))}}}}function v(e,t,n,o){void 0===o&&(o=!1),e.addEventListener(t,n,o),r.push({node:e,eventType:t,handler:n,options:o})}return u(l).forEach((function(e){var n=e.destroy,a=e.enable,s=e.disable;e.destroy=function(e){void 0===e&&(e=!0),e&&o.forEach((function(e){e.destroy()})),o=[],r.forEach((function(e){var t=e.node,n=e.eventType,r=e.handler,o=e.options;t.removeEventListener(n,r,o)})),r=[],n()},e.enable=function(){a(),o.forEach((function(e){return e.enable()})),i=!1},e.disable=function(){s(),o.forEach((function(e){return e.disable()})),i=!0},function(e){var n=e.reference;v(n,"touchstart",d,t),v(n,"mouseover",d),v(n,"focusin",d),v(n,"click",d)}(e)})),l},F.hideAll=function(e){var t=void 0===e?{}:e,n=t.exclude,r=t.duration;U.forEach((function(e){var t=!1;if(n&&(t=g(n)?e.reference===n:e.popper===n.popper),!t){var o=e.props.duration;e.setProps({duration:r}),e.hide(),e.state.isDestroyed||e.setProps({duration:o})}}))},F.roundArrow='',F})); + diff --git a/docs/site_libs/quarto-listing/list.min.js b/docs/site_libs/quarto-listing/list.min.js new file mode 100644 index 00000000..81318815 --- /dev/null +++ b/docs/site_libs/quarto-listing/list.min.js @@ -0,0 +1,2 @@ +var List;List=function(){var t={"./src/add-async.js":function(t){t.exports=function(t){return function e(r,n,s){var i=r.splice(0,50);s=(s=s||[]).concat(t.add(i)),r.length>0?setTimeout((function(){e(r,n,s)}),1):(t.update(),n(s))}}},"./src/filter.js":function(t){t.exports=function(t){return 
t.handlers.filterStart=t.handlers.filterStart||[],t.handlers.filterComplete=t.handlers.filterComplete||[],function(e){if(t.trigger("filterStart"),t.i=1,t.reset.filter(),void 0===e)t.filtered=!1;else{t.filtered=!0;for(var r=t.items,n=0,s=r.length;nv.page,a=new g(t[s],void 0,n),v.items.push(a),r.push(a)}return v.update(),r}m(t.slice(0),e)}},this.show=function(t,e){return this.i=t,this.page=e,v.update(),v},this.remove=function(t,e,r){for(var n=0,s=0,i=v.items.length;s-1&&r.splice(n,1),v},this.trigger=function(t){for(var e=v.handlers[t].length;e--;)v.handlers[t][e](v);return v},this.reset={filter:function(){for(var t=v.items,e=t.length;e--;)t[e].filtered=!1;return v},search:function(){for(var t=v.items,e=t.length;e--;)t[e].found=!1;return v}},this.update=function(){var t=v.items,e=t.length;v.visibleItems=[],v.matchingItems=[],v.templater.clear();for(var r=0;r=v.i&&v.visibleItems.lengthe},innerWindow:function(t,e,r){return t>=e-r&&t<=e+r},dotted:function(t,e,r,n,s,i,a){return this.dottedLeft(t,e,r,n,s,i)||this.dottedRight(t,e,r,n,s,i,a)},dottedLeft:function(t,e,r,n,s,i){return e==r+1&&!this.innerWindow(e,s,i)&&!this.right(e,n)},dottedRight:function(t,e,r,n,s,i,a){return!t.items[a-1].values().dotted&&(e==n&&!this.innerWindow(e,s,i)&&!this.right(e,n))}};return function(e){var n=new i(t.listContainer.id,{listClass:e.paginationClass||"pagination",item:e.item||"
  • ",valueNames:["page","dotted"],searchClass:"pagination-search-that-is-not-supposed-to-exist",sortClass:"pagination-sort-that-is-not-supposed-to-exist"});s.bind(n.listContainer,"click",(function(e){var r=e.target||e.srcElement,n=t.utils.getAttribute(r,"data-page"),s=t.utils.getAttribute(r,"data-i");s&&t.show((s-1)*n+1,n)})),t.on("updated",(function(){r(n,e)})),r(n,e)}}},"./src/parse.js":function(t,e,r){t.exports=function(t){var e=r("./src/item.js")(t),n=function(r,n){for(var s=0,i=r.length;s0?setTimeout((function(){e(r,s)}),1):(t.update(),t.trigger("parseComplete"))};return t.handlers.parseComplete=t.handlers.parseComplete||[],function(){var e=function(t){for(var e=t.childNodes,r=[],n=0,s=e.length;n]/g.exec(t)){var e=document.createElement("tbody");return e.innerHTML=t,e.firstElementChild}if(-1!==t.indexOf("<")){var r=document.createElement("div");return r.innerHTML=t,r.firstElementChild}}},a=function(e,r,n){var s=void 0,i=function(e){for(var r=0,n=t.valueNames.length;r=1;)t.list.removeChild(t.list.firstChild)},function(){var r;if("function"!=typeof t.item){if(!(r="string"==typeof t.item?-1===t.item.indexOf("<")?document.getElementById(t.item):i(t.item):s()))throw new Error("The list needs to have at least one item on init otherwise you'll have to add a template.");r=n(r,t.valueNames),e=function(){return r.cloneNode(!0)}}else e=function(e){var r=t.item(e);return i(r)}}()};t.exports=function(t){return new e(t)}},"./src/utils/classes.js":function(t,e,r){var n=r("./src/utils/index-of.js"),s=/\s+/;Object.prototype.toString;function i(t){if(!t||!t.nodeType)throw new Error("A DOM element reference is required");this.el=t,this.list=t.classList}t.exports=function(t){return new i(t)},i.prototype.add=function(t){if(this.list)return this.list.add(t),this;var e=this.array();return~n(e,t)||e.push(t),this.el.className=e.join(" "),this},i.prototype.remove=function(t){if(this.list)return this.list.remove(t),this;var 
e=this.array(),r=n(e,t);return~r&&e.splice(r,1),this.el.className=e.join(" "),this},i.prototype.toggle=function(t,e){return this.list?(void 0!==e?e!==this.list.toggle(t,e)&&this.list.toggle(t):this.list.toggle(t),this):(void 0!==e?e?this.add(t):this.remove(t):this.has(t)?this.remove(t):this.add(t),this)},i.prototype.array=function(){var t=(this.el.getAttribute("class")||"").replace(/^\s+|\s+$/g,"").split(s);return""===t[0]&&t.shift(),t},i.prototype.has=i.prototype.contains=function(t){return this.list?this.list.contains(t):!!~n(this.array(),t)}},"./src/utils/events.js":function(t,e,r){var n=window.addEventListener?"addEventListener":"attachEvent",s=window.removeEventListener?"removeEventListener":"detachEvent",i="addEventListener"!==n?"on":"",a=r("./src/utils/to-array.js");e.bind=function(t,e,r,s){for(var o=0,l=(t=a(t)).length;o32)return!1;var a=n,o=function(){var t,r={};for(t=0;t=p;b--){var j=o[t.charAt(b-1)];if(C[b]=0===m?(C[b+1]<<1|1)&j:(C[b+1]<<1|1)&j|(v[b+1]|v[b])<<1|1|v[b+1],C[b]&d){var x=l(m,b-1);if(x<=u){if(u=x,!((c=b-1)>a))break;p=Math.max(1,2*a-c)}}}if(l(m+1,a)>u)break;v=C}return!(c<0)}},"./src/utils/get-attribute.js":function(t){t.exports=function(t,e){var r=t.getAttribute&&t.getAttribute(e)||null;if(!r)for(var n=t.attributes,s=n.length,i=0;i=48&&t<=57}function i(t,e){for(var i=(t+="").length,a=(e+="").length,o=0,l=0;o=i&&l=a?-1:l>=a&&o=i?1:i-a}i.caseInsensitive=i.i=function(t,e){return i((""+t).toLowerCase(),(""+e).toLowerCase())},Object.defineProperties(i,{alphabet:{get:function(){return e},set:function(t){r=[];var s=0;if(e=t)for(;s { + if (categoriesLoaded) { + activateCategory(category); + setCategoryHash(category); + } +}; + +window["quarto-listing-loaded"] = () => { + // Process any existing hash + const hash = getHash(); + + if (hash) { + // If there is a category, switch to that + if (hash.category) { + activateCategory(hash.category); + } + // Paginate a specific listing + const listingIds = Object.keys(window["quarto-listings"]); + for (const 
listingId of listingIds) { + const page = hash[getListingPageKey(listingId)]; + if (page) { + showPage(listingId, page); + } + } + } + + const listingIds = Object.keys(window["quarto-listings"]); + for (const listingId of listingIds) { + // The actual list + const list = window["quarto-listings"][listingId]; + + // Update the handlers for pagination events + refreshPaginationHandlers(listingId); + + // Render any visible items that need it + renderVisibleProgressiveImages(list); + + // Whenever the list is updated, we also need to + // attach handlers to the new pagination elements + // and refresh any newly visible items. + list.on("updated", function () { + renderVisibleProgressiveImages(list); + setTimeout(() => refreshPaginationHandlers(listingId)); + + // Show or hide the no matching message + toggleNoMatchingMessage(list); + }); + } +}; + +window.document.addEventListener("DOMContentLoaded", function (_event) { + // Attach click handlers to categories + const categoryEls = window.document.querySelectorAll( + ".quarto-listing-category .category" + ); + + for (const categoryEl of categoryEls) { + const category = categoryEl.getAttribute("data-category"); + categoryEl.onclick = () => { + activateCategory(category); + setCategoryHash(category); + }; + } + + // Attach a click handler to the category title + // (there should be only one, but since it is a class name, handle N) + const categoryTitleEls = window.document.querySelectorAll( + ".quarto-listing-category-title" + ); + for (const categoryTitleEl of categoryTitleEls) { + categoryTitleEl.onclick = () => { + activateCategory(""); + setCategoryHash(""); + }; + } + + categoriesLoaded = true; +}); + +function toggleNoMatchingMessage(list) { + const selector = `#${list.listContainer.id} .listing-no-matching`; + const noMatchingEl = window.document.querySelector(selector); + if (noMatchingEl) { + if (list.visibleItems.length === 0) { + noMatchingEl.classList.remove("d-none"); + } else { + if 
(!noMatchingEl.classList.contains("d-none")) { + noMatchingEl.classList.add("d-none"); + } + } + } +} + +function setCategoryHash(category) { + setHash({ category }); +} + +function setPageHash(listingId, page) { + const currentHash = getHash() || {}; + currentHash[getListingPageKey(listingId)] = page; + setHash(currentHash); +} + +function getListingPageKey(listingId) { + return `${listingId}-page`; +} + +function refreshPaginationHandlers(listingId) { + const listingEl = window.document.getElementById(listingId); + const paginationEls = listingEl.querySelectorAll( + ".pagination li.page-item:not(.disabled) .page.page-link" + ); + for (const paginationEl of paginationEls) { + paginationEl.onclick = (sender) => { + setPageHash(listingId, sender.target.getAttribute("data-i")); + showPage(listingId, sender.target.getAttribute("data-i")); + return false; + }; + } +} + +function renderVisibleProgressiveImages(list) { + // Run through the visible items and render any progressive images + for (const item of list.visibleItems) { + const itemEl = item.elm; + if (itemEl) { + const progressiveImgs = itemEl.querySelectorAll( + `img[${kProgressiveAttr}]` + ); + for (const progressiveImg of progressiveImgs) { + const srcValue = progressiveImg.getAttribute(kProgressiveAttr); + if (srcValue) { + progressiveImg.setAttribute("src", srcValue); + } + progressiveImg.removeAttribute(kProgressiveAttr); + } + } + } +} + +function getHash() { + // Hashes are of the form + // #name:value|name1:value1|name2:value2 + const currentUrl = new URL(window.location); + const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; + return parseHash(hashRaw); +} + +const kAnd = "&"; +const kEquals = "="; + +function parseHash(hash) { + if (!hash) { + return undefined; + } + const hasValuesStrs = hash.split(kAnd); + const hashValues = hasValuesStrs + .map((hashValueStr) => { + const vals = hashValueStr.split(kEquals); + if (vals.length === 2) { + return { name: vals[0], value: vals[1] }; + } else { + return undefined; + } + }) + .filter((value) => { + return value !== undefined; + }); + + const hashObj = {}; + hashValues.forEach((hashValue) => { + hashObj[hashValue.name] = decodeURIComponent(hashValue.value); + }); + return hashObj; +} + +function makeHash(obj) { + return Object.keys(obj) + .map((key) => { + return `${key}${kEquals}${obj[key]}`; + }) + .join(kAnd); +} + +function setHash(obj) { + const hash = makeHash(obj); + window.history.pushState(null, null, `#${hash}`); +} + +function showPage(listingId, page) { + const list = window["quarto-listings"][listingId]; + if (list) { + list.show((page - 1) * list.page + 1, list.page); + } +} + +function activateCategory(category) { + // Deactivate existing categories + const activeEls = window.document.querySelectorAll( + ".quarto-listing-category .category.active" + ); + for (const activeEl of activeEls) { + activeEl.classList.remove("active"); + } + + // Activate this category + const categoryEl = window.document.querySelector( + `.quarto-listing-category .category[data-category='${category}'` + ); + if (categoryEl) { + categoryEl.classList.add("active"); + } + + // Filter the listings to this category + filterListingCategory(category); +} + +function filterListingCategory(category) { + const listingIds = Object.keys(window["quarto-listings"]); + for (const listingId of listingIds) { + const list = window["quarto-listings"][listingId]; + if (list) { + if (category === "") { + // resets the filter + list.filter(); + } else { + // filter to this category + list.filter(function (item) { + const itemValues = 
item.values(); + if (itemValues.categories !== null) { + const categories = itemValues.categories.split(","); + return categories.includes(category); + } else { + return false; + } + }); + } + } + } +} diff --git a/docs/site_libs/quarto-nav/quarto-nav.js b/docs/site_libs/quarto-nav/quarto-nav.js new file mode 100644 index 00000000..3b21201f --- /dev/null +++ b/docs/site_libs/quarto-nav/quarto-nav.js @@ -0,0 +1,277 @@ +const headroomChanged = new CustomEvent("quarto-hrChanged", { + detail: {}, + bubbles: true, + cancelable: false, + composed: false, +}); + +window.document.addEventListener("DOMContentLoaded", function () { + let init = false; + + // Manage the back to top button, if one is present. + let lastScrollTop = window.pageYOffset || document.documentElement.scrollTop; + const scrollDownBuffer = 5; + const scrollUpBuffer = 35; + const btn = document.getElementById("quarto-back-to-top"); + const hideBackToTop = () => { + btn.style.display = "none"; + }; + const showBackToTop = () => { + btn.style.display = "inline-block"; + }; + if (btn) { + window.document.addEventListener( + "scroll", + function () { + const currentScrollTop = + window.pageYOffset || document.documentElement.scrollTop; + + // Shows and hides the button 'intelligently' as the user scrolls + if (currentScrollTop - scrollDownBuffer > lastScrollTop) { + hideBackToTop(); + lastScrollTop = currentScrollTop <= 0 ? 0 : currentScrollTop; + } else if (currentScrollTop < lastScrollTop - scrollUpBuffer) { + showBackToTop(); + lastScrollTop = currentScrollTop <= 0 ? 
0 : currentScrollTop; + } + + // Show the button at the bottom, hides it at the top + if (currentScrollTop <= 0) { + hideBackToTop(); + } else if ( + window.innerHeight + currentScrollTop >= + document.body.offsetHeight + ) { + showBackToTop(); + } + }, + false + ); + } + + function throttle(func, wait) { + var timeout; + return function () { + const context = this; + const args = arguments; + const later = function () { + clearTimeout(timeout); + timeout = null; + func.apply(context, args); + }; + + if (!timeout) { + timeout = setTimeout(later, wait); + } + }; + } + + function headerOffset() { + // Set an offset if there is are fixed top navbar + const headerEl = window.document.querySelector("header.fixed-top"); + if (headerEl) { + return headerEl.clientHeight; + } else { + return 0; + } + } + + function footerOffset() { + const footerEl = window.document.querySelector("footer.footer"); + if (footerEl) { + return footerEl.clientHeight; + } else { + return 0; + } + } + + function updateDocumentOffsetWithoutAnimation() { + updateDocumentOffset(false); + } + + function updateDocumentOffset(animated) { + // set body offset + const topOffset = headerOffset(); + const bodyOffset = topOffset + footerOffset(); + const bodyEl = window.document.body; + bodyEl.setAttribute("data-bs-offset", topOffset); + bodyEl.style.paddingTop = topOffset + "px"; + + // deal with sidebar offsets + const sidebars = window.document.querySelectorAll( + ".sidebar, .headroom-target" + ); + sidebars.forEach((sidebar) => { + if (!animated) { + sidebar.classList.add("notransition"); + // Remove the no transition class after the animation has time to complete + setTimeout(function () { + sidebar.classList.remove("notransition"); + }, 201); + } + + if (window.Headroom && sidebar.classList.contains("sidebar-unpinned")) { + sidebar.style.top = "0"; + sidebar.style.maxHeight = "100vh"; + } else { + sidebar.style.top = topOffset + "px"; + sidebar.style.maxHeight = "calc(100vh - " + topOffset + "px)"; + 
} + }); + + // allow space for footer + const mainContainer = window.document.querySelector(".quarto-container"); + if (mainContainer) { + mainContainer.style.minHeight = "calc(100vh - " + bodyOffset + "px)"; + } + + // link offset + let linkStyle = window.document.querySelector("#quarto-target-style"); + if (!linkStyle) { + linkStyle = window.document.createElement("style"); + linkStyle.setAttribute("id", "quarto-target-style"); + window.document.head.appendChild(linkStyle); + } + while (linkStyle.firstChild) { + linkStyle.removeChild(linkStyle.firstChild); + } + if (topOffset > 0) { + linkStyle.appendChild( + window.document.createTextNode(` + section:target::before { + content: ""; + display: block; + height: ${topOffset}px; + margin: -${topOffset}px 0 0; + }`) + ); + } + if (init) { + window.dispatchEvent(headroomChanged); + } + init = true; + } + + // initialize headroom + var header = window.document.querySelector("#quarto-header"); + if (header && window.Headroom) { + const headroom = new window.Headroom(header, { + tolerance: 5, + onPin: function () { + const sidebars = window.document.querySelectorAll( + ".sidebar, .headroom-target" + ); + sidebars.forEach((sidebar) => { + sidebar.classList.remove("sidebar-unpinned"); + }); + updateDocumentOffset(); + }, + onUnpin: function () { + const sidebars = window.document.querySelectorAll( + ".sidebar, .headroom-target" + ); + sidebars.forEach((sidebar) => { + sidebar.classList.add("sidebar-unpinned"); + }); + updateDocumentOffset(); + }, + }); + headroom.init(); + + let frozen = false; + window.quartoToggleHeadroom = function () { + if (frozen) { + headroom.unfreeze(); + frozen = false; + } else { + headroom.freeze(); + frozen = true; + } + }; + } + + window.addEventListener( + "hashchange", + function (e) { + if ( + getComputedStyle(document.documentElement).scrollBehavior !== "smooth" + ) { + window.scrollTo(0, window.pageYOffset - headerOffset()); + } + }, + false + ); + + // Observe size changed for the header 
+ const headerEl = window.document.querySelector("header.fixed-top"); + if (headerEl && window.ResizeObserver) { + const observer = new window.ResizeObserver( + updateDocumentOffsetWithoutAnimation + ); + observer.observe(headerEl, { + attributes: true, + childList: true, + characterData: true, + }); + } else { + window.addEventListener( + "resize", + throttle(updateDocumentOffsetWithoutAnimation, 50) + ); + } + setTimeout(updateDocumentOffsetWithoutAnimation, 250); + + // fixup index.html links if we aren't on the filesystem + if (window.location.protocol !== "file:") { + const links = window.document.querySelectorAll("a"); + for (let i = 0; i < links.length; i++) { + if (links[i].href) { + links[i].href = links[i].href.replace(/\/index\.html/, "/"); + } + } + + // Fixup any sharing links that require urls + // Append url to any sharing urls + const sharingLinks = window.document.querySelectorAll( + "a.sidebar-tools-main-item" + ); + for (let i = 0; i < sharingLinks.length; i++) { + const sharingLink = sharingLinks[i]; + const href = sharingLink.getAttribute("href"); + if (href) { + sharingLink.setAttribute( + "href", + href.replace("|url|", window.location.href) + ); + } + } + + // Scroll the active navigation item into view, if necessary + const navSidebar = window.document.querySelector("nav#quarto-sidebar"); + if (navSidebar) { + // Find the active item + const activeItem = navSidebar.querySelector("li.sidebar-item a.active"); + if (activeItem) { + // Wait for the scroll height and height to resolve by observing size changes on the + // nav element that is scrollable + const resizeObserver = new ResizeObserver((_entries) => { + // The bottom of the element + const elBottom = activeItem.offsetTop; + const viewBottom = navSidebar.scrollTop + navSidebar.clientHeight; + + // The element height and scroll height are the same, then we are still loading + if (viewBottom !== navSidebar.scrollHeight) { + // Determine if the item isn't visible and scroll to it + if 
(elBottom >= viewBottom) { + navSidebar.scrollTop = elBottom; + } + + // stop observing now since we've completed the scroll + resizeObserver.unobserve(navSidebar); + } + }); + resizeObserver.observe(navSidebar); + } + } + } +}); diff --git a/docs/site_libs/quarto-search/autocomplete.umd.js b/docs/site_libs/quarto-search/autocomplete.umd.js new file mode 100644 index 00000000..619c57cc --- /dev/null +++ b/docs/site_libs/quarto-search/autocomplete.umd.js @@ -0,0 +1,3 @@ +/*! @algolia/autocomplete-js 1.7.3 | MIT License | © Algolia, Inc. and contributors | https://github.com/algolia/autocomplete */ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?t(exports):"function"==typeof define&&define.amd?define(["exports"],t):t((e="undefined"!=typeof globalThis?globalThis:e||self)["@algolia/autocomplete-js"]={})}(this,(function(e){"use strict";function t(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function n(e){for(var n=1;n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function a(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){var n=null==e?null:"undefined"!=typeof Symbol&&e[Symbol.iterator]||e["@@iterator"];if(null==n)return;var r,o,i=[],u=!0,a=!1;try{for(n=n.call(e);!(u=(r=n.next()).done)&&(i.push(r.value),!t||i.length!==t);u=!0);}catch(e){a=!0,o=e}finally{try{u||null==n.return||n.return()}finally{if(a)throw o}}return i}(e,t)||l(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function c(e){return function(e){if(Array.isArray(e))return s(e)}(e)||function(e){if("undefined"!=typeof 
Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||l(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function l(e,t){if(e){if("string"==typeof e)return s(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);return"Object"===n&&e.constructor&&(n=e.constructor.name),"Map"===n||"Set"===n?Array.from(e):"Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)?s(e,t):void 0}}function s(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n=n?null===r?null:0:o}function S(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function I(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function E(e,t){var n=[];return Promise.resolve(e(t)).then((function(e){return Promise.all(e.filter((function(e){return Boolean(e)})).map((function(e){if(e.sourceId,n.includes(e.sourceId))throw new Error("[Autocomplete] The `sourceId` ".concat(JSON.stringify(e.sourceId)," is not unique."));n.push(e.sourceId);var t=function(e){for(var t=1;te.length)&&(t=e.length);for(var n=0,r=new Array(t);ne.length)&&(t=e.length);for(var n=0,r=new Array(t);n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var ae,ce,le,se=null,pe=(ae=-1,ce=-1,le=void 0,function(e){var t=++ae;return Promise.resolve(e).then((function(e){return le&&t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var 
ye=["props","refresh","store"],be=["inputElement","formElement","panelElement"],Oe=["inputElement"],_e=["inputElement","maxLength"],Pe=["item","source"];function je(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function we(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function Ee(e){var t=e.props,n=e.refresh,r=e.store,o=Ie(e,ye);return{getEnvironmentProps:function(e){var n=e.inputElement,o=e.formElement,i=e.panelElement;function u(e){!r.getState().isOpen&&r.pendingRequests.isEmpty()||e.target===n||!1===[o,i].some((function(t){return n=t,r=e.target,n===r||n.contains(r);var n,r}))&&(r.dispatch("blur",null),t.debug||r.pendingRequests.cancelAll())}return we({onTouchStart:u,onMouseDown:u,onTouchMove:function(e){!1!==r.getState().isOpen&&n===t.environment.document.activeElement&&e.target!==n&&n.blur()}},Ie(e,be))},getRootProps:function(e){return we({role:"combobox","aria-expanded":r.getState().isOpen,"aria-haspopup":"listbox","aria-owns":r.getState().isOpen?"".concat(t.id,"-list"):void 0,"aria-labelledby":"".concat(t.id,"-label")},e)},getFormProps:function(e){return e.inputElement,we({action:"",noValidate:!0,role:"search",onSubmit:function(i){var u;i.preventDefault(),t.onSubmit(we({event:i,refresh:n,state:r.getState()},o)),r.dispatch("submit",null),null===(u=e.inputElement)||void 0===u||u.blur()},onReset:function(i){var u;i.preventDefault(),t.onReset(we({event:i,refresh:n,state:r.getState()},o)),r.dispatch("reset",null),null===(u=e.inputElement)||void 0===u||u.focus()}},Ie(e,Oe))},getLabelProps:function(e){return we({htmlFor:"".concat(t.id,"-input"),id:"".concat(t.id,"-label")},e)},getInputProps:function(e){var i;function 
u(e){(t.openOnFocus||Boolean(r.getState().query))&&fe(we({event:e,props:t,query:r.getState().completion||r.getState().query,refresh:n,store:r},o)),r.dispatch("focus",null)}var a=e||{};a.inputElement;var c=a.maxLength,l=void 0===c?512:c,s=Ie(a,_e),p=A(r.getState()),f=function(e){return Boolean(e&&e.match(C))}((null===(i=t.environment.navigator)||void 0===i?void 0:i.userAgent)||""),d=null!=p&&p.itemUrl&&!f?"go":"search";return we({"aria-autocomplete":"both","aria-activedescendant":r.getState().isOpen&&null!==r.getState().activeItemId?"".concat(t.id,"-item-").concat(r.getState().activeItemId):void 0,"aria-controls":r.getState().isOpen?"".concat(t.id,"-list"):void 0,"aria-labelledby":"".concat(t.id,"-label"),value:r.getState().completion||r.getState().query,id:"".concat(t.id,"-input"),autoComplete:"off",autoCorrect:"off",autoCapitalize:"off",enterKeyHint:d,spellCheck:"false",autoFocus:t.autoFocus,placeholder:t.placeholder,maxLength:l,type:"search",onChange:function(e){fe(we({event:e,props:t,query:e.currentTarget.value.slice(0,l),refresh:n,store:r},o))},onKeyDown:function(e){!function(e){var t=e.event,n=e.props,r=e.refresh,o=e.store,i=ge(e,de);if("ArrowUp"===t.key||"ArrowDown"===t.key){var u=function(){var e=n.environment.document.getElementById("".concat(n.id,"-item-").concat(o.getState().activeItemId));e&&(e.scrollIntoViewIfNeeded?e.scrollIntoViewIfNeeded(!1):e.scrollIntoView(!1))},a=function(){var e=A(o.getState());if(null!==o.getState().activeItemId&&e){var n=e.item,u=e.itemInputValue,a=e.itemUrl,c=e.source;c.onActive(ve({event:t,item:n,itemInputValue:u,itemUrl:a,refresh:r,source:c,state:o.getState()},i))}};t.preventDefault(),!1===o.getState().isOpen&&(n.openOnFocus||Boolean(o.getState().query))?fe(ve({event:t,props:n,query:o.getState().query,refresh:r,store:o},i)).then((function(){o.dispatch(t.key,{nextActiveItemId:n.defaultActiveItemId}),a(),setTimeout(u,0)})):(o.dispatch(t.key,{}),a(),u())}else 
if("Escape"===t.key)t.preventDefault(),o.dispatch(t.key,null),o.pendingRequests.cancelAll();else if("Tab"===t.key)o.dispatch("blur",null),o.pendingRequests.cancelAll();else if("Enter"===t.key){if(null===o.getState().activeItemId||o.getState().collections.every((function(e){return 0===e.items.length})))return void(n.debug||o.pendingRequests.cancelAll());t.preventDefault();var c=A(o.getState()),l=c.item,s=c.itemInputValue,p=c.itemUrl,f=c.source;if(t.metaKey||t.ctrlKey)void 0!==p&&(f.onSelect(ve({event:t,item:l,itemInputValue:s,itemUrl:p,refresh:r,source:f,state:o.getState()},i)),n.navigator.navigateNewTab({itemUrl:p,item:l,state:o.getState()}));else if(t.shiftKey)void 0!==p&&(f.onSelect(ve({event:t,item:l,itemInputValue:s,itemUrl:p,refresh:r,source:f,state:o.getState()},i)),n.navigator.navigateNewWindow({itemUrl:p,item:l,state:o.getState()}));else if(t.altKey);else{if(void 0!==p)return f.onSelect(ve({event:t,item:l,itemInputValue:s,itemUrl:p,refresh:r,source:f,state:o.getState()},i)),void n.navigator.navigate({itemUrl:p,item:l,state:o.getState()});fe(ve({event:t,nextState:{isOpen:!1},props:n,query:s,refresh:r,store:o},i)).then((function(){f.onSelect(ve({event:t,item:l,itemInputValue:s,itemUrl:p,refresh:r,source:f,state:o.getState()},i))}))}}}(we({event:e,props:t,refresh:n,store:r},o))},onFocus:u,onBlur:y,onClick:function(n){e.inputElement!==t.environment.document.activeElement||r.getState().isOpen||u(n)}},s)},getPanelProps:function(e){return we({onMouseDown:function(e){e.preventDefault()},onMouseLeave:function(){r.dispatch("mouseleave",null)}},e)},getListProps:function(e){return we({role:"listbox","aria-labelledby":"".concat(t.id,"-label"),id:"".concat(t.id,"-list")},e)},getItemProps:function(e){var i=e.item,u=e.source,a=Ie(e,Pe);return 
we({id:"".concat(t.id,"-item-").concat(i.__autocomplete_id),role:"option","aria-selected":r.getState().activeItemId===i.__autocomplete_id,onMouseMove:function(e){if(i.__autocomplete_id!==r.getState().activeItemId){r.dispatch("mousemove",i.__autocomplete_id);var t=A(r.getState());if(null!==r.getState().activeItemId&&t){var u=t.item,a=t.itemInputValue,c=t.itemUrl,l=t.source;l.onActive(we({event:e,item:u,itemInputValue:a,itemUrl:c,refresh:n,source:l,state:r.getState()},o))}}},onMouseDown:function(e){e.preventDefault()},onClick:function(e){var a=u.getItemInputValue({item:i,state:r.getState()}),c=u.getItemUrl({item:i,state:r.getState()});(c?Promise.resolve():fe(we({event:e,nextState:{isOpen:!1},props:t,query:a,refresh:n,store:r},o))).then((function(){u.onSelect(we({event:e,item:i,itemInputValue:a,itemUrl:c,refresh:n,source:u,state:r.getState()},o))}))}},a)}}}function Ae(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function Ce(e){for(var t=1;t0},reshape:function(e){return e.sources}},e),{},{id:null!==(n=e.id)&&void 0!==n?n:v(),plugins:o,initialState:H({activeItemId:null,query:"",completion:null,collections:[],isOpen:!1,status:"idle",context:{}},e.initialState),onStateChange:function(t){var n;null===(n=e.onStateChange)||void 0===n||n.call(e,t),o.forEach((function(e){var n;return null===(n=e.onStateChange)||void 0===n?void 0:n.call(e,t)}))},onSubmit:function(t){var n;null===(n=e.onSubmit)||void 0===n||n.call(e,t),o.forEach((function(e){var n;return null===(n=e.onSubmit)||void 0===n?void 0:n.call(e,t)}))},onReset:function(t){var n;null===(n=e.onReset)||void 0===n||n.call(e,t),o.forEach((function(e){var n;return null===(n=e.onReset)||void 0===n?void 0:n.call(e,t)}))},getSources:function(n){return Promise.all([].concat(F(o.map((function(e){return 
e.getSources}))),[e.getSources]).filter(Boolean).map((function(e){return E(e,n)}))).then((function(e){return d(e)})).then((function(e){return e.map((function(e){return H(H({},e),{},{onSelect:function(n){e.onSelect(n),t.forEach((function(e){var t;return null===(t=e.onSelect)||void 0===t?void 0:t.call(e,n)}))},onActive:function(n){e.onActive(n),t.forEach((function(e){var t;return null===(t=e.onActive)||void 0===t?void 0:t.call(e,n)}))}})}))}))},navigator:H({navigate:function(e){var t=e.itemUrl;r.location.assign(t)},navigateNewTab:function(e){var t=e.itemUrl,n=r.open(t,"_blank","noopener");null==n||n.focus()},navigateNewWindow:function(e){var t=e.itemUrl;r.open(t,"_blank","noopener")}},e.navigator)})}(e,t),r=R(Te,n,(function(e){var t=e.prevState,r=e.state;n.onStateChange(Be({prevState:t,state:r,refresh:u},o))})),o=function(e){var t=e.store;return{setActiveItemId:function(e){t.dispatch("setActiveItemId",e)},setQuery:function(e){t.dispatch("setQuery",e)},setCollections:function(e){var n=0,r=e.map((function(e){return L(L({},e),{},{items:d(e.items).map((function(e){return L(L({},e),{},{__autocomplete_id:n++})}))})}));t.dispatch("setCollections",r)},setIsOpen:function(e){t.dispatch("setIsOpen",e)},setStatus:function(e){t.dispatch("setStatus",e)},setContext:function(e){t.dispatch("setContext",e)}}}({store:r}),i=Ee(Be({props:n,refresh:u,store:r},o));function u(){return fe(Be({event:new Event("input"),nextState:{isOpen:r.getState().isOpen},props:n,query:r.getState().query,refresh:u,store:r},o))}return n.plugins.forEach((function(e){var n;return null===(n=e.subscribe)||void 0===n?void 0:n.call(e,Be(Be({},o),{},{refresh:u,onSelect:function(e){t.push({onSelect:e})},onActive:function(e){t.push({onActive:e})}}))})),function(e){var t,n,r=e.metadata,o=e.environment;if(null===(t=o.navigator)||void 0===t||null===(n=t.userAgent)||void 0===n?void 0:n.includes("Algolia Crawler")){var 
i=o.document.createElement("meta"),u=o.document.querySelector("head");i.name="algolia:metadata",setTimeout((function(){i.content=JSON.stringify(r),u.appendChild(i)}),0)}}({metadata:ke({plugins:n.plugins,options:e}),environment:n.environment}),Be(Be({refresh:u},i),o)}var Ue=function(e,t,n,r){var o;t[0]=0;for(var i=1;i=5&&((o||!e&&5===r)&&(u.push(r,0,o,n),r=6),e&&(u.push(r,e,0,n),r=6)),o=""},c=0;c"===t?(r=1,o=""):o=t+o[0]:i?t===i?i="":o+=t:'"'===t||"'"===t?i=t:">"===t?(a(),r=1):r&&("="===t?(r=5,n=o,o=""):"/"===t&&(r<5||">"===e[c][l+1])?(a(),3===r&&(u=u[0]),r=u,(u=u[0]).push(2,0,r),r=0):" "===t||"\t"===t||"\n"===t||"\r"===t?(a(),r=2):o+=t),3===r&&"!--"===o&&(r=4,u=u[0])}return a(),u}(e)),t),arguments,[])).length>1?t:t[0]}var We=function(e){var t=e.environment,n=t.document.createElementNS("http://www.w3.org/2000/svg","svg");n.setAttribute("class","aa-ClearIcon"),n.setAttribute("viewBox","0 0 24 24"),n.setAttribute("width","18"),n.setAttribute("height","18"),n.setAttribute("fill","currentColor");var r=t.document.createElementNS("http://www.w3.org/2000/svg","path");return r.setAttribute("d","M5.293 6.707l5.293 5.293-5.293 5.293c-0.391 0.391-0.391 1.024 0 1.414s1.024 0.391 1.414 0l5.293-5.293 5.293 5.293c0.391 0.391 1.024 0.391 1.414 0s0.391-1.024 0-1.414l-5.293-5.293 5.293-5.293c0.391-0.391 0.391-1.024 0-1.414s-1.024-0.391-1.414 0l-5.293 5.293-5.293-5.293c-0.391-0.391-1.024-0.391-1.414 0s-0.391 1.024 0 1.414z"),n.appendChild(r),n};function Qe(e,t){if("string"==typeof t){var n=e.document.querySelector(t);return"The element ".concat(JSON.stringify(t)," is not in the document."),n}return t}function $e(){for(var e=arguments.length,t=new Array(e),n=0;n2&&(u.children=arguments.length>3?lt.call(arguments,2):n),"function"==typeof e&&null!=e.defaultProps)for(i in e.defaultProps)void 0===u[i]&&(u[i]=e.defaultProps[i]);return _t(e,u,r,o,null)}function _t(e,t,n,r,o){var i={type:e,props:t,key:n,ref:r,__k:null,__:null,__b:0,__e:null,__d:void 0,__c:null,__h:null,constructor:void 
0,__v:null==o?++pt:o};return null==o&&null!=st.vnode&&st.vnode(i),i}function Pt(e){return e.children}function jt(e,t){this.props=e,this.context=t}function wt(e,t){if(null==t)return e.__?wt(e.__,e.__.__k.indexOf(e)+1):null;for(var n;t0?_t(d.type,d.props,d.key,null,d.__v):d)){if(d.__=n,d.__b=n.__b+1,null===(f=g[s])||f&&d.key==f.key&&d.type===f.type)g[s]=void 0;else for(p=0;p0&&void 0!==arguments[0]?arguments[0]:[];return{get:function(){return e},add:function(t){var n=e[e.length-1];(null==n?void 0:n.isHighlighted)===t.isHighlighted?e[e.length-1]={value:n.value+t.value,isHighlighted:n.isHighlighted}:e.push(t)}}}(n?[{value:n,isHighlighted:!1}]:[]);return t.forEach((function(e){var t=e.split(Ht);r.add({value:t[0],isHighlighted:!0}),""!==t[1]&&r.add({value:t[1],isHighlighted:!1})})),r.get()}function Wt(e){return function(e){if(Array.isArray(e))return Qt(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return Qt(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);"Object"===n&&e.constructor&&(n=e.constructor.name);if("Map"===n||"Set"===n)return Array.from(e);if("Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return Qt(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function Qt(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n",""":'"',"'":"'"},Gt=new RegExp(/\w/i),Kt=/&(amp|quot|lt|gt|#39);/g,Jt=RegExp(Kt.source);function Yt(e,t){var n,r,o,i=e[t],u=(null===(n=e[t+1])||void 0===n?void 0:n.isHighlighted)||!0,a=(null===(r=e[t-1])||void 0===r?void 0:r.isHighlighted)||!0;return Gt.test((o=i.value)&&Jt.test(o)?o.replace(Kt,(function(e){return zt[e]})):o)||a!==u?i.isHighlighted:a}function Xt(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function Zt(e){for(var t=1;te.length)&&(t=e.length);for(var n=0,r=new Array(t);n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}function mn(e){return function(e){if(Array.isArray(e))return vn(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return vn(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);"Object"===n&&e.constructor&&(n=e.constructor.name);if("Map"===n||"Set"===n)return Array.from(e);if("Arguments"===n||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return vn(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function vn(e,t){(null==t||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n0;if(!O.value.core.openOnFocus&&!t.query)return n;var 
r=Boolean(h.current||O.value.renderer.renderNoResults);return!n&&r||n},__autocomplete_metadata:{userAgents:Sn,options:e}}))})),j=p(n({collections:[],completion:null,context:{},isOpen:!1,query:"",activeItemId:null,status:"idle"},O.value.core.initialState)),w={getEnvironmentProps:O.value.renderer.getEnvironmentProps,getFormProps:O.value.renderer.getFormProps,getInputProps:O.value.renderer.getInputProps,getItemProps:O.value.renderer.getItemProps,getLabelProps:O.value.renderer.getLabelProps,getListProps:O.value.renderer.getListProps,getPanelProps:O.value.renderer.getPanelProps,getRootProps:O.value.renderer.getRootProps},S={setActiveItemId:P.value.setActiveItemId,setQuery:P.value.setQuery,setCollections:P.value.setCollections,setIsOpen:P.value.setIsOpen,setStatus:P.value.setStatus,setContext:P.value.setContext,refresh:P.value.refresh},I=d((function(){return Ve.bind(O.value.renderer.renderer.createElement)})),E=d((function(){return ct({autocomplete:P.value,autocompleteScopeApi:S,classNames:O.value.renderer.classNames,environment:O.value.core.environment,isDetached:_.value,placeholder:O.value.core.placeholder,propGetters:w,setIsModalOpen:k,state:j.current,translations:O.value.renderer.translations})}));function A(){tt(E.value.panel,{style:_.value?{}:wn({panelPlacement:O.value.renderer.panelPlacement,container:E.value.root,form:E.value.form,environment:O.value.core.environment})})}function C(e){j.current=e;var t={autocomplete:P.value,autocompleteScopeApi:S,classNames:O.value.renderer.classNames,components:O.value.renderer.components,container:O.value.renderer.container,html:I.value,dom:E.value,panelContainer:_.value?E.value.detachedContainer:O.value.renderer.panelContainer,propGetters:w,state:j.current,renderer:O.value.renderer.renderer},r=!g(e)&&!h.current&&O.value.renderer.renderNoResults||O.value.renderer.render;!function(e){var 
t=e.autocomplete,r=e.autocompleteScopeApi,o=e.dom,i=e.propGetters,u=e.state;nt(o.root,i.getRootProps(n({state:u,props:t.getRootProps({})},r))),nt(o.input,i.getInputProps(n({state:u,props:t.getInputProps({inputElement:o.input}),inputElement:o.input},r))),tt(o.label,{hidden:"stalled"===u.status}),tt(o.loadingIndicator,{hidden:"stalled"!==u.status}),tt(o.clearButton,{hidden:!u.query})}(t),function(e,t){var r=t.autocomplete,o=t.autocompleteScopeApi,u=t.classNames,a=t.html,c=t.dom,l=t.panelContainer,s=t.propGetters,p=t.state,f=t.components,d=t.renderer;if(p.isOpen){l.contains(c.panel)||"loading"===p.status||l.appendChild(c.panel),c.panel.classList.toggle("aa-Panel--stalled","stalled"===p.status);var m=p.collections.filter((function(e){var t=e.source,n=e.items;return t.templates.noResults||n.length>0})).map((function(e,t){var c=e.source,l=e.items;return d.createElement("section",{key:t,className:u.source,"data-autocomplete-source-id":c.sourceId},c.templates.header&&d.createElement("div",{className:u.sourceHeader},c.templates.header({components:f,createElement:d.createElement,Fragment:d.Fragment,items:l,source:c,state:p,html:a})),c.templates.noResults&&0===l.length?d.createElement("div",{className:u.sourceNoResults},c.templates.noResults({components:f,createElement:d.createElement,Fragment:d.Fragment,source:c,state:p,html:a})):d.createElement("ul",i({className:u.list},s.getListProps(n({state:p,props:r.getListProps({})},o))),l.map((function(e){var t=r.getItemProps({item:e,source:c});return 
d.createElement("li",i({key:t.id,className:u.item},s.getItemProps(n({state:p,props:t},o))),c.templates.item({components:f,createElement:d.createElement,Fragment:d.Fragment,item:e,state:p,html:a}))}))),c.templates.footer&&d.createElement("div",{className:u.sourceFooter},c.templates.footer({components:f,createElement:d.createElement,Fragment:d.Fragment,items:l,source:c,state:p,html:a})))})),v=d.createElement(d.Fragment,null,d.createElement("div",{className:u.panelLayout},m),d.createElement("div",{className:"aa-GradientBottom"})),h=m.reduce((function(e,t){return e[t.props["data-autocomplete-source-id"]]=t,e}),{});e(n(n({children:v,state:p,sections:m,elements:h},d),{},{components:f,html:a},o),c.panel)}else l.contains(c.panel)&&l.removeChild(c.panel)}(r,t)}function D(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};c();var t=O.value.renderer,n=t.components,r=u(t,In);y.current=Ge(r,O.value.core,{components:Ke(n,(function(e){return!e.value.hasOwnProperty("__autocomplete_componentName")})),initialState:j.current},e),m(),l(),P.value.refresh().then((function(){C(j.current)}))}function k(e){requestAnimationFrame((function(){var t=O.value.core.environment.document.body.contains(E.value.detachedOverlay);e!==t&&(e?(O.value.core.environment.document.body.appendChild(E.value.detachedOverlay),O.value.core.environment.document.body.classList.add("aa-Detached"),E.value.input.focus()):(O.value.core.environment.document.body.removeChild(E.value.detachedOverlay),O.value.core.environment.document.body.classList.remove("aa-Detached"),P.value.setQuery(""),P.value.refresh()))}))}return a((function(){var e=P.value.getEnvironmentProps({formElement:E.value.form,panelElement:E.value.panel,inputElement:E.value.input});return tt(O.value.core.environment,e),function(){tt(O.value.core.environment,Object.keys(e).reduce((function(e,t){return n(n({},e),{},o({},t,void 0))}),{}))}})),a((function(){var 
e=_.value?O.value.core.environment.document.body:O.value.renderer.panelContainer,t=_.value?E.value.detachedOverlay:E.value.panel;return _.value&&j.current.isOpen&&k(!0),C(j.current),function(){e.contains(t)&&e.removeChild(t)}})),a((function(){var e=O.value.renderer.container;return e.appendChild(E.value.root),function(){e.removeChild(E.value.root)}})),a((function(){var e=f((function(e){C(e.state)}),0);return b.current=function(t){var n=t.state,r=t.prevState;(_.value&&r.isOpen!==n.isOpen&&k(n.isOpen),_.value||!n.isOpen||r.isOpen||A(),n.query!==r.query)&&O.value.core.environment.document.querySelectorAll(".aa-Panel--scrollable").forEach((function(e){0!==e.scrollTop&&(e.scrollTop=0)}));e({state:n})},function(){b.current=void 0}})),a((function(){var e=f((function(){var e=_.value;_.value=O.value.core.environment.matchMedia(O.value.renderer.detachedMediaQuery).matches,e!==_.value?D({}):requestAnimationFrame(A)}),20);return O.value.core.environment.addEventListener("resize",e),function(){O.value.core.environment.removeEventListener("resize",e)}})),a((function(){if(!_.value)return function(){};function e(e){E.value.detachedContainer.classList.toggle("aa-DetachedContainer--modal",e)}function t(t){e(t.matches)}var n=O.value.core.environment.matchMedia(getComputedStyle(O.value.core.environment.document.documentElement).getPropertyValue("--aa-detached-modal-media-query"));e(n.matches);var r=Boolean(n.addEventListener);return r?n.addEventListener("change",t):n.addListener(t),function(){r?n.removeEventListener("change",t):n.removeListener(t)}})),a((function(){return requestAnimationFrame(A),function(){}})),n(n({},S),{},{update:D,destroy:function(){c()}})},e.getAlgoliaFacets=function(e){var t=En({transformResponse:function(e){return e.facetHits}}),r=e.queries.map((function(e){return n(n({},e),{},{type:"facet"})}));return t(n(n({},e),{},{queries:r}))},e.getAlgoliaResults=An,Object.defineProperty(e,"__esModule",{value:!0})})); + diff --git a/docs/site_libs/quarto-search/fuse.min.js 
b/docs/site_libs/quarto-search/fuse.min.js new file mode 100644 index 00000000..adc28356 --- /dev/null +++ b/docs/site_libs/quarto-search/fuse.min.js @@ -0,0 +1,9 @@ +/** + * Fuse.js v6.6.2 - Lightweight fuzzy-search (http://fusejs.io) + * + * Copyright (c) 2022 Kiro Risk (http://kiro.me) + * All Rights Reserved. Apache Software License 2.0 + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +var e,t;e=this,t=function(){"use strict";function e(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function t(t){for(var n=1;ne.length)&&(t=e.length);for(var n=0,r=new Array(t);n0&&void 0!==arguments[0]?arguments[0]:1,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:3,n=new Map,r=Math.pow(10,t);return{get:function(t){var i=t.match(C).length;if(n.has(i))return n.get(i);var o=1/Math.pow(i,.5*e),c=parseFloat(Math.round(o*r)/r);return n.set(i,c),c},clear:function(){n.clear()}}}var $=function(){function e(){var t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},n=t.getFn,i=void 0===n?I.getFn:n,o=t.fieldNormWeight,c=void 0===o?I.fieldNormWeight:o;r(this,e),this.norm=E(c,3),this.getFn=i,this.isCreated=!1,this.setIndexRecords()}return o(e,[{key:"setSources",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.docs=e}},{key:"setIndexRecords",value:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.records=e}},{key:"setKeys",value:function(){var e=this,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[];this.keys=t,this._keysMap={},t.forEach((function(t,n){e._keysMap[t.id]=n}))}},{key:"create",value:function(){var 
e=this;!this.isCreated&&this.docs.length&&(this.isCreated=!0,g(this.docs[0])?this.docs.forEach((function(t,n){e._addString(t,n)})):this.docs.forEach((function(t,n){e._addObject(t,n)})),this.norm.clear())}},{key:"add",value:function(e){var t=this.size();g(e)?this._addString(e,t):this._addObject(e,t)}},{key:"removeAt",value:function(e){this.records.splice(e,1);for(var t=e,n=this.size();t2&&void 0!==arguments[2]?arguments[2]:{},r=n.getFn,i=void 0===r?I.getFn:r,o=n.fieldNormWeight,c=void 0===o?I.fieldNormWeight:o,a=new $({getFn:i,fieldNormWeight:c});return a.setKeys(e.map(_)),a.setSources(t),a.create(),a}function R(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},n=t.errors,r=void 0===n?0:n,i=t.currentLocation,o=void 0===i?0:i,c=t.expectedLocation,a=void 0===c?0:c,s=t.distance,u=void 0===s?I.distance:s,h=t.ignoreLocation,l=void 0===h?I.ignoreLocation:h,f=r/e.length;if(l)return f;var d=Math.abs(a-o);return u?f+d/u:d?1:f}function N(){for(var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:[],t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:I.minMatchCharLength,n=[],r=-1,i=-1,o=0,c=e.length;o=t&&n.push([r,i]),r=-1)}return e[o-1]&&o-r>=t&&n.push([r,o-1]),n}var P=32;function W(e){for(var t={},n=0,r=e.length;n1&&void 0!==arguments[1]?arguments[1]:{},o=i.location,c=void 0===o?I.location:o,a=i.threshold,s=void 0===a?I.threshold:a,u=i.distance,h=void 0===u?I.distance:u,l=i.includeMatches,f=void 0===l?I.includeMatches:l,d=i.findAllMatches,v=void 0===d?I.findAllMatches:d,g=i.minMatchCharLength,y=void 0===g?I.minMatchCharLength:g,p=i.isCaseSensitive,m=void 0===p?I.isCaseSensitive:p,k=i.ignoreLocation,M=void 0===k?I.ignoreLocation:k;if(r(this,e),this.options={location:c,threshold:s,distance:h,includeMatches:f,findAllMatches:v,minMatchCharLength:y,isCaseSensitive:m,ignoreLocation:M},this.pattern=m?t:t.toLowerCase(),this.chunks=[],this.pattern.length){var 
b=function(e,t){n.chunks.push({pattern:e,alphabet:W(e),startIndex:t})},x=this.pattern.length;if(x>P){for(var w=0,L=x%P,S=x-L;w3&&void 0!==arguments[3]?arguments[3]:{},i=r.location,o=void 0===i?I.location:i,c=r.distance,a=void 0===c?I.distance:c,s=r.threshold,u=void 0===s?I.threshold:s,h=r.findAllMatches,l=void 0===h?I.findAllMatches:h,f=r.minMatchCharLength,d=void 0===f?I.minMatchCharLength:f,v=r.includeMatches,g=void 0===v?I.includeMatches:v,y=r.ignoreLocation,p=void 0===y?I.ignoreLocation:y;if(t.length>P)throw new Error(w(P));for(var m,k=t.length,M=e.length,b=Math.max(0,Math.min(o,M)),x=u,L=b,S=d>1||g,_=S?Array(M):[];(m=e.indexOf(t,L))>-1;){var O=R(t,{currentLocation:m,expectedLocation:b,distance:a,ignoreLocation:p});if(x=Math.min(O,x),L=m+k,S)for(var j=0;j=z;q-=1){var B=q-1,J=n[e.charAt(B)];if(S&&(_[B]=+!!J),K[q]=(K[q+1]<<1|1)&J,F&&(K[q]|=(A[q+1]|A[q])<<1|1|A[q+1]),K[q]&$&&(C=R(t,{errors:F,currentLocation:B,expectedLocation:b,distance:a,ignoreLocation:p}))<=x){if(x=C,(L=B)<=b)break;z=Math.max(1,2*b-L)}}if(R(t,{errors:F+1,currentLocation:b,expectedLocation:b,distance:a,ignoreLocation:p})>x)break;A=K}var U={isMatch:L>=0,score:Math.max(.001,C)};if(S){var V=N(_,d);V.length?g&&(U.indices=V):U.isMatch=!1}return U}(e,n,i,{location:c+o,distance:a,threshold:s,findAllMatches:u,minMatchCharLength:h,includeMatches:r,ignoreLocation:l}),p=y.isMatch,m=y.score,k=y.indices;p&&(g=!0),v+=m,p&&k&&(d=[].concat(f(d),f(k)))}));var y={isMatch:g,score:g?v/this.chunks.length:1};return g&&r&&(y.indices=d),y}}]),e}(),z=function(){function e(t){r(this,e),this.pattern=t}return o(e,[{key:"search",value:function(){}}],[{key:"isMultiMatch",value:function(e){return D(e,this.multiRegex)}},{key:"isSingleMatch",value:function(e){return D(e,this.singleRegex)}}]),e}();function D(e,t){var n=e.match(t);return n?n[1]:null}var K=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var 
t=e===this.pattern;return{isMatch:t,score:t?0:1,indices:[0,this.pattern.length-1]}}}],[{key:"type",get:function(){return"exact"}},{key:"multiRegex",get:function(){return/^="(.*)"$/}},{key:"singleRegex",get:function(){return/^=(.*)$/}}]),n}(z),q=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=-1===e.indexOf(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,e.length-1]}}}],[{key:"type",get:function(){return"inverse-exact"}},{key:"multiRegex",get:function(){return/^!"(.*)"$/}},{key:"singleRegex",get:function(){return/^!(.*)$/}}]),n}(z),B=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=e.startsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,this.pattern.length-1]}}}],[{key:"type",get:function(){return"prefix-exact"}},{key:"multiRegex",get:function(){return/^\^"(.*)"$/}},{key:"singleRegex",get:function(){return/^\^(.*)$/}}]),n}(z),J=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=!e.startsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,e.length-1]}}}],[{key:"type",get:function(){return"inverse-prefix-exact"}},{key:"multiRegex",get:function(){return/^!\^"(.*)"$/}},{key:"singleRegex",get:function(){return/^!\^(.*)$/}}]),n}(z),U=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var t=e.endsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[e.length-this.pattern.length,e.length-1]}}}],[{key:"type",get:function(){return"suffix-exact"}},{key:"multiRegex",get:function(){return/^"(.*)"\$$/}},{key:"singleRegex",get:function(){return/^(.*)\$$/}}]),n}(z),V=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){var 
t=!e.endsWith(this.pattern);return{isMatch:t,score:t?0:1,indices:[0,e.length-1]}}}],[{key:"type",get:function(){return"inverse-suffix-exact"}},{key:"multiRegex",get:function(){return/^!"(.*)"\$$/}},{key:"singleRegex",get:function(){return/^!(.*)\$$/}}]),n}(z),G=function(e){a(n,e);var t=l(n);function n(e){var i,o=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},c=o.location,a=void 0===c?I.location:c,s=o.threshold,u=void 0===s?I.threshold:s,h=o.distance,l=void 0===h?I.distance:h,f=o.includeMatches,d=void 0===f?I.includeMatches:f,v=o.findAllMatches,g=void 0===v?I.findAllMatches:v,y=o.minMatchCharLength,p=void 0===y?I.minMatchCharLength:y,m=o.isCaseSensitive,k=void 0===m?I.isCaseSensitive:m,M=o.ignoreLocation,b=void 0===M?I.ignoreLocation:M;return r(this,n),(i=t.call(this,e))._bitapSearch=new T(e,{location:a,threshold:u,distance:l,includeMatches:d,findAllMatches:g,minMatchCharLength:p,isCaseSensitive:k,ignoreLocation:b}),i}return o(n,[{key:"search",value:function(e){return this._bitapSearch.searchIn(e)}}],[{key:"type",get:function(){return"fuzzy"}},{key:"multiRegex",get:function(){return/^"(.*)"$/}},{key:"singleRegex",get:function(){return/^(.*)$/}}]),n}(z),H=function(e){a(n,e);var t=l(n);function n(e){return r(this,n),t.call(this,e)}return o(n,[{key:"search",value:function(e){for(var t,n=0,r=[],i=this.pattern.length;(t=e.indexOf(this.pattern,n))>-1;)n=t+i,r.push([t,n-1]);var o=!!r.length;return{isMatch:o,score:o?0:1,indices:r}}}],[{key:"type",get:function(){return"include"}},{key:"multiRegex",get:function(){return/^'"(.*)"$/}},{key:"singleRegex",get:function(){return/^'(.*)$/}}]),n}(z),Q=[K,H,B,J,V,U,q,G],X=Q.length,Y=/ +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/;function Z(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return e.split("|").map((function(e){for(var n=e.trim().split(Y).filter((function(e){return e&&!!e.trim()})),r=[],i=0,o=n.length;i1&&void 0!==arguments[1]?arguments[1]:{},i=n.isCaseSensitive,o=void 
0===i?I.isCaseSensitive:i,c=n.includeMatches,a=void 0===c?I.includeMatches:c,s=n.minMatchCharLength,u=void 0===s?I.minMatchCharLength:s,h=n.ignoreLocation,l=void 0===h?I.ignoreLocation:h,f=n.findAllMatches,d=void 0===f?I.findAllMatches:f,v=n.location,g=void 0===v?I.location:v,y=n.threshold,p=void 0===y?I.threshold:y,m=n.distance,k=void 0===m?I.distance:m;r(this,e),this.query=null,this.options={isCaseSensitive:o,includeMatches:a,minMatchCharLength:u,findAllMatches:d,ignoreLocation:l,location:g,threshold:p,distance:k},this.pattern=o?t:t.toLowerCase(),this.query=Z(this.pattern,this.options)}return o(e,[{key:"searchIn",value:function(e){var t=this.query;if(!t)return{isMatch:!1,score:1};var n=this.options,r=n.includeMatches;e=n.isCaseSensitive?e:e.toLowerCase();for(var i=0,o=[],c=0,a=0,s=t.length;a-1&&(n.refIndex=e.idx),t.matches.push(n)}}))}function ve(e,t){t.score=e.score}function ge(e,t){var n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{},r=n.includeMatches,i=void 0===r?I.includeMatches:r,o=n.includeScore,c=void 0===o?I.includeScore:o,a=[];return i&&a.push(de),c&&a.push(ve),e.map((function(e){var n=e.idx,r={item:t[n],refIndex:n};return a.length&&a.forEach((function(t){t(e,r)})),r}))}var ye=function(){function e(n){var i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},o=arguments.length>2?arguments[2]:void 0;r(this,e),this.options=t(t({},I),i),this.options.useExtendedSearch,this._keyStore=new S(this.options.keys),this.setCollection(n,o)}return o(e,[{key:"setCollection",value:function(e,t){if(this._docs=e,t&&!(t instanceof $))throw new Error("Incorrect 'index' type");this._myIndex=t||F(this.options.keys,this._docs,{getFn:this.options.getFn,fieldNormWeight:this.options.fieldNormWeight})}},{key:"add",value:function(e){k(e)&&(this._docs.push(e),this._myIndex.add(e))}},{key:"remove",value:function(){for(var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:function(){return!1},t=[],n=0,r=this._docs.length;n1&&void 
0!==arguments[1]?arguments[1]:{},n=t.limit,r=void 0===n?-1:n,i=this.options,o=i.includeMatches,c=i.includeScore,a=i.shouldSort,s=i.sortFn,u=i.ignoreFieldNorm,h=g(e)?g(this._docs[0])?this._searchStringList(e):this._searchObjectList(e):this._searchLogical(e);return fe(h,{ignoreFieldNorm:u}),a&&h.sort(s),y(r)&&r>-1&&(h=h.slice(0,r)),ge(h,this._docs,{includeMatches:o,includeScore:c})}},{key:"_searchStringList",value:function(e){var t=re(e,this.options),n=this._myIndex.records,r=[];return n.forEach((function(e){var n=e.v,i=e.i,o=e.n;if(k(n)){var c=t.searchIn(n),a=c.isMatch,s=c.score,u=c.indices;a&&r.push({item:n,idx:i,matches:[{score:s,value:n,norm:o,indices:u}]})}})),r}},{key:"_searchLogical",value:function(e){var t=this,n=function(e,t){var n=(arguments.length>2&&void 0!==arguments[2]?arguments[2]:{}).auto,r=void 0===n||n,i=function e(n){var i=Object.keys(n),o=ue(n);if(!o&&i.length>1&&!se(n))return e(le(n));if(he(n)){var c=o?n[ce]:i[0],a=o?n[ae]:n[c];if(!g(a))throw new Error(x(c));var s={keyId:j(c),pattern:a};return r&&(s.searcher=re(a,t)),s}var u={children:[],operator:i[0]};return i.forEach((function(t){var r=n[t];v(r)&&r.forEach((function(t){u.children.push(e(t))}))})),u};return se(e)||(e=le(e)),i(e)}(e,this.options),r=function e(n,r,i){if(!n.children){var o=n.keyId,c=n.searcher,a=t._findMatches({key:t._keyStore.get(o),value:t._myIndex.getValueForItemAtKeyId(r,o),searcher:c});return a&&a.length?[{idx:i,item:r,matches:a}]:[]}for(var s=[],u=0,h=n.children.length;u1&&void 0!==arguments[1]?arguments[1]:{},n=t.getFn,r=void 0===n?I.getFn:n,i=t.fieldNormWeight,o=void 0===i?I.fieldNormWeight:i,c=e.keys,a=e.records,s=new $({getFn:r,fieldNormWeight:o});return s.setKeys(c),s.setIndexRecords(a),s},ye.config=I,function(){ne.push.apply(ne,arguments)}(te),ye},"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).Fuse=t(); \ No newline at end of file diff --git 
a/docs/site_libs/quarto-search/quarto-search.js b/docs/site_libs/quarto-search/quarto-search.js new file mode 100644 index 00000000..f5d852d1 --- /dev/null +++ b/docs/site_libs/quarto-search/quarto-search.js @@ -0,0 +1,1140 @@ +const kQueryArg = "q"; +const kResultsArg = "show-results"; + +// If items don't provide a URL, then both the navigator and the onSelect +// function aren't called (and therefore, the default implementation is used) +// +// We're using this sentinel URL to signal to those handlers that this +// item is a more item (along with the type) and can be handled appropriately +const kItemTypeMoreHref = "0767FDFD-0422-4E5A-BC8A-3BE11E5BBA05"; + +window.document.addEventListener("DOMContentLoaded", function (_event) { + // Ensure that search is available on this page. If it isn't, + // should return early and not do anything + var searchEl = window.document.getElementById("quarto-search"); + if (!searchEl) return; + + const { autocomplete } = window["@algolia/autocomplete-js"]; + + let quartoSearchOptions = {}; + let language = {}; + const searchOptionEl = window.document.getElementById( + "quarto-search-options" + ); + if (searchOptionEl) { + const jsonStr = searchOptionEl.textContent; + quartoSearchOptions = JSON.parse(jsonStr); + language = quartoSearchOptions.language; + } + + // note the search mode + if (quartoSearchOptions.type === "overlay") { + searchEl.classList.add("type-overlay"); + } else { + searchEl.classList.add("type-textbox"); + } + + // Used to determine highlighting behavior for this page + // A `q` query param is expected when the user follows a search + // to this page + const currentUrl = new URL(window.location); + const query = currentUrl.searchParams.get(kQueryArg); + const showSearchResults = currentUrl.searchParams.get(kResultsArg); + const mainEl = window.document.querySelector("main"); + + // highlight matches on the page + if (query !== null && mainEl) { + // perform any highlighting + highlight(escapeRegExp(query), 
mainEl); + + // fix up the URL to remove the q query param + const replacementUrl = new URL(window.location); + replacementUrl.searchParams.delete(kQueryArg); + window.history.replaceState({}, "", replacementUrl); + } + + // function to clear highlighting on the page when the search query changes + // (e.g. if the user edits the query or clears it) + let highlighting = true; + const resetHighlighting = (searchTerm) => { + if (mainEl && highlighting && query !== null && searchTerm !== query) { + clearHighlight(query, mainEl); + highlighting = false; + } + }; + + // Clear search highlighting when the user scrolls sufficiently + const resetFn = () => { + resetHighlighting(""); + window.removeEventListener("quarto-hrChanged", resetFn); + window.removeEventListener("quarto-sectionChanged", resetFn); + }; + + // Register this event after the initial scrolling and settling of events + // on the page + window.addEventListener("quarto-hrChanged", resetFn); + window.addEventListener("quarto-sectionChanged", resetFn); + + // Responsively switch to overlay mode if the search is present on the navbar + // Note that switching the sidebar to overlay mode requires more coordinate (not just + // the media query since we generate different HTML for sidebar overlays than we do + // for sidebar input UI) + const detachedMediaQuery = + quartoSearchOptions.type === "overlay" ? 
"all" : "(max-width: 991px)"; + + // If configured, include the analytics client to send insights + const plugins = configurePlugins(quartoSearchOptions); + + let lastState = null; + const { setIsOpen, setQuery, setCollections } = autocomplete({ + container: searchEl, + detachedMediaQuery: detachedMediaQuery, + defaultActiveItemId: 0, + panelContainer: "#quarto-search-results", + panelPlacement: quartoSearchOptions["panel-placement"], + debug: false, + openOnFocus: true, + plugins, + classNames: { + form: "d-flex", + }, + translations: { + clearButtonTitle: language["search-clear-button-title"], + detachedCancelButtonText: language["search-detached-cancel-button-title"], + submitButtonTitle: language["search-submit-button-title"], + }, + initialState: { + query, + }, + getItemUrl({ item }) { + return item.href; + }, + onStateChange({ state }) { + // Perhaps reset highlighting + resetHighlighting(state.query); + + // If the panel just opened, ensure the panel is positioned properly + if (state.isOpen) { + if (lastState && !lastState.isOpen) { + setTimeout(() => { + positionPanel(quartoSearchOptions["panel-placement"]); + }, 150); + } + } + + // Perhaps show the copy link + showCopyLink(state.query, quartoSearchOptions); + + lastState = state; + }, + reshape({ sources, state }) { + return sources.map((source) => { + try { + const items = source.getItems(); + + // Validate the items + validateItems(items); + + // group the items by document + const groupedItems = new Map(); + items.forEach((item) => { + const hrefParts = item.href.split("#"); + const baseHref = hrefParts[0]; + const isDocumentItem = hrefParts.length === 1; + + const items = groupedItems.get(baseHref); + if (!items) { + groupedItems.set(baseHref, [item]); + } else { + // If the href for this item matches the document + // exactly, place this item first as it is the item that represents + // the document itself + if (isDocumentItem) { + items.unshift(item); + } else { + items.push(item); + } + 
groupedItems.set(baseHref, items); + } + }); + + const reshapedItems = []; + let count = 1; + for (const [_key, value] of groupedItems) { + const firstItem = value[0]; + reshapedItems.push({ + ...firstItem, + type: kItemTypeDoc, + }); + + const collapseMatches = quartoSearchOptions["collapse-after"]; + const collapseCount = + typeof collapseMatches === "number" ? collapseMatches : 1; + + if (value.length > 1) { + const target = `search-more-${count}`; + const isExpanded = + state.context.expanded && + state.context.expanded.includes(target); + + const remainingCount = value.length - collapseCount; + + for (let i = 1; i < value.length; i++) { + if (collapseMatches && i === collapseCount) { + reshapedItems.push({ + target, + title: isExpanded + ? language["search-hide-matches-text"] + : remainingCount === 1 + ? `${remainingCount} ${language["search-more-match-text"]}` + : `${remainingCount} ${language["search-more-matches-text"]}`, + type: kItemTypeMore, + href: kItemTypeMoreHref, + }); + } + + if (isExpanded || !collapseMatches || i < collapseCount) { + reshapedItems.push({ + ...value[i], + type: kItemTypeItem, + target, + }); + } + } + } + count += 1; + } + + return { + ...source, + getItems() { + return reshapedItems; + }, + }; + } catch (error) { + // Some form of error occurred + return { + ...source, + getItems() { + return [ + { + title: error.name || "An Error Occurred While Searching", + text: + error.message || + "An unknown error occurred while attempting to perform the requested search.", + type: kItemTypeError, + }, + ]; + }, + }; + } + }); + }, + navigator: { + navigate({ itemUrl }) { + if (itemUrl !== offsetURL(kItemTypeMoreHref)) { + window.location.assign(itemUrl); + } + }, + navigateNewTab({ itemUrl }) { + if (itemUrl !== offsetURL(kItemTypeMoreHref)) { + const windowReference = window.open(itemUrl, "_blank", "noopener"); + if (windowReference) { + windowReference.focus(); + } + } + }, + navigateNewWindow({ itemUrl }) { + if (itemUrl !== 
offsetURL(kItemTypeMoreHref)) { + window.open(itemUrl, "_blank", "noopener"); + } + }, + }, + getSources({ state, setContext, setActiveItemId, refresh }) { + return [ + { + sourceId: "documents", + getItemUrl({ item }) { + if (item.href) { + return offsetURL(item.href); + } else { + return undefined; + } + }, + onSelect({ + item, + state, + setContext, + setIsOpen, + setActiveItemId, + refresh, + }) { + if (item.type === kItemTypeMore) { + toggleExpanded(item, state, setContext, setActiveItemId, refresh); + + // Toggle more + setIsOpen(true); + } + }, + getItems({ query }) { + if (query === null || query === "") { + return []; + } + + const limit = quartoSearchOptions.limit; + if (quartoSearchOptions.algolia) { + return algoliaSearch(query, limit, quartoSearchOptions.algolia); + } else { + // Fuse search options + const fuseSearchOptions = { + isCaseSensitive: false, + shouldSort: true, + minMatchCharLength: 2, + limit: limit, + }; + + return readSearchData().then(function (fuse) { + return fuseSearch(query, fuse, fuseSearchOptions); + }); + } + }, + templates: { + noResults({ createElement }) { + const hasQuery = lastState.query; + + return createElement( + "div", + { + class: `quarto-search-no-results${ + hasQuery ? 
"" : " no-query" + }`, + }, + language["search-no-results-text"] + ); + }, + header({ items, createElement }) { + // count the documents + const count = items.filter((item) => { + return item.type === kItemTypeDoc; + }).length; + + if (count > 0) { + return createElement( + "div", + { class: "search-result-header" }, + `${count} ${language["search-matching-documents-text"]}` + ); + } else { + return createElement( + "div", + { class: "search-result-header-no-results" }, + `` + ); + } + }, + footer({ _items, createElement }) { + if ( + quartoSearchOptions.algolia && + quartoSearchOptions.algolia["show-logo"] + ) { + const libDir = quartoSearchOptions.algolia["libDir"]; + const logo = createElement("img", { + src: offsetURL( + `${libDir}/quarto-search/search-by-algolia.svg` + ), + class: "algolia-search-logo", + }); + return createElement( + "a", + { href: "http://www.algolia.com/" }, + logo + ); + } + }, + + item({ item, createElement }) { + return renderItem( + item, + createElement, + state, + setActiveItemId, + setContext, + refresh + ); + }, + }, + }, + ]; + }, + }); + + window.quartoOpenSearch = () => { + setIsOpen(false); + setIsOpen(true); + focusSearchInput(); + }; + + // Remove the labeleledby attribute since it is pointing + // to a non-existent label + if (quartoSearchOptions.type === "overlay") { + const inputEl = window.document.querySelector( + "#quarto-search .aa-Autocomplete" + ); + if (inputEl) { + inputEl.removeAttribute("aria-labelledby"); + } + } + + // If the main document scrolls dismiss the search results + // (otherwise, since they're floating in the document they can scroll with the document) + window.document.body.onscroll = () => { + setIsOpen(false); + }; + + if (showSearchResults) { + setIsOpen(true); + focusSearchInput(); + } +}); + +function configurePlugins(quartoSearchOptions) { + const autocompletePlugins = []; + const algoliaOptions = quartoSearchOptions.algolia; + if ( + algoliaOptions && + algoliaOptions["analytics-events"] && + 
algoliaOptions["search-only-api-key"] && + algoliaOptions["application-id"] + ) { + const apiKey = algoliaOptions["search-only-api-key"]; + const appId = algoliaOptions["application-id"]; + + // Aloglia insights may not be loaded because they require cookie consent + // Use deferred loading so events will start being recorded when/if consent + // is granted. + const algoliaInsightsDeferredPlugin = deferredLoadPlugin(() => { + if ( + window.aa && + window["@algolia/autocomplete-plugin-algolia-insights"] + ) { + window.aa("init", { + appId, + apiKey, + useCookie: true, + }); + + const { createAlgoliaInsightsPlugin } = + window["@algolia/autocomplete-plugin-algolia-insights"]; + // Register the insights client + const algoliaInsightsPlugin = createAlgoliaInsightsPlugin({ + insightsClient: window.aa, + onItemsChange({ insights, insightsEvents }) { + const events = insightsEvents.map((event) => { + const maxEvents = event.objectIDs.slice(0, 20); + return { + ...event, + objectIDs: maxEvents, + }; + }); + + insights.viewedObjectIDs(...events); + }, + }); + return algoliaInsightsPlugin; + } + }); + + // Add the plugin + autocompletePlugins.push(algoliaInsightsDeferredPlugin); + return autocompletePlugins; + } +} + +// For plugins that may not load immediately, create a wrapper +// plugin and forward events and plugin data once the plugin +// is initialized. This is useful for cases like cookie consent +// which may prevent the analytics insights event plugin from initializing +// immediately. 
+function deferredLoadPlugin(createPlugin) { + let plugin = undefined; + let subscribeObj = undefined; + const wrappedPlugin = () => { + if (!plugin && subscribeObj) { + plugin = createPlugin(); + if (plugin && plugin.subscribe) { + plugin.subscribe(subscribeObj); + } + } + return plugin; + }; + + return { + subscribe: (obj) => { + subscribeObj = obj; + }, + onStateChange: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.onStateChange) { + plugin.onStateChange(obj); + } + }, + onSubmit: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.onSubmit) { + plugin.onSubmit(obj); + } + }, + onReset: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.onReset) { + plugin.onReset(obj); + } + }, + getSources: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.getSources) { + return plugin.getSources(obj); + } else { + return Promise.resolve([]); + } + }, + data: (obj) => { + const plugin = wrappedPlugin(); + if (plugin && plugin.data) { + plugin.data(obj); + } + }, + }; +} + +function validateItems(items) { + // Validate the first item + if (items.length > 0) { + const item = items[0]; + const missingFields = []; + if (item.href == undefined) { + missingFields.push("href"); + } + if (!item.title == undefined) { + missingFields.push("title"); + } + if (!item.text == undefined) { + missingFields.push("text"); + } + + if (missingFields.length === 1) { + throw { + name: `Error: Search index is missing the ${missingFields[0]} field.`, + message: `The items being returned for this search do not include all the required fields. 
Please ensure that your index items include the ${missingFields[0]} field or use index-fields in your _quarto.yml file to specify the field names.`, + }; + } else if (missingFields.length > 1) { + const missingFieldList = missingFields + .map((field) => { + return `${field}`; + }) + .join(", "); + + throw { + name: `Error: Search index is missing the following fields: ${missingFieldList}.`, + message: `The items being returned for this search do not include all the required fields. Please ensure that your index items includes the following fields: ${missingFieldList}, or use index-fields in your _quarto.yml file to specify the field names.`, + }; + } + } +} + +let lastQuery = null; +function showCopyLink(query, options) { + const language = options.language; + lastQuery = query; + // Insert share icon + const inputSuffixEl = window.document.body.querySelector( + ".aa-Form .aa-InputWrapperSuffix" + ); + + if (inputSuffixEl) { + let copyButtonEl = window.document.body.querySelector( + ".aa-Form .aa-InputWrapperSuffix .aa-CopyButton" + ); + + if (copyButtonEl === null) { + copyButtonEl = window.document.createElement("button"); + copyButtonEl.setAttribute("class", "aa-CopyButton"); + copyButtonEl.setAttribute("type", "button"); + copyButtonEl.setAttribute("title", language["search-copy-link-title"]); + copyButtonEl.onmousedown = (e) => { + e.preventDefault(); + e.stopPropagation(); + }; + + const linkIcon = "bi-clipboard"; + const checkIcon = "bi-check2"; + + const shareIconEl = window.document.createElement("i"); + shareIconEl.setAttribute("class", `bi ${linkIcon}`); + copyButtonEl.appendChild(shareIconEl); + inputSuffixEl.prepend(copyButtonEl); + + const clipboard = new window.ClipboardJS(".aa-CopyButton", { + text: function (_trigger) { + const copyUrl = new URL(window.location); + copyUrl.searchParams.set(kQueryArg, lastQuery); + copyUrl.searchParams.set(kResultsArg, "1"); + return copyUrl.toString(); + }, + }); + clipboard.on("success", function (e) { + // Focus 
the input + + // button target + const button = e.trigger; + const icon = button.querySelector("i.bi"); + + // flash "checked" + icon.classList.add(checkIcon); + icon.classList.remove(linkIcon); + setTimeout(function () { + icon.classList.remove(checkIcon); + icon.classList.add(linkIcon); + }, 1000); + }); + } + + // If there is a query, show the link icon + if (copyButtonEl) { + if (lastQuery && options["copy-button"]) { + copyButtonEl.style.display = "flex"; + } else { + copyButtonEl.style.display = "none"; + } + } + } +} + +/* Search Index Handling */ +// create the index +var fuseIndex = undefined; +async function readSearchData() { + // Initialize the search index on demand + if (fuseIndex === undefined) { + // create fuse index + const options = { + keys: [ + { name: "title", weight: 20 }, + { name: "section", weight: 20 }, + { name: "text", weight: 10 }, + ], + ignoreLocation: true, + threshold: 0.1, + }; + const fuse = new window.Fuse([], options); + + // fetch the main search.json + const response = await fetch(offsetURL("search.json")); + if (response.status == 200) { + return response.json().then(function (searchDocs) { + searchDocs.forEach(function (searchDoc) { + fuse.add(searchDoc); + }); + fuseIndex = fuse; + return fuseIndex; + }); + } else { + return Promise.reject( + new Error( + "Unexpected status from search index request: " + response.status + ) + ); + } + } + return fuseIndex; +} + +function inputElement() { + return window.document.body.querySelector(".aa-Form .aa-Input"); +} + +function focusSearchInput() { + setTimeout(() => { + const inputEl = inputElement(); + if (inputEl) { + inputEl.focus(); + } + }, 50); +} + +/* Panels */ +const kItemTypeDoc = "document"; +const kItemTypeMore = "document-more"; +const kItemTypeItem = "document-item"; +const kItemTypeError = "error"; + +function renderItem( + item, + createElement, + state, + setActiveItemId, + setContext, + refresh +) { + switch (item.type) { + case kItemTypeDoc: + return 
createDocumentCard( + createElement, + "file-richtext", + item.title, + item.section, + item.text, + item.href + ); + case kItemTypeMore: + return createMoreCard( + createElement, + item, + state, + setActiveItemId, + setContext, + refresh + ); + case kItemTypeItem: + return createSectionCard( + createElement, + item.section, + item.text, + item.href + ); + case kItemTypeError: + return createErrorCard(createElement, item.title, item.text); + default: + return undefined; + } +} + +function createDocumentCard(createElement, icon, title, section, text, href) { + const iconEl = createElement("i", { + class: `bi bi-${icon} search-result-icon`, + }); + const titleEl = createElement("p", { class: "search-result-title" }, title); + const titleContainerEl = createElement( + "div", + { class: "search-result-title-container" }, + [iconEl, titleEl] + ); + + const textEls = []; + if (section) { + const sectionEl = createElement( + "p", + { class: "search-result-section" }, + section + ); + textEls.push(sectionEl); + } + const descEl = createElement("p", { + class: "search-result-text", + dangerouslySetInnerHTML: { + __html: text, + }, + }); + textEls.push(descEl); + + const textContainerEl = createElement( + "div", + { class: "search-result-text-container" }, + textEls + ); + + const containerEl = createElement( + "div", + { + class: "search-result-container", + }, + [titleContainerEl, textContainerEl] + ); + + const linkEl = createElement( + "a", + { + href: offsetURL(href), + class: "search-result-link", + }, + containerEl + ); + + const classes = ["search-result-doc", "search-item"]; + if (!section) { + classes.push("document-selectable"); + } + + return createElement( + "div", + { + class: classes.join(" "), + }, + linkEl + ); +} + +function createMoreCard( + createElement, + item, + state, + setActiveItemId, + setContext, + refresh +) { + const moreCardEl = createElement( + "div", + { + class: "search-result-more search-item", + onClick: (e) => { + // Handle expanding the 
sections by adding the expanded + // section to the list of expanded sections + toggleExpanded(item, state, setContext, setActiveItemId, refresh); + e.stopPropagation(); + }, + }, + item.title + ); + + return moreCardEl; +} + +function toggleExpanded(item, state, setContext, setActiveItemId, refresh) { + const expanded = state.context.expanded || []; + if (expanded.includes(item.target)) { + setContext({ + expanded: expanded.filter((target) => target !== item.target), + }); + } else { + setContext({ expanded: [...expanded, item.target] }); + } + + refresh(); + setActiveItemId(item.__autocomplete_id); +} + +function createSectionCard(createElement, section, text, href) { + const sectionEl = createSection(createElement, section, text, href); + return createElement( + "div", + { + class: "search-result-doc-section search-item", + }, + sectionEl + ); +} + +function createSection(createElement, title, text, href) { + const descEl = createElement("p", { + class: "search-result-text", + dangerouslySetInnerHTML: { + __html: text, + }, + }); + + const titleEl = createElement("p", { class: "search-result-section" }, title); + const linkEl = createElement( + "a", + { + href: offsetURL(href), + class: "search-result-link", + }, + [titleEl, descEl] + ); + return linkEl; +} + +function createErrorCard(createElement, title, text) { + const descEl = createElement("p", { + class: "search-error-text", + dangerouslySetInnerHTML: { + __html: text, + }, + }); + + const titleEl = createElement("p", { + class: "search-error-title", + dangerouslySetInnerHTML: { + __html: ` ${title}`, + }, + }); + const errorEl = createElement("div", { class: "search-error" }, [ + titleEl, + descEl, + ]); + return errorEl; +} + +function positionPanel(pos) { + const panelEl = window.document.querySelector( + "#quarto-search-results .aa-Panel" + ); + const inputEl = window.document.querySelector( + "#quarto-search .aa-Autocomplete" + ); + + if (panelEl && inputEl) { + panelEl.style.top = 
`${Math.round(panelEl.offsetTop)}px`; + if (pos === "start") { + panelEl.style.left = `${Math.round(inputEl.left)}px`; + } else { + panelEl.style.right = `${Math.round(inputEl.offsetRight)}px`; + } + } +} + +/* Highlighting */ +// highlighting functions +function highlightMatch(query, text) { + if (text) { + const start = text.toLowerCase().indexOf(query.toLowerCase()); + if (start !== -1) { + const startMark = ""; + const endMark = ""; + + const end = start + query.length; + text = + text.slice(0, start) + + startMark + + text.slice(start, end) + + endMark + + text.slice(end); + const startInfo = clipStart(text, start); + const endInfo = clipEnd( + text, + startInfo.position + startMark.length + endMark.length + ); + text = + startInfo.prefix + + text.slice(startInfo.position, endInfo.position) + + endInfo.suffix; + + return text; + } else { + return text; + } + } else { + return text; + } +} + +function clipStart(text, pos) { + const clipStart = pos - 50; + if (clipStart < 0) { + // This will just return the start of the string + return { + position: 0, + prefix: "", + }; + } else { + // We're clipping before the start of the string, walk backwards to the first space. + const spacePos = findSpace(text, pos, -1); + return { + position: spacePos.position, + prefix: "", + }; + } +} + +function clipEnd(text, pos) { + const clipEnd = pos + 200; + if (clipEnd > text.length) { + return { + position: text.length, + suffix: "", + }; + } else { + const spacePos = findSpace(text, clipEnd, 1); + return { + position: spacePos.position, + suffix: spacePos.clipped ? "…" : "", + }; + } +} + +function findSpace(text, start, step) { + let stepPos = start; + while (stepPos > -1 && stepPos < text.length) { + const char = text[stepPos]; + if (char === " " || char === "," || char === ":") { + return { + position: step === 1 ? 
stepPos : stepPos - step, + clipped: stepPos > 1 && stepPos < text.length, + }; + } + stepPos = stepPos + step; + } + + return { + position: stepPos - step, + clipped: false, + }; +} + +// removes highlighting as implemented by the mark tag +function clearHighlight(searchterm, el) { + const childNodes = el.childNodes; + for (let i = childNodes.length - 1; i >= 0; i--) { + const node = childNodes[i]; + if (node.nodeType === Node.ELEMENT_NODE) { + if ( + node.tagName === "MARK" && + node.innerText.toLowerCase() === searchterm.toLowerCase() + ) { + el.replaceChild(document.createTextNode(node.innerText), node); + } else { + clearHighlight(searchterm, node); + } + } + } +} + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string +} + +// highlight matches +function highlight(term, el) { + const termRegex = new RegExp(term, "ig"); + const childNodes = el.childNodes; + + // walk back to front avoid mutating elements in front of us + for (let i = childNodes.length - 1; i >= 0; i--) { + const node = childNodes[i]; + + if (node.nodeType === Node.TEXT_NODE) { + // Search text nodes for text to highlight + const text = node.nodeValue; + + let startIndex = 0; + let matchIndex = text.search(termRegex); + if (matchIndex > -1) { + const markFragment = document.createDocumentFragment(); + while (matchIndex > -1) { + const prefix = text.slice(startIndex, matchIndex); + markFragment.appendChild(document.createTextNode(prefix)); + + const mark = document.createElement("mark"); + mark.appendChild( + document.createTextNode( + text.slice(matchIndex, matchIndex + term.length) + ) + ); + markFragment.appendChild(mark); + + startIndex = matchIndex + term.length; + matchIndex = text.slice(startIndex).search(new RegExp(term, "ig")); + if (matchIndex > -1) { + matchIndex = startIndex + matchIndex; + } + } + if (startIndex < text.length) { + markFragment.appendChild( + document.createTextNode(text.slice(startIndex, 
text.length)) + ); + } + + el.replaceChild(markFragment, node); + } + } else if (node.nodeType === Node.ELEMENT_NODE) { + // recurse through elements + highlight(term, node); + } + } +} + +/* Link Handling */ +// get the offset from this page for a given site root relative url +function offsetURL(url) { + var offset = getMeta("quarto:offset"); + return offset ? offset + url : url; +} + +// read a meta tag value +function getMeta(metaName) { + var metas = window.document.getElementsByTagName("meta"); + for (let i = 0; i < metas.length; i++) { + if (metas[i].getAttribute("name") === metaName) { + return metas[i].getAttribute("content"); + } + } + return ""; +} + +function algoliaSearch(query, limit, algoliaOptions) { + const { getAlgoliaResults } = window["@algolia/autocomplete-preset-algolia"]; + + const applicationId = algoliaOptions["application-id"]; + const searchOnlyApiKey = algoliaOptions["search-only-api-key"]; + const indexName = algoliaOptions["index-name"]; + const indexFields = algoliaOptions["index-fields"]; + const searchClient = window.algoliasearch(applicationId, searchOnlyApiKey); + const searchParams = algoliaOptions["params"]; + const searchAnalytics = !!algoliaOptions["analytics-events"]; + + return getAlgoliaResults({ + searchClient, + queries: [ + { + indexName: indexName, + query, + params: { + hitsPerPage: limit, + clickAnalytics: searchAnalytics, + ...searchParams, + }, + }, + ], + transformResponse: (response) => { + if (!indexFields) { + return response.hits.map((hit) => { + return hit.map((item) => { + return { + ...item, + text: highlightMatch(query, item.text), + }; + }); + }); + } else { + const remappedHits = response.hits.map((hit) => { + return hit.map((item) => { + const newItem = { ...item }; + ["href", "section", "title", "text"].forEach((keyName) => { + const mappedName = indexFields[keyName]; + if ( + mappedName && + item[mappedName] !== undefined && + mappedName !== keyName + ) { + newItem[keyName] = item[mappedName]; + delete 
newItem[mappedName]; + } + }); + newItem.text = highlightMatch(query, newItem.text); + return newItem; + }); + }); + return remappedHits; + } + }, + }); +} + +function fuseSearch(query, fuse, fuseOptions) { + return fuse.search(query, fuseOptions).map((result) => { + const addParam = (url, name, value) => { + const anchorParts = url.split("#"); + const baseUrl = anchorParts[0]; + const sep = baseUrl.search("\\?") > 0 ? "&" : "?"; + anchorParts[0] = baseUrl + sep + name + "=" + value; + return anchorParts.join("#"); + }; + + return { + title: result.item.title, + section: result.item.section, + href: addParam(result.item.href, kQueryArg, query), + text: highlightMatch(query, result.item.text), + }; + }); +} diff --git a/content/find/parsnip/index_files/selectize/selectize.bootstrap3.css b/docs/site_libs/selectize-0.12.0/selectize.bootstrap3.css similarity index 100% rename from content/find/parsnip/index_files/selectize/selectize.bootstrap3.css rename to docs/site_libs/selectize-0.12.0/selectize.bootstrap3.css diff --git a/content/find/parsnip/index_files/selectize/selectize.min.js b/docs/site_libs/selectize-0.12.0/selectize.min.js similarity index 100% rename from content/find/parsnip/index_files/selectize/selectize.min.js rename to docs/site_libs/selectize-0.12.0/selectize.min.js diff --git a/content/start/case-study/figs/logistic-results-1.svg b/docs/start/case-study/figs/logistic-results-1.svg similarity index 100% rename from content/start/case-study/figs/logistic-results-1.svg rename to docs/start/case-study/figs/logistic-results-1.svg diff --git a/content/start/case-study/figs/logistic-roc-curve-1.svg b/docs/start/case-study/figs/logistic-roc-curve-1.svg similarity index 100% rename from content/start/case-study/figs/logistic-roc-curve-1.svg rename to docs/start/case-study/figs/logistic-roc-curve-1.svg diff --git a/docs/start/case-study/figs/lr-plot-lines-1.svg b/docs/start/case-study/figs/lr-plot-lines-1.svg new file mode 100644 index 00000000..dcc3c5e8 --- 
/dev/null +++ b/docs/start/case-study/figs/lr-plot-lines-1.svg @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.5 +0.6 +0.7 +0.8 + + + + + + + + +0.0001 +0.0010 +0.0100 +0.1000 +penalty +Area under the ROC Curve + + diff --git a/content/start/case-study/figs/rf-importance-1.svg b/docs/start/case-study/figs/rf-importance-1.svg similarity index 100% rename from content/start/case-study/figs/rf-importance-1.svg rename to docs/start/case-study/figs/rf-importance-1.svg diff --git a/content/start/case-study/figs/rf-lr-roc-curve-1.svg b/docs/start/case-study/figs/rf-lr-roc-curve-1.svg similarity index 100% rename from content/start/case-study/figs/rf-lr-roc-curve-1.svg rename to docs/start/case-study/figs/rf-lr-roc-curve-1.svg diff --git a/content/start/case-study/figs/rf-results-1.svg b/docs/start/case-study/figs/rf-results-1.svg similarity index 100% rename from content/start/case-study/figs/rf-results-1.svg rename to docs/start/case-study/figs/rf-results-1.svg diff --git a/content/start/case-study/figs/test-set-roc-curve-1.svg b/docs/start/case-study/figs/test-set-roc-curve-1.svg similarity index 100% rename from content/start/case-study/figs/test-set-roc-curve-1.svg rename to docs/start/case-study/figs/test-set-roc-curve-1.svg diff --git a/content/start/case-study/img/hotel.jpg b/docs/start/case-study/img/hotel.jpg similarity index 100% rename from content/start/case-study/img/hotel.jpg rename to docs/start/case-study/img/hotel.jpg diff --git a/content/start/case-study/img/validation-split.svg b/docs/start/case-study/img/validation-split.svg similarity index 100% rename from content/start/case-study/img/validation-split.svg rename to docs/start/case-study/img/validation-split.svg diff --git a/docs/start/case-study/index.html b/docs/start/case-study/index.html new file mode 100644 index 00000000..34eeac1d --- /dev/null +++ 
b/docs/start/case-study/index.html @@ -0,0 +1,1132 @@ + + + + + + + + + + +tidymodels - A predictive modeling case study + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    A predictive modeling case study

    +
    +
    model fitting
    +
    tuning
    +
    parsnip
    +
    recipes
    +
    rsample
    +
    workflows
    +
    tune
    +
    +
    + +
    +
    +

    Develop, from beginning to end, a predictive model using best practices.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    Each of the four previous Get Started articles has focused on a single task related to modeling. Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you’ll need to start working with models. In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays.

    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    To use code in this article, you will need to install the following packages: glmnet, ranger, readr, tidymodels, and vip.

    +
    +
    library(tidymodels)  
    +
    +# Helper packages
    +library(readr)       # for importing data
    +library(vip)         # for variable importance plots
    +
    +

    +
    +
    +

    The Hotel Bookings Data

    +

    Let’s use hotel bookings data from Antonio, Almeida, and Nunes (2019) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a #TidyTuesday dataset with a data dictionary you may want to look over to learn more about the variables. We’ll use a slightly edited version of the dataset for this case study.

    +

    To start, let’s read our hotel data into R, which we’ll do by providing readr::read_csv() with a url where our CSV data is located (“https://tidymodels.org/start/case-study/hotels.csv”):

    +
    +
    library(tidymodels)
    +library(readr)
    +
    +hotels <- 
    +  read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>%
    +  mutate(across(where(is.character), as.factor))
    +
    +dim(hotels)
    +#> [1] 50000    23
    +
    +

    In the original paper, the authors caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we’ll be analyzing hotel stays only.

    +
    +
    glimpse(hotels)
    +#> Rows: 50,000
    +#> Columns: 23
    +#> $ hotel                          <fct> City_Hotel, City_Hotel, Resort_Hotel, R…
    +#> $ lead_time                      <dbl> 217, 2, 95, 143, 136, 67, 47, 56, 80, 6…
    +#> $ stays_in_weekend_nights        <dbl> 1, 0, 2, 2, 1, 2, 0, 0, 0, 2, 1, 0, 1, …
    +#> $ stays_in_week_nights           <dbl> 3, 1, 5, 6, 4, 2, 2, 3, 4, 2, 2, 1, 2, …
    +#> $ adults                         <dbl> 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 1, 2, …
    +#> $ children                       <fct> none, none, none, none, none, none, chi…
    +#> $ meal                           <fct> BB, BB, BB, HB, HB, SC, BB, BB, BB, BB,…
    +#> $ country                        <fct> DEU, PRT, GBR, ROU, PRT, GBR, ESP, ESP,…
    +#> $ market_segment                 <fct> Offline_TA/TO, Direct, Online_TA, Onlin…
    +#> $ distribution_channel           <fct> TA/TO, Direct, TA/TO, TA/TO, Direct, TA…
    +#> $ is_repeated_guest              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
    +#> $ previous_cancellations         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
    +#> $ previous_bookings_not_canceled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
    +#> $ reserved_room_type             <fct> A, D, A, A, F, A, C, B, D, A, A, D, A, …
    +#> $ assigned_room_type             <fct> A, K, A, A, F, A, C, A, D, A, D, D, A, …
    +#> $ booking_changes                <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
    +#> $ deposit_type                   <fct> No_Deposit, No_Deposit, No_Deposit, No_…
    +#> $ days_in_waiting_list           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
    +#> $ customer_type                  <fct> Transient-Party, Transient, Transient, …
    +#> $ average_daily_rate             <dbl> 80.75, 170.00, 8.00, 81.00, 157.60, 49.…
    +#> $ required_car_parking_spaces    <fct> none, none, none, none, none, none, non…
    +#> $ total_of_special_requests      <dbl> 1, 3, 2, 1, 4, 1, 1, 1, 1, 1, 0, 1, 0, …
    +#> $ arrival_date                   <date> 2016-09-01, 2017-08-25, 2016-11-19, 20…
    +
    +

    We will build a model to predict which actual hotel stays included children and/or babies, and which did not. Our outcome variable children is a factor variable with two levels:

    +
    +
    hotels %>% 
    +  count(children) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   children     n   prop
    +#>   <fct>    <int>  <dbl>
    +#> 1 children  4038 0.0808
    +#> 2 none     45962 0.919
    +
    +

    We can see that children were only in 8.1% of the reservations. This type of class imbalance can often wreak havoc on an analysis. While there are several methods for combating this issue using recipes (search for steps to upsample or downsample) or other more specialized packages like themis, the analyses shown below analyze the data as-is.

    +
    +
    +

    Data Splitting & Resampling

    +

    For a data splitting strategy, let’s reserve 25% of the stays to the test set. As in our Evaluate your model with resampling article, we know our outcome variable children is pretty imbalanced so we’ll use a stratified random sample:

    +
    +
    set.seed(123)
    +splits      <- initial_split(hotels, strata = children)
    +
    +hotel_other <- training(splits)
    +hotel_test  <- testing(splits)
    +
    +# training set proportions by children
    +hotel_other %>% 
    +  count(children) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   children     n   prop
    +#>   <fct>    <int>  <dbl>
    +#> 1 children  3027 0.0807
    +#> 2 none     34473 0.919
    +
    +# test set proportions by children
    +hotel_test  %>% 
    +  count(children) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   children     n   prop
    +#>   <fct>    <int>  <dbl>
    +#> 1 children  1011 0.0809
    +#> 2 none     11489 0.919
    +
    +

    In our articles so far, we’ve relied on 10-fold cross-validation as the primary resampling method using rsample::vfold_cv(). This has created 10 different resamples of the training set (which we further split into analysis and assessment sets), producing 10 different performance metrics that we then aggregated.

    +

    For this case study, rather than using multiple iterations of resampling, let’s create a single resample called a validation set. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the 37,500 stays that were not used for testing, which we called hotel_other. This split creates two new datasets:

    +
      +
    • the set held out for the purpose of measuring performance, called the validation set, and

    • +
    • the remaining data used to fit the model, called the training set.

    • +
    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    We’ll use the validation_split() function to allocate 20% of the hotel_other stays to the validation set and 30,000 stays to the training set. This means that our model performance metrics will be computed on a single set of 7,500 hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling.

    +
    +
    set.seed(234)
    +val_set <- validation_split(hotel_other, 
    +                            strata = children, 
    +                            prop = 0.80)
    +val_set
    +#> # Validation Set Split (0.8/0.2)  using stratification 
    +#> # A tibble: 1 × 2
    +#>   splits               id        
    +#>   <list>               <chr>     
    +#> 1 <split [30000/7500]> validation
    +
    +

    This function, like initial_split(), has the same strata argument, which uses stratified sampling to create the resample. This means that we’ll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original hotel_other proportions.

    +
    +
    +

    A first model: penalized logistic regression

    +

    Since our outcome variable children is categorical, logistic regression would be a good first model to start. Let’s use a model that can perform feature selection during training. The glmnet R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a penalty on the process so that less relevant predictors are driven towards a value of zero. One of the glmnet penalization methods, called the lasso method, can actually set the predictor slopes to zero if a large enough penalty is used.

    +
    +

    Build the model

    +

    To specify a penalized logistic regression model that uses a feature selection penalty, let’s use the parsnip package with the glmnet engine:

    +
    +
    lr_mod <- 
    +  logistic_reg(penalty = tune(), mixture = 1) %>% 
    +  set_engine("glmnet")
    +
    +

    We’ll set the penalty argument to tune() as a placeholder for now. This is a model hyperparameter that we will tune to find the best value for making predictions with our data. Setting mixture to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model.

    +
    +
    +

    Create the recipe

    +

    Let’s create a recipe to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a number of useful recipe steps for creating features from dates:

    +
      +
    • step_date() creates predictors for the year, month, and day of the week.

    • +
    • step_holiday() generates a set of indicator variables for specific holidays. Although we don’t know where these two hotels are located, we do know that the countries for origin for most stays are based in Europe.

    • +
    • step_rm() removes variables; here we’ll use it to remove the original date variable since we no longer want it in the model.

    • +
    +

    Additionally, all categorical predictors (e.g., distribution_channel, hotel, …) should be converted to dummy variables, and all numeric predictors need to be centered and scaled.

    +
      +
    • step_dummy() converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data.

    • +
    • step_zv() removes indicator variables that only contain a single unique value (e.g. all zeros). This is important because, for penalized models, the predictors should be centered and scaled.

    • +
    • step_normalize() centers and scales numeric variables.

    • +
    +

    Putting all these steps together into a recipe for a penalized logistic regression model, we have:

    +
    +
    holidays <- c("AllSouls", "AshWednesday", "ChristmasEve", "Easter", 
    +              "ChristmasDay", "GoodFriday", "NewYearsDay", "PalmSunday")
    +
    +lr_recipe <- 
    +  recipe(children ~ ., data = hotel_other) %>% 
    +  step_date(arrival_date) %>% 
    +  step_holiday(arrival_date, holidays = holidays) %>% 
    +  step_rm(arrival_date) %>% 
    +  step_dummy(all_nominal_predictors()) %>% 
    +  step_zv(all_predictors()) %>% 
    +  step_normalize(all_predictors())
    +
    +
    +
    +

    Create the workflow

    +

    As we introduced in Preprocess your data with recipes, let’s bundle the model and recipe into a single workflow() object to make management of the R objects easier:

    +
    +
    lr_workflow <- 
    +  workflow() %>% 
    +  add_model(lr_mod) %>% 
    +  add_recipe(lr_recipe)
    +
    +
    +
    +

    Create the grid for tuning

    +

    Before we fit this model, we need to set up a grid of penalty values to tune. In our Tune model parameters article, we used dials::grid_regular() to create an expanded grid based on a combination of two hyperparameters. Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values:

    +
    +
    lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))
    +
    +lr_reg_grid %>% top_n(-5) # lowest penalty values
    +#> Selecting by penalty
    +#> # A tibble: 5 × 1
    +#>    penalty
    +#>      <dbl>
    +#> 1 0.0001  
    +#> 2 0.000127
    +#> 3 0.000161
    +#> 4 0.000204
    +#> 5 0.000259
    +lr_reg_grid %>% top_n(5)  # highest penalty values
    +#> Selecting by penalty
    +#> # A tibble: 5 × 1
    +#>   penalty
    +#>     <dbl>
    +#> 1  0.0386
    +#> 2  0.0489
    +#> 3  0.0621
    +#> 4  0.0788
    +#> 5  0.1
    +
    +
    +
    +

    Train and tune the model

    +

    Let’s use tune::tune_grid() to train these 30 penalized logistic regression models. We’ll also save the validation set predictions (via the call to control_grid()) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate—the proportion of stays including children— is very low for these data).

    +
    +
    lr_res <- 
    +  lr_workflow %>% 
    +  tune_grid(val_set,
    +            grid = lr_reg_grid,
    +            control = control_grid(save_pred = TRUE),
    +            metrics = metric_set(roc_auc))
    +
    +

    It might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values:

    +
    +
    lr_plot <- 
    +  lr_res %>% 
    +  collect_metrics() %>% 
    +  ggplot(aes(x = penalty, y = mean)) + 
    +  geom_point() + 
    +  geom_line() + 
    +  ylab("Area under the ROC Curve") +
    +  scale_x_log10(labels = scales::label_number())
    +
    +lr_plot 
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This plots shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. This happens because a large enough penalty will remove all predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class).

    +

    Our model performance seems to plateau at the smaller penalty values, so going by the roc_auc metric alone could lead us to multiple options for the “best” value for this hyperparameter:

    +
    +
    top_models <-
    +  lr_res %>% 
    +  show_best("roc_auc", n = 15) %>% 
    +  arrange(penalty) 
    +top_models
    +#> # A tibble: 15 × 7
    +#>     penalty .metric .estimator  mean     n std_err .config              
    +#>       <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
    +#>  1 0.000127 roc_auc binary     0.872     1      NA Preprocessor1_Model02
    +#>  2 0.000161 roc_auc binary     0.872     1      NA Preprocessor1_Model03
    +#>  3 0.000204 roc_auc binary     0.873     1      NA Preprocessor1_Model04
    +#>  4 0.000259 roc_auc binary     0.873     1      NA Preprocessor1_Model05
    +#>  5 0.000329 roc_auc binary     0.874     1      NA Preprocessor1_Model06
    +#>  6 0.000418 roc_auc binary     0.874     1      NA Preprocessor1_Model07
    +#>  7 0.000530 roc_auc binary     0.875     1      NA Preprocessor1_Model08
    +#>  8 0.000672 roc_auc binary     0.875     1      NA Preprocessor1_Model09
    +#>  9 0.000853 roc_auc binary     0.876     1      NA Preprocessor1_Model10
    +#> 10 0.00108  roc_auc binary     0.876     1      NA Preprocessor1_Model11
    +#> 11 0.00137  roc_auc binary     0.876     1      NA Preprocessor1_Model12
    +#> 12 0.00174  roc_auc binary     0.876     1      NA Preprocessor1_Model13
    +#> 13 0.00221  roc_auc binary     0.876     1      NA Preprocessor1_Model14
    +#> 14 0.00281  roc_auc binary     0.875     1      NA Preprocessor1_Model15
    +#> 15 0.00356  roc_auc binary     0.873     1      NA Preprocessor1_Model16
    +
    +

    Every candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used select_best(), it would return candidate model 11 with a penalty value of 0.00137, shown with the dotted line below.

    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    However, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. For example, candidate model 12 with a penalty value of 0.00174 has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we’d prefer to choose a higher penalty value.

    +

    Let’s select this value and visualize the validation set ROC curve:

    +
    +
    lr_best <- 
    +  lr_res %>% 
    +  collect_metrics() %>% 
    +  arrange(penalty) %>% 
    +  slice(12)
    +lr_best
    +#> # A tibble: 1 × 7
    +#>   penalty .metric .estimator  mean     n std_err .config              
    +#>     <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
    +#> 1 0.00137 roc_auc binary     0.876     1      NA Preprocessor1_Model12
    +
    +
    +
    lr_auc <- 
    +  lr_res %>% 
    +  collect_predictions(parameters = lr_best) %>% 
    +  roc_curve(children, .pred_children) %>% 
    +  mutate(model = "Logistic Regression")
    +
    +autoplot(lr_auc)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method.

    +
    +
    +
    +

    A second model: tree-based ensemble

    +

    An effective and low-maintenance modeling technique is a random forest. This model was also used in our Evaluate your model with resampling article. Compared to logistic regression, a random forest model is more flexible. A random forest is an ensemble model typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.).

    +
    +

    Build the model and improve training time

    +

    Although the default hyperparameters for random forests tend to give reasonable results, we’ll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do parallel processing for you, and allows users to use multiple cores or separate machines to fit models.

    +

    But, here we are using a single validation set, so parallelization isn’t an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do:

    +
    +
    cores <- parallel::detectCores()
    +cores
    +#> [1] 10
    +
    +

    We have 10 cores to work with. We can pass this information to the ranger engine when we set up our parsnip rand_forest() model. To enable parallel processing, we can pass engine-specific arguments like num.threads to ranger when we set the engine:

    +
    +
    rf_mod <- 
    +  rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
    +  set_engine("ranger", num.threads = cores) %>% 
    +  set_mode("classification")
    +
    +

    This works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you — we typically do not recommend relying on the modeling engine (like we did here) to do this.

    +

    In this model, we used tune() as a placeholder for the mtry and min_n argument values, because these are our two hyperparameters that we will tune.

    +
    +
    +

    Create the recipe and workflow

    +

    Unlike penalized logistic regression models, random forest models do not require dummy or normalized predictor variables. Nevertheless, we want to do some feature engineering again with our arrival_date variable. As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data.

    +
    +
    rf_recipe <- 
    +  recipe(children ~ ., data = hotel_other) %>% 
    +  step_date(arrival_date) %>% 
    +  step_holiday(arrival_date) %>% 
    +  step_rm(arrival_date) 
    +
    +

    Adding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest:

    +
    +
    rf_workflow <- 
    +  workflow() %>% 
    +  add_model(rf_mod) %>% 
    +  add_recipe(rf_recipe)
    +
    +
    +
    +

    Train and tune the model

    +

    When we set up our parsnip model, we chose two hyperparameters for tuning:

    +
    +
    rf_mod
    +#> Random Forest Model Specification (classification)
    +#> 
    +#> Main Arguments:
    +#>   mtry = tune()
    +#>   trees = 1000
    +#>   min_n = tune()
    +#> 
    +#> Engine-Specific Arguments:
    +#>   num.threads = cores
    +#> 
    +#> Computational engine: ranger
    +
    +# show what will be tuned
    +extract_parameter_set_dials(rf_mod)
    +#> Collection of 2 parameters for tuning
    +#> 
    +#>  identifier  type    object
    +#>        mtry  mtry nparam[?]
    +#>       min_n min_n nparam[+]
    +#> 
    +#> Model parameters needing finalization:
    +#>    # Randomly Selected Predictors ('mtry')
    +#> 
    +#> See `?dials::finalize` or `?dials::update.parameters` for more information.
    +
    +

    The mtry hyperparameter sets the number of predictor variables that each node in the decision tree “sees” and can learn about, so it can range from 1 to the total number of features present; when mtry = all possible features, the model is the same as bagging decision trees. The min_n hyperparameter sets the minimum n to split at any node.

    +

    We will use a space-filling design to tune, with 25 candidate models:

    +
    +
    set.seed(345)
    +rf_res <- 
    +  rf_workflow %>% 
    +  tune_grid(val_set,
    +            grid = 25,
    +            control = control_grid(save_pred = TRUE),
    +            metrics = metric_set(roc_auc))
    +#> i Creating pre-processing data to finalize unknown parameter: mtry
    +
    +

    The message printed above “Creating pre-processing data to finalize unknown parameter: mtry” is related to the size of the data set. Since mtry depends on the number of predictors in the data set, tune_grid() determines the upper bound for mtry once it receives the data.

    +

    Here are our top 5 random forest models, out of the 25 candidates:

    +
    +
    rf_res %>% 
    +  show_best(metric = "roc_auc")
    +#> # A tibble: 5 × 8
    +#>    mtry min_n .metric .estimator  mean     n std_err .config              
    +#>   <int> <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
    +#> 1     8     7 roc_auc binary     0.926     1      NA Preprocessor1_Model13
    +#> 2    12     7 roc_auc binary     0.926     1      NA Preprocessor1_Model01
    +#> 3    13     4 roc_auc binary     0.925     1      NA Preprocessor1_Model05
    +#> 4     9    12 roc_auc binary     0.924     1      NA Preprocessor1_Model19
    +#> 5     6    18 roc_auc binary     0.924     1      NA Preprocessor1_Model24
    +
    +

    Right away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of 0.876.

    +

    Plotting the results of the tuning process highlights that both mtry (number of predictors at each node) and min_n (minimum number of data points required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values — all but one of the ROC AUC values are greater than 0.90.

    +
    +
    autoplot(rf_res)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Let’s select the best model according to the ROC AUC metric. Our final tuning parameter values are:

    +
    +
    rf_best <- 
    +  rf_res %>% 
    +  select_best(metric = "roc_auc")
    +rf_best
    +#> # A tibble: 1 × 3
    +#>    mtry min_n .config              
    +#>   <int> <int> <chr>                
    +#> 1     8     7 Preprocessor1_Model13
    +
    +

    To calculate the data needed to plot the ROC curve, we use collect_predictions(). This is only possible after tuning with control_grid(save_pred = TRUE). In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children.

    +
    +
    rf_res %>% 
    +  collect_predictions()
    +#> # A tibble: 187,500 × 8
    +#>    id         .pred_children .pred_none  .row  mtry min_n children .config      
    +#>    <chr>               <dbl>      <dbl> <int> <int> <int> <fct>    <chr>        
    +#>  1 validation        0.152        0.848    13    12     7 none     Preprocessor…
    +#>  2 validation        0.0302       0.970    20    12     7 none     Preprocessor…
    +#>  3 validation        0.513        0.487    22    12     7 children Preprocessor…
    +#>  4 validation        0.0103       0.990    23    12     7 none     Preprocessor…
    +#>  5 validation        0.0111       0.989    31    12     7 none     Preprocessor…
    +#>  6 validation        0            1        38    12     7 none     Preprocessor…
    +#>  7 validation        0            1        39    12     7 none     Preprocessor…
    +#>  8 validation        0.00325      0.997    50    12     7 none     Preprocessor…
    +#>  9 validation        0.0241       0.976    54    12     7 none     Preprocessor…
    +#> 10 validation        0.0441       0.956    57    12     7 children Preprocessor…
    +#> # ℹ 187,490 more rows
    +
    +

    To filter the predictions for only our best random forest model, we can use the parameters argument and pass it our tibble with the best hyperparameter values from tuning, which we called rf_best:

    +
    +
    rf_auc <- 
    +  rf_res %>% 
    +  collect_predictions(parameters = rf_best) %>% 
    +  roc_curve(children, .pred_children) %>% 
    +  mutate(model = "Random Forest")
    +
    +

    Now, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model:

    +
    +
    bind_rows(rf_auc, lr_auc) %>% 
    +  ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + 
    +  geom_path(lwd = 1.5, alpha = 0.8) +
    +  geom_abline(lty = 3) + 
    +  coord_equal() + 
    +  scale_color_viridis_d(option = "plasma", end = .6)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The random forest is uniformly better across event probability thresholds.

    +
    +
    +
    +

    The last fit

    +

    Our goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set.

    +

    We’ll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: importance = "impurity". This will provide variable importance scores for this last model, which gives some insight into which predictors drive model performance.

    +
    +
    # the last model
    +last_rf_mod <- 
    +  rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% 
    +  set_engine("ranger", num.threads = cores, importance = "impurity") %>% 
    +  set_mode("classification")
    +
    +# the last workflow
    +last_rf_workflow <- 
    +  rf_workflow %>% 
    +  update_model(last_rf_mod)
    +
    +# the last fit
    +set.seed(345)
    +last_rf_fit <- 
    +  last_rf_workflow %>% 
    +  last_fit(splits)
    +
    +last_rf_fit
    +#> # Resampling results
    +#> # Manual resampling 
    +#> # A tibble: 1 × 6
    +#>   splits                id             .metrics .notes   .predictions .workflow 
    +#>   <list>                <chr>          <list>   <list>   <list>       <list>    
    +#> 1 <split [37500/12500]> train/test sp… <tibble> <tibble> <tibble>     <workflow>
    +
    +

    This fitted workflow contains everything, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance?

    +
    +
    last_rf_fit %>% 
    +  collect_metrics()
    +#> # A tibble: 2 × 4
    +#>   .metric  .estimator .estimate .config             
    +#>   <chr>    <chr>          <dbl> <chr>               
    +#> 1 accuracy binary         0.946 Preprocessor1_Model1
    +#> 2 roc_auc  binary         0.923 Preprocessor1_Model1
    +
    +

    This ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data.

    +

    We can access those variable importance scores via the .workflow column. We can extract out the fit from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features:

    +
    +
    last_rf_fit %>% 
    +  extract_fit_parsnip() %>% 
    +  vip(num_features = 20)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned.

    +

    Let’s generate our last ROC curve to visualize. Since the event we are predicting is the first level in the children factor (“children”), we provide roc_curve() with the relevant class probability .pred_children:

    +
    +
    last_rf_fit %>% 
    +  collect_predictions() %>% 
    +  roc_curve(children, .pred_children) %>% 
    +  autoplot()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Based on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data.

    +
    +
    +

    Where to next?

    +

    If you’ve made it to the end of this series of Get Started articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can learn how to go farther with tidymodels in your modeling and machine learning projects.

    +

    Here are some more ideas for where to go next:

    + +

    ###

    +
    +Happy modeling! +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  glmnet     * 4.1-7   2023-03-23 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  ranger     * 0.15.1  2023-04-03 [1] CRAN (R 4.3.0)
    +#>  readr      * 2.1.4   2023-02-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  vip        * 0.3.2   2020-12-17 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/start/index.html b/docs/start/index.html new file mode 100644 index 00000000..557ce669 --- /dev/null +++ b/docs/start/index.html @@ -0,0 +1,468 @@ + + + + + + + + + + +tidymodels - Welcome! + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Welcome!

    +
    + +
    +
    + What do you need to know to start using tidymodels? Learn what you need in 5 articles. +
    +
    + + +
    + + + + +
    + + +
    + +

    Here, learn what you need to get started with tidymodels in five articles, starting with how to create a model and ending with a beginning-to-end modeling case study. After you are comfortable with these basics, you can learn how to go farther with tidymodels.

    +
    +

    If you are new to R or the tidyverse

    +

    To get the most out of tidymodels, we recommend that you start by learning some basics about R and the tidyverse first, then return here when you feel ready. Here are some resources to start learning:

    + + + +
    + +
    + +
    + + + + + \ No newline at end of file diff --git a/content/start/models/figs/dwplot-1.svg b/docs/start/models/figs/dwplot-1.svg similarity index 100% rename from content/start/models/figs/dwplot-1.svg rename to docs/start/models/figs/dwplot-1.svg diff --git a/content/start/models/figs/lm-all-pred-1.svg b/docs/start/models/figs/lm-all-pred-1.svg similarity index 100% rename from content/start/models/figs/lm-all-pred-1.svg rename to docs/start/models/figs/lm-all-pred-1.svg diff --git a/docs/start/models/figs/stan-pred-1.svg b/docs/start/models/figs/stan-pred-1.svg new file mode 100644 index 00000000..4e7a314f --- /dev/null +++ b/docs/start/models/figs/stan-pred-1.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.05 +0.06 +0.07 +0.08 +0.09 +0.10 + + + + + + + + + +Initial +Low +High +food_regime +urchin size +Bayesian model with t(1) prior distribution + + diff --git a/content/start/models/figs/urchin-plot-1.svg b/docs/start/models/figs/urchin-plot-1.svg similarity index 100% rename from content/start/models/figs/urchin-plot-1.svg rename to docs/start/models/figs/urchin-plot-1.svg diff --git a/docs/start/models/index.html b/docs/start/models/index.html new file mode 100644 index 00000000..73d09954 --- /dev/null +++ b/docs/start/models/index.html @@ -0,0 +1,899 @@ + + + + + + + + + + +tidymodels - Build a model + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Build a model

    +
    +
    model fitting
    +
    parsnip
    +
    broom
    +
    +
    + +
    +
    +

    Get started by learning how to specify and train a model using tidymodels.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    How do you create a statistical model using tidymodels? In this article, we will walk you through the steps. We start with data for modeling, learn how to specify and train models with different engines using the parsnip package, and understand why these functions are designed this way.

    +

    To use code in this article, you will need to install the following packages: broom.mixed, dotwhisker, readr, rstanarm, and tidymodels.

    +
    +
    library(tidymodels)  # for the parsnip package, along with the rest of tidymodels
    +
    +# Helper packages
    +library(readr)       # for importing data
    +library(broom.mixed) # for converting bayesian models to tidy tibbles
    +library(dotwhisker)  # for visualizing regression results
    +
    +

    +
    +
    +

    The Sea Urchins Data

    +

    Let’s use the data from Constable (1993) to explore how three different feeding regimes affect the size of sea urchins over time. The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed.

    +

    To start, let’s read our urchins data into R, which we’ll do by providing readr::read_csv() with a url where our CSV data is located (“https://tidymodels.org/start/models/urchins.csv”):

    +
    +
    urchins <-
    +  # Data were assembled for a tutorial 
    +  # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html
    +  read_csv("https://tidymodels.org/start/models/urchins.csv") %>% 
    +  # Change the names to be a little more verbose
    +  setNames(c("food_regime", "initial_volume", "width")) %>% 
    +  # Factors are very helpful for modeling, so we convert one column
    +  mutate(food_regime = factor(food_regime, levels = c("Initial", "Low", "High")))
    +#> Rows: 72 Columns: 3
    +#> ── Column specification ──────────────────────────────────────────────
    +#> Delimiter: ","
    +#> chr (1): TREAT
    +#> dbl (2): IV, SUTW
    +#> 
    +#> ℹ Use `spec()` to retrieve the full column specification for this data.
    +#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
    +
    +

    Let’s take a quick look at the data:

    +
    +
    urchins
    +#> # A tibble: 72 × 3
    +#>    food_regime initial_volume width
    +#>    <fct>                <dbl> <dbl>
    +#>  1 Initial                3.5 0.01 
    +#>  2 Initial                5   0.02 
    +#>  3 Initial                8   0.061
    +#>  4 Initial               10   0.051
    +#>  5 Initial               13   0.041
    +#>  6 Initial               13   0.061
    +#>  7 Initial               15   0.041
    +#>  8 Initial               15   0.071
    +#>  9 Initial               16   0.092
    +#> 10 Initial               17   0.051
    +#> # ℹ 62 more rows
    +
    +

    The urchins data is a tibble. If you are new to tibbles, the best place to start is the tibbles chapter in R for Data Science. For each of the 72 urchins, we know their:

    +
      +
    • experimental feeding regime group (food_regime: either Initial, Low, or High),
    • +
    • size in milliliters at the start of the experiment (initial_volume), and
    • +
    • suture width at the end of the experiment (width).
    • +
    +

    As a first step in modeling, it’s always a good idea to plot the data:

    +
    +
    ggplot(urchins,
    +       aes(x = initial_volume, 
    +           y = width, 
    +           group = food_regime, 
    +           col = food_regime)) + 
    +  geom_point() + 
    +  geom_smooth(method = lm, se = FALSE) +
    +  scale_color_viridis_d(option = "plasma", end = .7)
    +#> `geom_smooth()` using formula = 'y ~ x'
    +
    +
    +
    +

    +
    +
    +
    +
    +

    We can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition.

    +
    +
    +

    Build and fit a model

    +

    A standard two-way analysis of variance (ANOVA) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let’s build a model that allows for two-way interactions. Specifying an R formula with our variables in this way:

    +
    +
    width ~ initial_volume * food_regime
    +
    +

    allows our regression model depending on initial volume to have separate slopes and intercepts for each food regime.

    +

    For this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the functional form of the model that we want using the parsnip package. Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is “linear regression”. We can declare this with:

    +
    +
    linear_reg()
    +#> Linear Regression Model Specification (regression)
    +#> 
    +#> Computational engine: lm
    +
    +

    That is pretty underwhelming since, on its own, it doesn’t really do much. However, now that the type of model has been specified, we can think about a method for fitting or training the model, the model engine. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for linear_reg() is "lm" for ordinary least squares, as you can see above. We could set a non-default option instead:

    +
    +
    linear_reg() %>% 
    +  set_engine("keras")
    +#> Linear Regression Model Specification (regression)
    +#> 
    +#> Computational engine: keras
    +
    +

    The documentation page for linear_reg() lists all the possible engines. We’ll save our model object using the default engine as lm_mod.

    +
    +
    lm_mod <- linear_reg()
    +
    +

    From here, the model can be estimated or trained using the fit() function:

    +
    +
    lm_fit <- 
    +  lm_mod %>% 
    +  fit(width ~ initial_volume * food_regime, data = urchins)
    +lm_fit
    +#> parsnip model object
    +#> 
    +#> 
    +#> Call:
    +#> stats::lm(formula = width ~ initial_volume * food_regime, data = data)
    +#> 
    +#> Coefficients:
    +#>                    (Intercept)                  initial_volume  
    +#>                      0.0331216                       0.0015546  
    +#>                 food_regimeLow                 food_regimeHigh  
    +#>                      0.0197824                       0.0214111  
    +#>  initial_volume:food_regimeLow  initial_volume:food_regimeHigh  
    +#>                     -0.0012594                       0.0005254
    +
    +

    Perhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the summary() function for lm objects can provide that, it gives the results back in an unwieldy format. Many models have a tidy() method that provides the summary results in a more predictable and useful format (e.g. a data frame with standard column names):

    +
    +
    tidy(lm_fit)
    +#> # A tibble: 6 × 5
    +#>   term                            estimate std.error statistic  p.value
    +#>   <chr>                              <dbl>     <dbl>     <dbl>    <dbl>
    +#> 1 (Intercept)                     0.0331    0.00962      3.44  0.00100 
    +#> 2 initial_volume                  0.00155   0.000398     3.91  0.000222
    +#> 3 food_regimeLow                  0.0198    0.0130       1.52  0.133   
    +#> 4 food_regimeHigh                 0.0214    0.0145       1.47  0.145   
    +#> 5 initial_volume:food_regimeLow  -0.00126   0.000510    -2.47  0.0162  
    +#> 6 initial_volume:food_regimeHigh  0.000525  0.000702     0.748 0.457
    +
    +

    This kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package:

    +
    +
    tidy(lm_fit) %>% 
    +  dwplot(dot_args = list(size = 2, color = "black"),
    +         whisker_args = list(color = "black"),
    +         vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2))
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Use a model to predict

    +

    This fitted object lm_fit has the lm model output built-in, which you can access with lm_fit$fit, but there are some benefits to using the fitted parsnip model object when it comes to predicting.

    +

    Suppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph:

    +
    +
    new_points <- expand.grid(initial_volume = 20, 
    +                          food_regime = c("Initial", "Low", "High"))
    +new_points
    +#>   initial_volume food_regime
    +#> 1             20     Initial
    +#> 2             20         Low
    +#> 3             20        High
    +
    +

    To get our predicted results, we can use the predict() function to find the mean values at 20ml.

    +

    It is also important to communicate the variability, so we also need to find the predicted confidence intervals. If we had used lm() to fit the model directly, a few minutes of reading the documentation page for predict.lm() would explain how to do this. However, if we decide to use a different model to estimate urchin size (spoiler: we will!), it is likely that a completely different syntax would be required.

    +

    Instead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values.

    +

    First, let’s generate the mean body width values:

    +
    +
    mean_pred <- predict(lm_fit, new_data = new_points)
    +mean_pred
    +#> # A tibble: 3 × 1
    +#>    .pred
    +#>    <dbl>
    +#> 1 0.0642
    +#> 2 0.0588
    +#> 3 0.0961
    +
    +

    When making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. This makes it easy to combine the original data and the predictions in a usable format:

    +
    +
    conf_int_pred <- predict(lm_fit, 
    +                         new_data = new_points, 
    +                         type = "conf_int")
    +conf_int_pred
    +#> # A tibble: 3 × 2
    +#>   .pred_lower .pred_upper
    +#>         <dbl>       <dbl>
    +#> 1      0.0555      0.0729
    +#> 2      0.0499      0.0678
    +#> 3      0.0870      0.105
    +
    +# Now combine: 
    +plot_data <- 
    +  new_points %>% 
    +  bind_cols(mean_pred) %>% 
    +  bind_cols(conf_int_pred)
    +
    +# and plot:
    +ggplot(plot_data, aes(x = food_regime)) + 
    +  geom_point(aes(y = .pred)) + 
    +  geom_errorbar(aes(ymin = .pred_lower, 
    +                    ymax = .pred_upper),
    +                width = .2) + 
    +  labs(y = "urchin size")
    +
    +
    +
    +

    +
    +
    +
    +
    +
    +
    +

    Model with a different engine

    +

    Everyone on your team is happy with that plot except that one person who just read their first book on Bayesian analysis. They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a prior distribution needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors wide using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom).

    +

    The documentation on the rstanarm package shows us that the stan_glm() function can be used to estimate this model, and that the function arguments that need to be specified are called prior and prior_intercept. It turns out that linear_reg() has a stan engine. Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to parsnip::set_engine(). After that, the same exact fit() call is used:

    +
    +
    # set the prior distribution
    +prior_dist <- rstanarm::student_t(df = 1)
    +
    +set.seed(123)
    +
    +# make the parsnip model
    +bayes_mod <-   
    +  linear_reg() %>% 
    +  set_engine("stan", 
    +             prior_intercept = prior_dist, 
    +             prior = prior_dist) 
    +
    +# train the model
    +bayes_fit <- 
    +  bayes_mod %>% 
    +  fit(width ~ initial_volume * food_regime, data = urchins)
    +
    +print(bayes_fit, digits = 5)
    +#> parsnip model object
    +#> 
    +#> stan_glm
    +#>  family:       gaussian [identity]
    +#>  formula:      width ~ initial_volume * food_regime
    +#>  observations: 72
    +#>  predictors:   6
    +#> ------
    +#>                                Median   MAD_SD  
    +#> (Intercept)                     0.03336  0.01003
    +#> initial_volume                  0.00156  0.00040
    +#> food_regimeLow                  0.01963  0.01308
    +#> food_regimeHigh                 0.02120  0.01421
    +#> initial_volume:food_regimeLow  -0.00126  0.00051
    +#> initial_volume:food_regimeHigh  0.00054  0.00070
    +#> 
    +#> Auxiliary parameter(s):
    +#>       Median  MAD_SD 
    +#> sigma 0.02129 0.00188
    +#> 
    +#> ------
    +#> * For help interpreting the printed output see ?print.stanreg
    +#> * For info on the priors used see ?prior_summary.stanreg
    +
    +

    This kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use set.seed() to ensure that the same (pseudo-)random numbers are generated each time we run this code. The number 123 isn’t special or related to our data; it is just a “seed” used to choose random numbers.

    +

    To update the parameter table, the tidy() method is once again used:

    +
    +
    tidy(bayes_fit, conf.int = TRUE)
    +#> # A tibble: 6 × 5
    +#>   term                            estimate std.error  conf.low conf.high
    +#>   <chr>                              <dbl>     <dbl>     <dbl>     <dbl>
    +#> 1 (Intercept)                     0.0334    0.0100    0.0179    0.0493  
    +#> 2 initial_volume                  0.00156   0.000404  0.000876  0.00219 
    +#> 3 food_regimeLow                  0.0196    0.0131   -0.00271   0.0414  
    +#> 4 food_regimeHigh                 0.0212    0.0142   -0.00289   0.0455  
    +#> 5 initial_volume:food_regimeLow  -0.00126   0.000515 -0.00213  -0.000364
    +#> 6 initial_volume:food_regimeHigh  0.000541  0.000696 -0.000669  0.00174
    +
    +

    A goal of the tidymodels packages is that the interfaces to common tasks are standardized (as seen in the tidy() results above). The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax:

    +
    +
    bayes_plot_data <- 
    +  new_points %>% 
    +  bind_cols(predict(bayes_fit, new_data = new_points)) %>% 
    +  bind_cols(predict(bayes_fit, new_data = new_points, type = "conf_int"))
    +
    +ggplot(bayes_plot_data, aes(x = food_regime)) + 
    +  geom_point(aes(y = .pred)) + 
    +  geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + 
    +  labs(y = "urchin size") + 
    +  ggtitle("Bayesian model with t(1) prior distribution")
    +
    +
    +
    +

    +
    +
    +
    +
    +

    This isn’t very different from the non-Bayesian results (except in interpretation).

    +
    +
    +
    + +
    +
    +Note +
    +
    +
    +

    The parsnip package can work with many model types, engines, and arguments. Check out tidymodels.org/find/parsnip to see what is available.

    +
    +
    +
    +
    +

    Why does it work that way?

    +

    The extra step of defining the model using a function like linear_reg() might seem superfluous since a call to lm() is much more succinct. However, the problem with standard modeling functions is that they don’t separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can’t recycle those computations.

    +

    Also, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using a single function call). Model tuning with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if linear_reg() immediately fit the model.

    +

    If you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (%>%). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the data as the first argument. For example:

    +
    +
    urchins %>% 
    +  group_by(food_regime) %>% 
    +  summarize(med_vol = median(initial_volume))
    +#> # A tibble: 3 × 2
    +#>   food_regime med_vol
    +#>   <fct>         <dbl>
    +#> 1 Initial        20.5
    +#> 2 Low            19.2
    +#> 3 High           15
    +
    +

    whereas the modeling code uses the pipe to pass around the model object:

    +
    +
    bayes_mod %>% 
    +  fit(width ~ initial_volume * food_regime, data = urchins)
    +
    +

    This may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates:

    +
    +
    ggplot(urchins,
    +       aes(initial_volume, width)) +      # returns a ggplot object 
    +  geom_jitter() +                         # same
    +  geom_smooth(method = lm, se = FALSE) +  # same                    
    +  labs(x = "Volume", y = "Width")         # etc
    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package     * version date (UTC) lib source
    +#>  broom       * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  broom.mixed * 0.2.9.4 2022-04-17 [1] CRAN (R 4.3.0)
    +#>  dials       * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dotwhisker  * 0.7.4   2021-09-02 [1] CRAN (R 4.3.0)
    +#>  dplyr       * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2     * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer       * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip     * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr       * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  readr       * 2.1.4   2023-02-10 [1] CRAN (R 4.3.0)
    +#>  recipes     * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang         1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample     * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  rstanarm    * 2.21.4  2023-04-08 [1] CRAN (R 4.3.0)
    +#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels  * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune        * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows   * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick   * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/start/recipes/figs/roc-plot-1.svg b/docs/start/recipes/figs/roc-plot-1.svg new file mode 100644 index 00000000..e1f84bc1 --- /dev/null +++ b/docs/start/recipes/figs/roc-plot-1.svg @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +1 - specificity +sensitivity + + diff --git a/docs/start/recipes/index.html b/docs/start/recipes/index.html new file mode 100644 index 00000000..1a8e2868 --- /dev/null +++ b/docs/start/recipes/index.html @@ -0,0 +1,1097 @@ + + + + + + + + + + +tidymodels - Preprocess your data with recipes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Preprocess your data with recipes

    +
    +
    pre-processing
    +
    recipes
    +
    parsnip
    +
    workflows
    +
    yardstick
    +
    broom
    +
    +
    + +
    +
    +

    Prepare data for modeling with modular preprocessing steps.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    In our Build a Model article, we learned how to specify and train models with different engines using the parsnip package. In this article, we’ll explore another tidymodels package, recipes, which is designed to help you preprocess your data before training your model. Recipes are built as a series of preprocessing steps, such as:

    +
      +
    • converting qualitative predictors to indicator variables (also known as dummy variables),

    • +
    • transforming data to be on a different scale (e.g., taking the logarithm of a variable),

    • +
    • transforming whole groups of predictors together,

    • +
    • extracting key features from raw variables (e.g., getting the day of the week out of a date variable),

    • +
    +

    and so on. If you are familiar with R’s formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling.

    +

    To use code in this article, you will need to install the following packages: nycflights13, skimr, and tidymodels.

    +
    +
    library(tidymodels)      # for the recipes package, along with the rest of tidymodels
    +
    +# Helper packages
    +library(nycflights13)    # for flight data
    +library(skimr)           # for variable summaries
    +
    +

    +
    +
    +

    The New York City flight data

    +

    Let’s use the nycflights13 data to predict whether a plane arrives more than 30 minutes late. This data set contains information on 325,819 flights departing near New York City in 2013. Let’s start by loading the data and making a few changes to the variables:

    +
    +
    set.seed(123)
    +
    +flight_data <- 
    +  flights %>% 
    +  mutate(
    +    # Convert the arrival delay to a factor
    +    arr_delay = ifelse(arr_delay >= 30, "late", "on_time"),
    +    arr_delay = factor(arr_delay),
    +    # We will use the date (not date-time) in the recipe below
    +    date = lubridate::as_date(time_hour)
    +  ) %>% 
    +  # Include the weather data
    +  inner_join(weather, by = c("origin", "time_hour")) %>% 
    +  # Only retain the specific columns we will use
    +  select(dep_time, flight, origin, dest, air_time, distance, 
    +         carrier, date, arr_delay, time_hour) %>% 
    +  # Exclude missing data
    +  na.omit() %>% 
    +  # For creating models, it is better to have qualitative columns
    +  # encoded as factors (instead of character strings)
    +  mutate_if(is.character, as.factor)
    +
    +

    We can see that about 16% of the flights in this data set arrived more than 30 minutes late.

    +
    +
    flight_data %>% 
    +  count(arr_delay) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   arr_delay      n  prop
    +#>   <fct>      <int> <dbl>
    +#> 1 late       52540 0.161
    +#> 2 on_time   273279 0.839
    +
    +

    Before we start building up our recipe, let’s take a quick look at a few specific variables that will be important for both preprocessing and modeling.

    +

    First, notice that the variable we created called arr_delay is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor.

    +
    +
    glimpse(flight_data)
    +#> Rows: 325,819
    +#> Columns: 10
    +#> $ dep_time  <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 558, …
    +#> $ flight    <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71…
    +#> $ origin    <fct> EWR, LGA, JFK, JFK, LGA, EWR, EWR, LGA, JFK, LGA, JFK, JFK, …
    +#> $ dest      <fct> IAH, IAH, MIA, BQN, ATL, ORD, FLL, IAD, MCO, ORD, PBI, TPA, …
    +#> $ air_time  <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 3…
    +#> $ distance  <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028,…
    +#> $ carrier   <fct> UA, UA, AA, B6, DL, UA, B6, EV, B6, AA, B6, B6, UA, UA, AA, …
    +#> $ date      <date> 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01, 2013-01-01,…
    +#> $ arr_delay <fct> on_time, on_time, late, on_time, on_time, on_time, on_time, …
    +#> $ time_hour <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:…
    +
    +

    Second, there are two variables that we don’t want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are flight, a numeric value, and time_hour, a date-time value.

    +

    Third, there are 104 flight destinations contained in dest and 16 distinct carriers.

    +
    +
    flight_data %>% 
    +  skimr::skim(dest, carrier) 
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +Data summary +
    +Name + +Piped data +
    +Number of rows + +325819 +
    +Number of columns + +10 +
    +_______________________ + +
    +Column type frequency: + +
    +factor + +2 +
    +________________________ + +
    +Group variables + +None +
    +

    Variable type: factor

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +skim_variable + +n_missing + +complete_rate + +ordered + +n_unique + +top_counts +
    +dest + +0 + +1 + +FALSE + +104 + +ATL: 16771, ORD: 16507, LAX: 15942, BOS: 14948 +
    +carrier + +0 + +1 + +FALSE + +16 + +UA: 57489, B6: 53715, EV: 50868, DL: 47465 +
    +
    +
    +

    Because we’ll be using a simple logistic regression model, the variables dest and carrier will be converted to dummy variables. However, some of these values do not occur very frequently and this could complicate our analysis. We’ll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling.

    +
    +
    +

    Data splitting

    +

    To get started, let’s split this single dataset into two: a training set and a testing set. We’ll keep most of the rows in the original dataset (subset chosen randomly) in the training set. The training data will be used to fit the model, and the testing set will be used to measure model performance.

    +

    To do this, we can use the rsample package to create an object that contains the information on how to split the data, and then two more rsample functions to create data frames for the training and testing sets:

    +
    +
    # Fix the random numbers by setting the seed 
    +# This enables the analysis to be reproducible when random numbers are used 
    +set.seed(222)
    +# Put 3/4 of the data into the training set 
    +data_split <- initial_split(flight_data, prop = 3/4)
    +
    +# Create data frames for the two sets:
    +train_data <- training(data_split)
    +test_data  <- testing(data_split)
    +
    +
    +
    +

    Create recipe and roles

    +

    To get started, let’s create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model.

    +

    Let’s initiate a new recipe:

    +
    +
    flights_rec <- 
    +  recipe(arr_delay ~ ., data = train_data) 
    +
    +

    The recipe() function as we used it here has two arguments:

    +
      +
    • A formula. Any variable on the left-hand side of the tilde (~) is considered the model outcome (here, arr_delay). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (.) to indicate all other variables as predictors.

    • +
    • The data. A recipe is associated with the data set used to create the model. This will typically be the training set, so data = train_data here. Naming a data set doesn’t actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc.

    • +
    +

    Now we can add roles to this recipe. We can use the update_role() function to let recipes know that flight and time_hour are variables with a custom role that we called "ID" (a role can have any character value). Whereas our formula included all variables in the training set other than arr_delay as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors.

    +
    +
    flights_rec <- 
    +  recipe(arr_delay ~ ., data = train_data) %>% 
    +  update_role(flight, time_hour, new_role = "ID") 
    +
    +

    This step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong.

    +

    To get the current set of variables and roles, use the summary() function:

    +
    +
    summary(flights_rec)
    +#> # A tibble: 10 × 4
    +#>    variable  type      role      source  
    +#>    <chr>     <list>    <chr>     <chr>   
    +#>  1 dep_time  <chr [2]> predictor original
    +#>  2 flight    <chr [2]> ID        original
    +#>  3 origin    <chr [3]> predictor original
    +#>  4 dest      <chr [3]> predictor original
    +#>  5 air_time  <chr [2]> predictor original
    +#>  6 distance  <chr [2]> predictor original
    +#>  7 carrier   <chr [3]> predictor original
    +#>  8 date      <chr [1]> predictor original
    +#>  9 time_hour <chr [1]> ID        original
    +#> 10 arr_delay <chr [3]> outcome   original
    +
    +
    +
    +

    Create features

    +

    Now we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of feature engineering might go a long way to improving our model. How should the date be encoded into the model? The date column is an R date object, so including that column “as is” will mean that the model will convert it to a numeric format equal to the number of days after a reference date:

    +
    +
    flight_data %>% 
    +  distinct(date) %>% 
    +  mutate(numeric_date = as.numeric(date)) 
    +#> # A tibble: 364 × 2
    +#>    date       numeric_date
    +#>    <date>            <dbl>
    +#>  1 2013-01-01        15706
    +#>  2 2013-01-02        15707
    +#>  3 2013-01-03        15708
    +#>  4 2013-01-04        15709
    +#>  5 2013-01-05        15710
    +#>  6 2013-01-06        15711
    +#>  7 2013-01-07        15712
    +#>  8 2013-01-08        15713
    +#>  9 2013-01-09        15714
    +#> 10 2013-01-10        15715
    +#> # ℹ 354 more rows
    +
    +

    It’s possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. However, it might be better to add model terms derived from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single date variable:

    +
      +
    • the day of the week,

    • +
    • the month, and

    • +
    • whether or not the date corresponds to a holiday.

    • +
    +

    Let’s do all three of these by adding steps to our recipe:

    +
    +
    flights_rec <- 
    +  recipe(arr_delay ~ ., data = train_data) %>% 
    +  update_role(flight, time_hour, new_role = "ID") %>% 
    +  step_date(date, features = c("dow", "month")) %>%               
    +  step_holiday(date, 
    +               holidays = timeDate::listHolidays("US"), 
    +               keep_original_cols = FALSE)
    +
    +

    What do each of these steps do?

    +
      +
    • With step_date(), we created two new factor columns with the appropriate day of the week and the month.

    • +
    • With step_holiday(), we created a binary variable indicating whether the current date is a holiday or not. The argument value of timeDate::listHolidays("US") uses the timeDate package to list the 18 standard US holidays.

    • +
    • With keep_original_cols = FALSE, we remove the original date variable since we no longer want it in the model. Many recipe steps that create new variables have this argument.

    • +
    +

    Next, we’ll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix).

    +

    For factors like dest and origin, standard practice is to convert them into dummy or indicator variables to make them numeric. These are binary values for each level of the factor. For example, our origin variable has values of "EWR", "JFK", and "LGA". The standard dummy variable encoding, shown below, will create two numeric columns of the data that are 1 when the originating airport is "JFK" or "LGA" and zero otherwise, respectively.

    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + +
    originorigin_JFKorigin_LGA
    JFK10
    EWR00
    LGA01
    + + +
    +
    +

    But, unlike the standard model formula methods in R, a recipe does not automatically create these dummy variables for you; you’ll need to tell your recipe to add this step. This is for two reasons. First, many models do not require numeric predictors, so dummy variables may not always be preferred. Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. For those reasons, you need to explicitly tell recipes to create dummy variables using step_dummy():

    +
    +
    flights_rec <- 
    +  recipe(arr_delay ~ ., data = train_data) %>% 
    +  update_role(flight, time_hour, new_role = "ID") %>% 
    +  step_date(date, features = c("dow", "month")) %>%               
    +  step_holiday(date, 
    +               holidays = timeDate::listHolidays("US"), 
    +               keep_original_cols = FALSE) %>% 
    +  step_dummy(all_nominal_predictors())
    +
    +

    Here, we did something different than before: instead of applying a step to an individual variable, we used selectors to apply this recipe step to several variables at once, all_nominal_predictors(). The selector functions can be combined to select intersections of variables.

    +

    At this stage in the recipe, this step selects the origin, dest, and carrier variables. It also includes two new variables, date_dow and date_month, that were created by the earlier step_date().

    +

    More generally, the recipe selectors mean that you don’t always have to apply steps to individual variables one at a time. Since a recipe knows the variable type and role of each column, they can also be selected (or dropped) using this information.

    +

    We need one final step to add to our recipe. Since carrier and dest have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don’t exist in the training set. For example, there is one destination that is only in the test set:

    +
    +
    test_data %>% 
    +  distinct(dest) %>% 
    +  anti_join(train_data)
    +#> Joining with `by = join_by(dest)`
    +#> # A tibble: 1 × 1
    +#>   dest 
    +#>   <fct>
    +#> 1 LEX
    +
    +

    When the recipe is applied to the training set, a column is made for LEX because the factor levels come from flight_data (not the training set), but this column will contain all zeros. This is a “zero-variance predictor” that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. step_zv() will remove columns from the data when the training set data have a single value, so it is added to the recipe after step_dummy():

    +
    +
    flights_rec <- 
    +  recipe(arr_delay ~ ., data = train_data) %>% 
    +  update_role(flight, time_hour, new_role = "ID") %>% 
    +  step_date(date, features = c("dow", "month")) %>%               
    +  step_holiday(date, 
    +               holidays = timeDate::listHolidays("US"), 
    +               keep_original_cols = FALSE) %>% 
    +  step_dummy(all_nominal_predictors()) %>% 
    +  step_zv(all_predictors())
    +
    +

    Now we’ve created a specification of what should be done with the data. How do we use the recipe we made?

    +
    +
    +

    Fit a model with a recipe

    +

    Let’s use logistic regression to model the flight data. As we saw in Build a Model, we start by building a model specification using the parsnip package:

    +
    +
    lr_mod <- 
    +  logistic_reg() %>% 
    +  set_engine("glm")
    +
    +

    We will want to use our recipe across several steps as we train and test our model. We will:

    +
      +
    1. Process the recipe using the training set: This involves any estimation or calculations based on the training set. For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal.

    2. +
    3. Apply the recipe to the training set: We create the final predictor set on the training set.

    4. +
    5. Apply the recipe to the test set: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set.

    6. +
    +

    To simplify this process, we can use a model workflow, which pairs a model and recipe together. This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test workflows. We’ll use the workflows package from tidymodels to bundle our parsnip model (lr_mod) with our recipe (flights_rec).

    +
    +
    flights_wflow <- 
    +  workflow() %>% 
    +  add_model(lr_mod) %>% 
    +  add_recipe(flights_rec)
    +
    +flights_wflow
    +#> ══ Workflow ══════════════════════════════════════════════════════════
    +#> Preprocessor: Recipe
    +#> Model: logistic_reg()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> 4 Recipe Steps
    +#> 
    +#> • step_date()
    +#> • step_holiday()
    +#> • step_dummy()
    +#> • step_zv()
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> Logistic Regression Model Specification (classification)
    +#> 
    +#> Computational engine: glm
    +
    +

    Now, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors:

    +
    +
    flights_fit <- 
    +  flights_wflow %>% 
    +  fit(data = train_data)
    +
    +

    This object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions extract_fit_parsnip() and extract_recipe(). For example, here we pull the fitted model object then use the broom::tidy() function to get a tidy tibble of model coefficients:

    +
    +
    flights_fit %>% 
    +  extract_fit_parsnip() %>% 
    +  tidy()
    +#> # A tibble: 158 × 5
    +#>    term                         estimate std.error statistic  p.value
    +#>    <chr>                           <dbl>     <dbl>     <dbl>    <dbl>
    +#>  1 (Intercept)                   7.26    2.73           2.66 7.75e- 3
    +#>  2 dep_time                     -0.00166 0.0000141   -118.   0       
    +#>  3 air_time                     -0.0440  0.000563     -78.2  0       
    +#>  4 distance                      0.00508 0.00150        3.38 7.13e- 4
    +#>  5 date_USChristmasDay           1.35    0.178          7.59 3.32e-14
    +#>  6 date_USColumbusDay            0.721   0.170          4.23 2.33e- 5
    +#>  7 date_USCPulaskisBirthday      0.804   0.139          5.78 7.38e- 9
    +#>  8 date_USDecorationMemorialDay  0.582   0.117          4.96 7.22e- 7
    +#>  9 date_USElectionDay            0.945   0.190          4.97 6.73e- 7
    +#> 10 date_USGoodFriday             1.24    0.167          7.44 1.04e-13
    +#> # ℹ 148 more rows
    +
    +
    +
    +

    Use a trained workflow to predict

    +

    Our goal was to predict whether a plane arrives more than 30 minutes late. We have just:

    +
      +
    1. Built the model (lr_mod),

    2. +
    3. Created a preprocessing recipe (flights_rec),

    4. +
    5. Bundled the model and recipe (flights_wflow), and

    6. +
    7. Trained our workflow using a single call to fit().

    8. +
    +

    The next step is to use the trained workflow (flights_fit) to predict with the unseen test data, which we will do with a single call to predict(). The predict() method applies the recipe to the new data, then passes them to the fitted model.

    +
    +
    predict(flights_fit, test_data)
    +#> # A tibble: 81,455 × 1
    +#>    .pred_class
    +#>    <fct>      
    +#>  1 on_time    
    +#>  2 on_time    
    +#>  3 on_time    
    +#>  4 on_time    
    +#>  5 on_time    
    +#>  6 on_time    
    +#>  7 on_time    
    +#>  8 on_time    
    +#>  9 on_time    
    +#> 10 on_time    
    +#> # ℹ 81,445 more rows
    +
    +

    Because our outcome variable here is a factor, the output from predict() returns the predicted class: late versus on_time. But, let’s say we want the predicted class probabilities for each flight instead. To return those, we can specify type = "prob" when we use predict() or use augment() with the model plus test data to save them together:

    +
    +
    flights_aug <- 
    +  augment(flights_fit, test_data)
    +
    +# The data look like: 
    +flights_aug %>%
    +  select(arr_delay, time_hour, flight, .pred_class, .pred_on_time)
    +#> # A tibble: 81,455 × 5
    +#>    arr_delay time_hour           flight .pred_class .pred_on_time
    +#>    <fct>     <dttm>               <int> <fct>               <dbl>
    +#>  1 on_time   2013-01-01 05:00:00   1545 on_time             0.945
    +#>  2 on_time   2013-01-01 05:00:00   1714 on_time             0.949
    +#>  3 on_time   2013-01-01 06:00:00    507 on_time             0.964
    +#>  4 on_time   2013-01-01 06:00:00   5708 on_time             0.961
    +#>  5 on_time   2013-01-01 06:00:00     71 on_time             0.962
    +#>  6 on_time   2013-01-01 06:00:00    194 on_time             0.975
    +#>  7 on_time   2013-01-01 06:00:00   1124 on_time             0.963
    +#>  8 on_time   2013-01-01 05:00:00   1806 on_time             0.981
    +#>  9 on_time   2013-01-01 06:00:00   1187 on_time             0.935
    +#> 10 on_time   2013-01-01 06:00:00   4650 on_time             0.931
    +#> # ℹ 81,445 more rows
    +
    +

    Now that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of .pred_on_time are p > .50. But we also know that we have 81,455 rows total to predict. We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, arr_delay.

    +

    Let’s use the area under the ROC curve as our metric, computed using roc_curve() and roc_auc() from the yardstick package.

    +

    To generate a ROC curve, we need the predicted class probabilities for late and on_time, which we just calculated in the code chunk above. We can create the ROC curve with these values, using roc_curve() and then piping to the autoplot() method:

    +
    +
    flights_aug %>% 
    +  roc_curve(truth = arr_delay, .pred_late) %>% 
    +  autoplot()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Similarly, roc_auc() estimates the area under the curve:

    +
    +
    flights_aug %>% 
    +  roc_auc(truth = arr_delay, .pred_late)
    +#> # A tibble: 1 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 roc_auc binary         0.764
    +
    +

    Not too bad! We leave it to the reader to test out this workflow without this recipe. You can use workflows::add_formula(arr_delay ~ .) instead of add_recipe() (remember to remove the identification variables first!), and see whether our recipe improved our model’s ability to predict late arrivals.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package      * version date (UTC) lib source
    +#>  broom        * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials        * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr        * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2      * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer        * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  nycflights13 * 1.0.2   2021-04-12 [1] CRAN (R 4.3.0)
    +#>  parsnip      * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr        * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes      * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang          1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample      * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  skimr        * 2.1.5   2022-12-23 [1] CRAN (R 4.3.0)
    +#>  tibble       * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels   * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune         * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows    * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick    * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/content/start/resampling/img/cells.png b/docs/start/resampling/img/cells.png similarity index 100% rename from content/start/resampling/img/cells.png rename to docs/start/resampling/img/cells.png diff --git a/content/start/resampling/img/resampling.svg b/docs/start/resampling/img/resampling.svg similarity index 100% rename from content/start/resampling/img/resampling.svg rename to docs/start/resampling/img/resampling.svg diff --git a/docs/start/resampling/index.html b/docs/start/resampling/index.html new file mode 100644 index 00000000..d3ac3189 --- /dev/null +++ b/docs/start/resampling/index.html @@ -0,0 +1,988 @@ + + + + + + + + + + +tidymodels - Evaluate your model with resampling + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Evaluate your model with resampling

    +
    +
    resampling
    +
    rsample
    +
    parsnip
    +
    tune
    +
    workflows
    +
    yardstick
    +
    +
    + +
    +
    +

    Measure model performance by generating different versions of the training data through resampling.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    So far, we have built a model and preprocessed data with a recipe. We also introduced workflows as a way to bundle a parsnip model and recipe together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on resampling statistics.

    +

    To use code in this article, you will need to install the following packages: modeldata, ranger, and tidymodels.

    +
    +
    library(tidymodels) # for the rsample package, along with the rest of tidymodels
    +
    +# Helper packages
    +library(modeldata)  # for the cells data
    +
    +

    +
    +
    +

    The cell image data

    +

    Let’s use data from Hill, LaPan, Li, and Haney (2007), available in the modeldata package, to predict cell image segmentation quality with resampling. To start, we load this data into R:

    +
    +
    data(cells, package = "modeldata")
    +cells
    +#> # A tibble: 2,019 × 58
    +#>    case  class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3
    +#>    <fct> <fct>      <dbl>     <int>          <dbl>          <dbl>          <dbl>
    +#>  1 Test  PS        143.         185           15.7           4.95           9.55
    +#>  2 Train PS        134.         819           31.9         207.            69.9 
    +#>  3 Train WS        107.         431           28.0         116.            63.9 
    +#>  4 Train PS         69.2        298           19.5         102.            28.2 
    +#>  5 Test  PS          2.89       285           24.3         112.            20.5 
    +#>  6 Test  WS         40.7        172          326.          654.           129.  
    +#>  7 Test  WS        174.         177          260.          596.           124.  
    +#>  8 Test  PS        180.         251           18.3           5.73          17.2 
    +#>  9 Test  WS         18.9        495           16.1          89.5           13.7 
    +#> 10 Test  WS        153.         384           17.7          89.9           20.4 
    +#> # ℹ 2,009 more rows
    +#> # ℹ 51 more variables: avg_inten_ch_4 <dbl>, convex_hull_area_ratio_ch_1 <dbl>,
    +#> #   convex_hull_perim_ratio_ch_1 <dbl>, diff_inten_density_ch_1 <dbl>,
    +#> #   diff_inten_density_ch_3 <dbl>, diff_inten_density_ch_4 <dbl>,
    +#> #   entropy_inten_ch_1 <dbl>, entropy_inten_ch_3 <dbl>,
    +#> #   entropy_inten_ch_4 <dbl>, eq_circ_diam_ch_1 <dbl>,
    +#> #   eq_ellipse_lwr_ch_1 <dbl>, eq_ellipse_oblate_vol_ch_1 <dbl>, …
    +
    +

    We have data for 2,019 cells, with 58 variables. The main outcome variable of interest for us here is called class, which you can see is a factor. But before we jump into predicting the class variable, we need to understand it better. Below is a brief primer on cell image segmentation.

    +
    +

    Predicting image segmentation quality

    +

    Some biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined.

    +

    For example, in the top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell.

    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Using these colors, the cells in an image can be segmented so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or “oblongness” of the cell.

    +

    The bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion.

    +

    A cell-based experiment might involve millions of cells so it is unfeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (PS) or well-segmented (WS). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented.

    +
    +
    +

    Back to the cells data

    +

    The cells data has class labels for 2,019 cells — each cell is labeled as either poorly segmented (PS) or well-segmented (WS). Each also has a total of 56 predictors based on automated image analysis measurements. For example, avg_inten_ch_1 is the mean intensity of the data contained in the nucleus, area_ch_1 is the total size of the cell, and so on (some predictors are fairly arcane in nature).

    +
    +
    cells
    +#> # A tibble: 2,019 × 58
    +#>    case  class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3
    +#>    <fct> <fct>      <dbl>     <int>          <dbl>          <dbl>          <dbl>
    +#>  1 Test  PS        143.         185           15.7           4.95           9.55
    +#>  2 Train PS        134.         819           31.9         207.            69.9 
    +#>  3 Train WS        107.         431           28.0         116.            63.9 
    +#>  4 Train PS         69.2        298           19.5         102.            28.2 
    +#>  5 Test  PS          2.89       285           24.3         112.            20.5 
    +#>  6 Test  WS         40.7        172          326.          654.           129.  
    +#>  7 Test  WS        174.         177          260.          596.           124.  
    +#>  8 Test  PS        180.         251           18.3           5.73          17.2 
    +#>  9 Test  WS         18.9        495           16.1          89.5           13.7 
    +#> 10 Test  WS        153.         384           17.7          89.9           20.4 
    +#> # ℹ 2,009 more rows
    +#> # ℹ 51 more variables: avg_inten_ch_4 <dbl>, convex_hull_area_ratio_ch_1 <dbl>,
    +#> #   convex_hull_perim_ratio_ch_1 <dbl>, diff_inten_density_ch_1 <dbl>,
    +#> #   diff_inten_density_ch_3 <dbl>, diff_inten_density_ch_4 <dbl>,
    +#> #   entropy_inten_ch_1 <dbl>, entropy_inten_ch_3 <dbl>,
    +#> #   entropy_inten_ch_4 <dbl>, eq_circ_diam_ch_1 <dbl>,
    +#> #   eq_ellipse_lwr_ch_1 <dbl>, eq_ellipse_oblate_vol_ch_1 <dbl>, …
    +
    +

    The rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells:

    +
    +
    cells %>% 
    +  count(class) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   class     n  prop
    +#>   <fct> <int> <dbl>
    +#> 1 PS     1300 0.644
    +#> 2 WS      719 0.356
    +
    +
    +
    +
    +

    Data splitting

    +

    In our previous Preprocess your data with recipes article, we started by splitting our data. It is common when beginning a modeling project to separate the data set into two partitions:

    +
      +
    • The training set is used to estimate parameters, compare models and feature engineering techniques, tune models, etc.

    • +
    • The test set is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance.

    • +
    +

    There are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the rsample package for this purpose.

    +

    Since random sampling uses random numbers, it is important to set the random number seed. This ensures that the random numbers can be reproduced at a later time (if needed).

    +

    The function rsample::initial_split() takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column case. To demonstrate how to make a split, we’ll remove this column before we make our own split:

    +
    +
    set.seed(123)
    +cell_split <- initial_split(cells %>% select(-case), 
    +                            strata = class)
    +
    +

    Here we used the strata argument, which conducts a stratified split. This ensures that, despite the imbalance we noticed in our class variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the initial_split, the training() and testing() functions return the actual data sets.

    +
    +
    cell_train <- training(cell_split)
    +cell_test  <- testing(cell_split)
    +
    +nrow(cell_train)
    +#> [1] 1514
    +nrow(cell_train)/nrow(cells)
    +#> [1] 0.7498762
    +
    +# training set proportions by class
    +cell_train %>% 
    +  count(class) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   class     n  prop
    +#>   <fct> <int> <dbl>
    +#> 1 PS      975 0.644
    +#> 2 WS      539 0.356
    +
    +# test set proportions by class
    +cell_test %>% 
    +  count(class) %>% 
    +  mutate(prop = n/sum(n))
    +#> # A tibble: 2 × 3
    +#>   class     n  prop
    +#>   <fct> <int> <dbl>
    +#> 1 PS      325 0.644
    +#> 2 WS      180 0.356
    +
    +

    The majority of the modeling work is then conducted on the training set data.

    +
    +
    +

    Modeling

    +

    Random forest models are ensembles of decision trees. A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. For categorical outcome variables like class in our cells data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample.

    +

    One of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won’t create a recipe for the cells data.

    +

    At the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute.

    +

    To fit a random forest model on the training set, let’s use the parsnip package with the ranger engine. We first define the model that we want to create:

    +
    +
    rf_mod <- 
    +  rand_forest(trees = 1000) %>% 
    +  set_engine("ranger") %>% 
    +  set_mode("classification")
    +
    +

    Starting with this parsnip model object, the fit() function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing:

    +
    +
    set.seed(234)
    +rf_fit <- 
    +  rf_mod %>% 
    +  fit(class ~ ., data = cell_train)
    +rf_fit
    +#> parsnip model object
    +#> 
    +#> Ranger result
    +#> 
    +#> Call:
    +#>  ranger::ranger(x = maybe_data_frame(x), y = y, num.trees = ~1000,      num.threads = 1, verbose = FALSE, seed = sample.int(10^5,          1), probability = TRUE) 
    +#> 
    +#> Type:                             Probability estimation 
    +#> Number of trees:                  1000 
    +#> Sample size:                      1514 
    +#> Number of independent variables:  56 
    +#> Mtry:                             7 
    +#> Target node size:                 10 
    +#> Variable importance mode:         none 
    +#> Splitrule:                        gini 
    +#> OOB prediction error (Brier s.):  0.1189338
    +
    +

    This new rf_fit object is our fitted model, trained on our training data set.

    +
    +
    +

    Estimating performance

    +

    During a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are:

    +
      +
    • the area under the Receiver Operating Characteristic (ROC) curve, and

    • +
    • overall classification accuracy.

    • +
    +

    The ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. The hard class predictions tell us whether our model predicted PS or WS for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented.

    +

    The yardstick package has functions for computing both of these measures called roc_auc() and accuracy().

    +

    At first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let’s see what happens if we try this. To evaluate performance based on the training set, we call the predict() method to get both types of predictions (i.e. probabilities and hard class predictions).

    +
    +
    rf_training_pred <- 
    +  predict(rf_fit, cell_train) %>% 
    +  bind_cols(predict(rf_fit, cell_train, type = "prob")) %>% 
    +  # Add the true outcome data back in
    +  bind_cols(cell_train %>% 
    +              select(class))
    +
    +

    Using the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious:

    +
    +
    rf_training_pred %>%                # training set predictions
    +  roc_auc(truth = class, .pred_PS)
    +#> # A tibble: 1 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 roc_auc binary          1.00
    +rf_training_pred %>%                # training set predictions
    +  accuracy(truth = class, .pred_class)
    +#> # A tibble: 1 × 3
    +#>   .metric  .estimator .estimate
    +#>   <chr>    <chr>          <dbl>
    +#> 1 accuracy binary         0.991
    +
    +

    Now that we have this model with exceptional performance, we proceed to the test set. Unfortunately, we discover that, although our results aren’t bad, they are certainly worse than what we initially thought based on predicting the training set:

    +
    +
    rf_testing_pred <- 
    +  predict(rf_fit, cell_test) %>% 
    +  bind_cols(predict(rf_fit, cell_test, type = "prob")) %>% 
    +  bind_cols(cell_test %>% select(class))
    +
    +
    +
    rf_testing_pred %>%                   # test set predictions
    +  roc_auc(truth = class, .pred_PS)
    +#> # A tibble: 1 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 roc_auc binary         0.891
    +rf_testing_pred %>%                   # test set predictions
    +  accuracy(truth = class, .pred_class)
    +#> # A tibble: 1 × 3
    +#>   .metric  .estimator .estimate
    +#>   <chr>    <chr>          <dbl>
    +#> 1 accuracy binary         0.816
    +
    +
    +

    What happened here?

    +

    There are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic:

    +
      +
    • Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results.

    • +
    • The training set does not have the capacity to be a good arbiter of performance. It is not an independent piece of information; predicting the training set can only reflect what the model already knows.

    • +
    +

    To understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. The student scores on the second test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test.

    +
    +
    +
    +

    Resampling to the rescue

    +

    Resampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the training set. This schematic from Kuhn and Johnson (2019) illustrates data usage for resampling methods:

    +
    +
    +
    +
    +

    +
    +
    +
    +
    +

    In the first level of this diagram, you see what happens when you use rsample::initial_split(), which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out.

    +

    Let’s use 10-fold cross-validation (CV) in this example. This method randomly allocates the 1514 cells in the training set to 10 groups of roughly equal size, called “folds”. For the first iteration of resampling, the first fold of about 151 cells is held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the assessment set in the tidymodels framework.

    +

    The other 90% of the data (about 1362 cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the analysis set. This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions.

    +

    In this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics.

    +

    The final resampling estimates for the model are the averages of the performance statistics replicates. For example, suppose for our data the results were:

    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    resampleaccuracyroc_aucassessment size
    Fold010.82894740.8937128152
    Fold020.76973680.8768989152
    Fold030.85526320.9017666152
    Fold040.85526320.8928076152
    Fold050.79470200.8816342151
    Fold060.84768210.9244306151
    Fold070.81456950.8960339151
    Fold080.85430460.9267677151
    Fold090.85430460.9231392151
    Fold100.84768210.9266917151
    + + +
    +
    +

    From these resampling statistics, the final estimate of performance for this random forest model would be 0.904 for the area under the ROC curve and 0.832 for accuracy.

    +

    These resampling statistics are an effective method for measuring model performance without predicting the training set directly as a whole.

    +
    +
    +

    Fit a model with resampling

    +

    To generate these results, the first step is to create a resampling object using rsample. There are several resampling methods implemented in rsample; cross-validation folds can be created using vfold_cv():

    +
    +
    set.seed(345)
    +folds <- vfold_cv(cell_train, v = 10)
    +folds
    +#> #  10-fold cross-validation 
    +#> # A tibble: 10 × 2
    +#>    splits             id    
    +#>    <list>             <chr> 
    +#>  1 <split [1362/152]> Fold01
    +#>  2 <split [1362/152]> Fold02
    +#>  3 <split [1362/152]> Fold03
    +#>  4 <split [1362/152]> Fold04
    +#>  5 <split [1363/151]> Fold05
    +#>  6 <split [1363/151]> Fold06
    +#>  7 <split [1363/151]> Fold07
    +#>  8 <split [1363/151]> Fold08
    +#>  9 <split [1363/151]> Fold09
    +#> 10 <split [1363/151]> Fold10
    +
    +

    The list column for splits contains the information on which rows belong in the analysis and assessment sets. There are functions called analysis() and assessment() that can be used to extract the individual resampled data.

    +

    However, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling:

    +
      +
    • Resample a model specification preprocessed with a formula or recipe, or

    • +
    • Resample a workflow() that bundles together a model specification and formula/recipe.

    • +
    +

    For this example, let’s use a workflow() that bundles together the random forest model and a formula, since we are not using a recipe. Whichever of these options you use, the syntax to fit_resamples() is very similar to fit():

    +
    +
    rf_wf <- 
    +  workflow() %>%
    +  add_model(rf_mod) %>%
    +  add_formula(class ~ .)
    +
    +set.seed(456)
    +rf_fit_rs <- 
    +  rf_wf %>% 
    +  fit_resamples(folds)
    +
    +
    +
    rf_fit_rs
    +#> # Resampling results
    +#> # 10-fold cross-validation 
    +#> # A tibble: 10 × 4
    +#>    splits             id     .metrics         .notes          
    +#>    <list>             <chr>  <list>           <list>          
    +#>  1 <split [1362/152]> Fold01 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  2 <split [1362/152]> Fold02 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  3 <split [1362/152]> Fold03 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  4 <split [1362/152]> Fold04 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  5 <split [1363/151]> Fold05 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  6 <split [1363/151]> Fold06 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  7 <split [1363/151]> Fold07 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  8 <split [1363/151]> Fold08 <tibble [2 × 4]> <tibble [0 × 3]>
    +#>  9 <split [1363/151]> Fold09 <tibble [2 × 4]> <tibble [0 × 3]>
    +#> 10 <split [1363/151]> Fold10 <tibble [2 × 4]> <tibble [0 × 3]>
    +
    +

    The results are similar to the folds results with some extra columns. The column .metrics contains the performance statistics created from the 10 assessment sets. These can be manually unnested but the tune package contains a number of simple functions that can extract these data:

    +
    +
    collect_metrics(rf_fit_rs)
    +#> # A tibble: 2 × 6
    +#>   .metric  .estimator  mean     n std_err .config             
    +#>   <chr>    <chr>      <dbl> <int>   <dbl> <chr>               
    +#> 1 accuracy binary     0.832    10 0.00952 Preprocessor1_Model1
    +#> 2 roc_auc  binary     0.904    10 0.00610 Preprocessor1_Model1
    +
    +

    Think about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. We have looked at this once already before we started using resampling, but let’s remind ourselves of the results:

    +
    +
    rf_testing_pred %>%                   # test set predictions
    +  roc_auc(truth = class, .pred_PS)
    +#> # A tibble: 1 × 3
    +#>   .metric .estimator .estimate
    +#>   <chr>   <chr>          <dbl>
    +#> 1 roc_auc binary         0.891
    +rf_testing_pred %>%                   # test set predictions
    +  accuracy(truth = class, .pred_class)
    +#> # A tibble: 1 × 3
    +#>   .metric  .estimator .estimate
    +#>   <chr>    <chr>          <dbl>
    +#> 1 accuracy binary         0.816
    +
    +

    The performance metrics from the test set are much closer to the performance metrics computed using resampling than our first (“bad idea”) attempt. Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model’s performance.

    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  modeldata  * 1.1.0   2023-01-25 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  ranger     * 0.15.1  2023-04-03 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/docs/start/styles.css b/docs/start/styles.css new file mode 100644 index 00000000..89b1cc8b --- /dev/null +++ b/docs/start/styles.css @@ -0,0 +1,25 @@ +.sidebar-menu-container { + border-right: solid : #DDDDDD 1pt; /* $theme-grey */ + counter-reset: section; +} + +#quarto-sidebar > * { + padding-top: 3em; + padding-right: 51px; +} + +.sidebar-item { + display: flex; + margin-bottom: 30px; +} + +div.sidebar-item-container { + color: rgba(26, 22, 45, 0.6); +} + +.sidebar-item:not(:first-child)::before { + counter-increment: section; + content: counter(section, decimal) ""; + padding-right: 0.5em; + color: #CA225E; /* $theme-cranberry */ +} diff --git a/content/start/tuning/figs/best-tree-1.svg b/docs/start/tuning/figs/best-tree-1.svg similarity index 100% rename from content/start/tuning/figs/best-tree-1.svg rename to docs/start/tuning/figs/best-tree-1.svg diff --git a/content/start/tuning/figs/last-fit-1.svg b/docs/start/tuning/figs/last-fit-1.svg similarity index 100% rename from content/start/tuning/figs/last-fit-1.svg rename to docs/start/tuning/figs/last-fit-1.svg diff --git a/content/start/tuning/figs/rpart-plot-1.svg b/docs/start/tuning/figs/rpart-plot-1.svg similarity index 100% rename from content/start/tuning/figs/rpart-plot-1.svg rename to docs/start/tuning/figs/rpart-plot-1.svg diff --git a/content/start/tuning/figs/vip-1.svg b/docs/start/tuning/figs/vip-1.svg similarity index 100% rename from content/start/tuning/figs/vip-1.svg rename to docs/start/tuning/figs/vip-1.svg diff --git a/docs/start/tuning/index.html b/docs/start/tuning/index.html new file mode 100644 index 00000000..f300caac --- /dev/null +++ b/docs/start/tuning/index.html @@ -0,0 +1,919 @@ + + + + + + + + + + +tidymodels - Tune model parameters + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + +
    + +
    + + +
    + + + +
    + +
    +
    +

    Tune model parameters

    +
    +
    tuning
    +
    rsample
    +
    parsnip
    +
    tune
    +
    dials
    +
    workflows
    +
    yardstick
    +
    +
    + +
    +
    +

    Estimate the best values for hyperparameters that cannot be learned directly during model training.

    +
    +
    + + +
    + + + + +
    + + +
    + +
    +

    Introduction

    +

    Some model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called hyperparameters. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this mtry in tidymodels) or the learning rate in a boosted tree model (we call this learn_rate). Instead of learning these kinds of hyperparameters during model training, we can estimate the best values for these hyperparameters by training many models on resampled data sets and exploring how well all these models perform. This process is called tuning.

    +

    To use code in this article, you will need to install the following packages: rpart, rpart.plot, tidymodels, and vip.

    +
    +
    library(tidymodels)  # for the tune package, along with the rest of tidymodels
    +
    +# Helper packages
    +library(rpart.plot)  # for visualizing a decision tree
    +library(vip)         # for variable importance plots
    +
    +

    +
    +
    +

    The cell image data, revisited

    +

    In our previous Evaluate your model with resampling article, we introduced a data set of images of cells that were labeled by experts as well-segmented (WS) or poorly segmented (PS). We trained a random forest model to predict which images are segmented well vs. poorly, so that a biologist could filter out poorly segmented cell images in their analysis. We used resampling to estimate the performance of our model on this data.

    +
    +
    data(cells, package = "modeldata")
    +cells
    +#> # A tibble: 2,019 × 58
    +#>    case  class angle_ch_1 area_ch_1 avg_inten_ch_1 avg_inten_ch_2 avg_inten_ch_3
    +#>    <fct> <fct>      <dbl>     <int>          <dbl>          <dbl>          <dbl>
    +#>  1 Test  PS        143.         185           15.7           4.95           9.55
    +#>  2 Train PS        134.         819           31.9         207.            69.9 
    +#>  3 Train WS        107.         431           28.0         116.            63.9 
    +#>  4 Train PS         69.2        298           19.5         102.            28.2 
    +#>  5 Test  PS          2.89       285           24.3         112.            20.5 
    +#>  6 Test  WS         40.7        172          326.          654.           129.  
    +#>  7 Test  WS        174.         177          260.          596.           124.  
    +#>  8 Test  PS        180.         251           18.3           5.73          17.2 
    +#>  9 Test  WS         18.9        495           16.1          89.5           13.7 
    +#> 10 Test  WS        153.         384           17.7          89.9           20.4 
    +#> # ℹ 2,009 more rows
    +#> # ℹ 51 more variables: avg_inten_ch_4 <dbl>, convex_hull_area_ratio_ch_1 <dbl>,
    +#> #   convex_hull_perim_ratio_ch_1 <dbl>, diff_inten_density_ch_1 <dbl>,
    +#> #   diff_inten_density_ch_3 <dbl>, diff_inten_density_ch_4 <dbl>,
    +#> #   entropy_inten_ch_1 <dbl>, entropy_inten_ch_3 <dbl>,
    +#> #   entropy_inten_ch_4 <dbl>, eq_circ_diam_ch_1 <dbl>,
    +#> #   eq_ellipse_lwr_ch_1 <dbl>, eq_ellipse_oblate_vol_ch_1 <dbl>, …
    +
    +
    +
    +

    Predicting image segmentation, but better

    +

    Random forest models are a tree-based ensemble method, and typically perform well with default hyperparameters. However, the accuracy of some other tree-based models, such as boosted tree models or decision tree models, can be sensitive to the values of hyperparameters. In this article, we will train a decision tree model. There are several hyperparameters for decision tree models that can be tuned for better performance. Let’s explore:

    +
      +
    • the complexity parameter (which we call cost_complexity in tidymodels) for the tree, and
    • +
    • the maximum tree_depth.
    • +
    +

    Tuning these hyperparameters can improve model performance because decision tree models are prone to overfitting. This happens because single tree models tend to fit the training data too well — so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data.

    +

    We will tune the model hyperparameters to avoid overfitting. Tuning the value of cost_complexity helps by pruning back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number of tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem—an underfit tree. Tuning tree_depth, on the other hand, helps by stopping our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation.

    +

    Before we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As before, we can use strata = class if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation.

    +
    +
    set.seed(123)
    +cell_split <- initial_split(cells %>% select(-case), 
    +                            strata = class)
    +cell_train <- training(cell_split)
    +cell_test  <- testing(cell_split)
    +
    +

    We use the training data for tuning the model.

    +
    +
    +

    Tuning hyperparameters

    +

    Let’s start with the parsnip package, using a decision_tree() model with the rpart engine. To tune the decision tree hyperparameters cost_complexity and tree_depth, we create a model specification that identifies which hyperparameters we plan to tune.

    +
    +
    tune_spec <- 
    +  decision_tree(
    +    cost_complexity = tune(),
    +    tree_depth = tune()
    +  ) %>% 
    +  set_engine("rpart") %>% 
    +  set_mode("classification")
    +
    +tune_spec
    +#> Decision Tree Model Specification (classification)
    +#> 
    +#> Main Arguments:
    +#>   cost_complexity = tune()
    +#>   tree_depth = tune()
    +#> 
    +#> Computational engine: rpart
    +
    +

    Think of tune() here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will tune().

    +

    We can’t train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we can train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter:

    +
    +
    tree_grid <- grid_regular(cost_complexity(),
    +                          tree_depth(),
    +                          levels = 5)
    +
    +

    The function grid_regular() is from the dials package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. Since we have two to tune, grid_regular() returns 5 × 5 = 25 different possible tuning combinations to try in a tidy tibble format.

    +
    +
    tree_grid
    +#> # A tibble: 25 × 2
    +#>    cost_complexity tree_depth
    +#>              <dbl>      <int>
    +#>  1    0.0000000001          1
    +#>  2    0.0000000178          1
    +#>  3    0.00000316            1
    +#>  4    0.000562              1
    +#>  5    0.1                   1
    +#>  6    0.0000000001          4
    +#>  7    0.0000000178          4
    +#>  8    0.00000316            4
    +#>  9    0.000562              4
    +#> 10    0.1                   4
    +#> # ℹ 15 more rows
    +
    +

    Here, you can see all 5 values of cost_complexity ranging up to 0.1. These values get repeated for each of the 5 values of tree_depth:

    +
    +
    tree_grid %>% 
    +  count(tree_depth)
    +#> # A tibble: 5 × 2
    +#>   tree_depth     n
    +#>        <int> <int>
    +#> 1          1     5
    +#> 2          4     5
    +#> 3          8     5
    +#> 4         11     5
    +#> 5         15     5
    +
    +

    Armed with our grid filled with 25 candidate decision tree models, let’s create cross-validation folds for tuning:

    +
    +
    set.seed(234)
    +cell_folds <- vfold_cv(cell_train)
    +
    +

    Tuning in tidymodels requires a resampled object created with the rsample package.

    +
    +
    +

    Model tuning with a grid

    +

    We are ready to tune! Let’s use tune_grid() to fit models at all the different values we chose for each tuned hyperparameter. There are several options for building the object for tuning:

    +
      +
    • Tune a model specification along with a recipe or model, or

    • +
    • Tune a workflow() that bundles together a model specification and a recipe or model preprocessor.

    • +
    +

    Here we use a workflow() with a straightforward formula; if this model required more involved data preprocessing, we could use add_recipe() instead of add_formula().

    +
    +
    set.seed(345)
    +
    +tree_wf <- workflow() %>%
    +  add_model(tune_spec) %>%
    +  add_formula(class ~ .)
    +
    +tree_res <- 
    +  tree_wf %>% 
    +  tune_grid(
    +    resamples = cell_folds,
    +    grid = tree_grid
    +    )
    +
    +tree_res
    +#> # Tuning results
    +#> # 10-fold cross-validation 
    +#> # A tibble: 10 × 4
    +#>    splits             id     .metrics          .notes          
    +#>    <list>             <chr>  <list>            <list>          
    +#>  1 <split [1362/152]> Fold01 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  2 <split [1362/152]> Fold02 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  3 <split [1362/152]> Fold03 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  4 <split [1362/152]> Fold04 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  5 <split [1363/151]> Fold05 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  6 <split [1363/151]> Fold06 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  7 <split [1363/151]> Fold07 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  8 <split [1363/151]> Fold08 <tibble [50 × 6]> <tibble [0 × 3]>
    +#>  9 <split [1363/151]> Fold09 <tibble [50 × 6]> <tibble [0 × 3]>
    +#> 10 <split [1363/151]> Fold10 <tibble [50 × 6]> <tibble [0 × 3]>
    +
    +

    Once we have our tuning results, we can both explore them through visualization and then select the best result. The function collect_metrics() gives us a tidy tibble with all the results. We had 25 candidate models and two metrics, accuracy and roc_auc, and we get a row for each .metric and model.

    +
    +
    tree_res %>% 
    +  collect_metrics()
    +#> # A tibble: 50 × 8
    +#>    cost_complexity tree_depth .metric  .estimator  mean     n std_err .config   
    +#>              <dbl>      <int> <chr>    <chr>      <dbl> <int>   <dbl> <chr>     
    +#>  1    0.0000000001          1 accuracy binary     0.732    10  0.0148 Preproces…
    +#>  2    0.0000000001          1 roc_auc  binary     0.777    10  0.0107 Preproces…
    +#>  3    0.0000000178          1 accuracy binary     0.732    10  0.0148 Preproces…
    +#>  4    0.0000000178          1 roc_auc  binary     0.777    10  0.0107 Preproces…
    +#>  5    0.00000316            1 accuracy binary     0.732    10  0.0148 Preproces…
    +#>  6    0.00000316            1 roc_auc  binary     0.777    10  0.0107 Preproces…
    +#>  7    0.000562              1 accuracy binary     0.732    10  0.0148 Preproces…
    +#>  8    0.000562              1 roc_auc  binary     0.777    10  0.0107 Preproces…
    +#>  9    0.1                   1 accuracy binary     0.732    10  0.0148 Preproces…
    +#> 10    0.1                   1 roc_auc  binary     0.777    10  0.0107 Preproces…
    +#> # ℹ 40 more rows
    +
    +

    We might get more out of plotting these results:

    +
    +
    tree_res %>%
    +  collect_metrics() %>%
    +  mutate(tree_depth = factor(tree_depth)) %>%
    +  ggplot(aes(cost_complexity, mean, color = tree_depth)) +
    +  geom_line(size = 1.5, alpha = 0.6) +
    +  geom_point(size = 2) +
    +  facet_wrap(~ .metric, scales = "free", nrow = 2) +
    +  scale_x_log10(labels = scales::label_number()) +
    +  scale_color_viridis_d(option = "plasma", begin = .9, end = 0)
    +#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
    +#> ℹ Please use `linewidth` instead.
    +
    +
    +
    +

    +
    +
    +
    +
    +

    We can see that our “stubbiest” tree, with a depth of 1, is the worst model according to both metrics and across all candidate values of cost_complexity. Our deepest tree, with a depth of 15, did better. However, the best tree seems to be between these values with a tree depth of 4. The show_best() function shows us the top 5 candidate models by default:

    +
    +
    tree_res %>%
    +  show_best("accuracy")
    +#> # A tibble: 5 × 8
    +#>   cost_complexity tree_depth .metric  .estimator  mean     n std_err .config    
    +#>             <dbl>      <int> <chr>    <chr>      <dbl> <int>   <dbl> <chr>      
    +#> 1    0.0000000001          4 accuracy binary     0.807    10  0.0119 Preprocess…
    +#> 2    0.0000000178          4 accuracy binary     0.807    10  0.0119 Preprocess…
    +#> 3    0.00000316            4 accuracy binary     0.807    10  0.0119 Preprocess…
    +#> 4    0.000562              4 accuracy binary     0.807    10  0.0119 Preprocess…
    +#> 5    0.1                   4 accuracy binary     0.786    10  0.0124 Preprocess…
    +
    +

    We can also use the select_best() function to pull out the single set of hyperparameter values for our best decision tree model:

    +
    +
    best_tree <- tree_res %>%
    +  select_best("accuracy")
    +
    +best_tree
    +#> # A tibble: 1 × 3
    +#>   cost_complexity tree_depth .config              
    +#>             <dbl>      <int> <chr>                
    +#> 1    0.0000000001          4 Preprocessor1_Model06
    +
    +

    These are the values for tree_depth and cost_complexity that maximize accuracy in this data set of cell images.

    +
    +
    +

    Finalizing our model

    +

    We can update (or “finalize”) our workflow object tree_wf with the values from select_best().

    +
    +
    final_wf <- 
    +  tree_wf %>% 
    +  finalize_workflow(best_tree)
    +
    +final_wf
    +#> ══ Workflow ══════════════════════════════════════════════════════════
    +#> Preprocessor: Formula
    +#> Model: decision_tree()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> class ~ .
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> Decision Tree Model Specification (classification)
    +#> 
    +#> Main Arguments:
    +#>   cost_complexity = 1e-10
    +#>   tree_depth = 4
    +#> 
    +#> Computational engine: rpart
    +
    +

    Our tuning is done!

    +
    +

    The last fit

    +

    Finally, let’s fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. We can use the function last_fit() with our finalized model; this function fits the finalized model on the full training data set and evaluates the finalized model on the testing data.

    +
    +
    final_fit <- 
    +  final_wf %>%
    +  last_fit(cell_split) 
    +
    +final_fit %>%
    +  collect_metrics()
    +#> # A tibble: 2 × 4
    +#>   .metric  .estimator .estimate .config             
    +#>   <chr>    <chr>          <dbl> <chr>               
    +#> 1 accuracy binary         0.802 Preprocessor1_Model1
    +#> 2 roc_auc  binary         0.840 Preprocessor1_Model1
    +
    +final_fit %>%
    +  collect_predictions() %>% 
    +  roc_curve(class, .pred_PS) %>% 
    +  autoplot()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    The performance metrics from the test set indicate that we did not overfit during our tuning procedure.

    +

    The final_fit object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. You may want to extract this object, using one of the extract_ helper functions.

    +
    +
    final_tree <- extract_workflow(final_fit)
    +final_tree
    +#> ══ Workflow [trained] ════════════════════════════════════════════════
    +#> Preprocessor: Formula
    +#> Model: decision_tree()
    +#> 
    +#> ── Preprocessor ──────────────────────────────────────────────────────
    +#> class ~ .
    +#> 
    +#> ── Model ─────────────────────────────────────────────────────────────
    +#> n= 1514 
    +#> 
    +#> node), split, n, loss, yval, (yprob)
    +#>       * denotes terminal node
    +#> 
    +#>  1) root 1514 539 PS (0.64398943 0.35601057)  
    +#>    2) total_inten_ch_2< 41732.5 642  33 PS (0.94859813 0.05140187)  
    +#>      4) shape_p_2_a_ch_1>=1.251801 631  27 PS (0.95721078 0.04278922) *
    +#>      5) shape_p_2_a_ch_1< 1.251801 11   5 WS (0.45454545 0.54545455) *
    +#>    3) total_inten_ch_2>=41732.5 872 366 WS (0.41972477 0.58027523)  
    +#>      6) fiber_width_ch_1< 11.37318 406 160 PS (0.60591133 0.39408867)  
    +#>       12) avg_inten_ch_1< 145.4883 293  85 PS (0.70989761 0.29010239) *
    +#>       13) avg_inten_ch_1>=145.4883 113  38 WS (0.33628319 0.66371681)  
    +#>         26) total_inten_ch_3>=57919.5 33  10 PS (0.69696970 0.30303030) *
    +#>         27) total_inten_ch_3< 57919.5 80  15 WS (0.18750000 0.81250000) *
    +#>      7) fiber_width_ch_1>=11.37318 466 120 WS (0.25751073 0.74248927)  
    +#>       14) eq_ellipse_oblate_vol_ch_1>=1673.942 30   8 PS (0.73333333 0.26666667)  
    +#>         28) var_inten_ch_3>=41.10858 20   2 PS (0.90000000 0.10000000) *
    +#>         29) var_inten_ch_3< 41.10858 10   4 WS (0.40000000 0.60000000) *
    +#>       15) eq_ellipse_oblate_vol_ch_1< 1673.942 436  98 WS (0.22477064 0.77522936) *
    +
    +

    We can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit.

    +
    +
    final_tree %>%
    +  extract_fit_engine() %>%
    +  rpart.plot(roundint = FALSE)
    +
    +
    +
    +

    +
    +
    +
    +
    +

    Perhaps we would also like to understand what variables are important in this final model. We can use the vip package to estimate variable importance based on the model’s structure.

    +
    +
    library(vip)
    +
    +final_tree %>% 
    +  extract_fit_parsnip() %>% 
    +  vip()
    +
    +
    +
    +

    +
    +
    +
    +
    +

    These are the automated image analysis measurements that are the most important in driving segmentation quality predictions.

    +

    We leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the reference docs, or use the args() function to see which parsnip object arguments are available:

    +
    +
    args(decision_tree)
    +#> function (mode = "unknown", engine = "rpart", cost_complexity = NULL, 
    +#>     tree_depth = NULL, min_n = NULL) 
    +#> NULL
    +
    +

    You could tune the other hyperparameter we didn’t use here, min_n, which sets the minimum n to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this searchable table to find the original argument for min_n in the rpart package (hint). See whether you can tune a different combination of hyperparameters and/or values to improve a tree’s ability to predict cell segmentation quality.

    +
    +
    +
    +

    Session information

    +
    +
    #> ─ Session info ─────────────────────────────────────────────────────
    +#>  setting  value
    +#>  version  R version 4.3.0 (2023-04-21)
    +#>  os       macOS Monterey 12.6
    +#>  system   aarch64, darwin20
    +#>  ui       X11
    +#>  language (EN)
    +#>  collate  en_US.UTF-8
    +#>  ctype    en_US.UTF-8
    +#>  tz       America/Los_Angeles
    +#>  date     2023-05-25
    +#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    +#> 
    +#> ─ Packages ─────────────────────────────────────────────────────────
    +#>  package    * version date (UTC) lib source
    +#>  broom      * 1.0.4   2023-03-11 [1] CRAN (R 4.3.0)
    +#>  dials      * 1.2.0   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  dplyr      * 1.1.2   2023-04-20 [1] CRAN (R 4.3.0)
    +#>  ggplot2    * 3.4.2   2023-04-03 [1] CRAN (R 4.3.0)
    +#>  infer      * 1.0.4   2022-12-02 [1] CRAN (R 4.3.0)
    +#>  parsnip    * 1.1.0   2023-04-12 [1] CRAN (R 4.3.0)
    +#>  purrr      * 1.0.1   2023-01-10 [1] CRAN (R 4.3.0)
    +#>  recipes    * 1.0.6   2023-04-25 [1] CRAN (R 4.3.0)
    +#>  rlang        1.1.1   2023-04-28 [1] CRAN (R 4.3.0)
    +#>  rpart      * 4.1.19  2022-10-21 [2] CRAN (R 4.3.0)
    +#>  rpart.plot * 3.1.1   2022-05-21 [1] CRAN (R 4.3.0)
    +#>  rsample    * 1.1.1   2022-12-07 [1] CRAN (R 4.3.0)
    +#>  tibble     * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
    +#>  tidymodels * 1.1.0   2023-05-01 [1] CRAN (R 4.3.0)
    +#>  tune       * 1.1.1   2023-04-11 [1] CRAN (R 4.3.0)
    +#>  vip        * 0.3.2   2020-12-17 [1] CRAN (R 4.3.0)
    +#>  workflows  * 1.1.3   2023-02-22 [1] CRAN (R 4.3.0)
    +#>  yardstick  * 1.2.0   2023-04-21 [1] CRAN (R 4.3.0)
    +#> 
    +#>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.3/library
    +#>  [2] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
    +#> 
    +#> ────────────────────────────────────────────────────────────────────
    +
    + + +
    + +
    + + +
    +
    + +
    + + + + \ No newline at end of file diff --git a/find/all/index.qmd b/find/all/index.qmd new file mode 100644 index 00000000..3011fa5d --- /dev/null +++ b/find/all/index.qmd @@ -0,0 +1,29 @@ +--- +title: "Search all of tidymodels" +toc: true +toc-depth: 0 +include-after-body: ../../resources.html +--- + + +Here are all the functions available across all of the tidymodels packages. Click on the link in the topic column to find the relevant reference documentation. + +```{r} +#| include: false + +library(tidymodels) +library(DT) +tidymodels_prefer() +load("tidymodels_functions.RData") +``` + +```{r} +#| label: table-display +#| echo: false +tidymodels_functions %>% + datatable(rownames = FALSE, + class = 'cell-border stripe', + filter = 'top', + escape = FALSE, + options = list(pageLength = 25)) +``` diff --git a/find/all/tidymodels_functions.RData b/find/all/tidymodels_functions.RData new file mode 100644 index 00000000..4b7effcd Binary files /dev/null and b/find/all/tidymodels_functions.RData differ diff --git a/find/broom/broom_functions.RData b/find/broom/broom_functions.RData new file mode 100644 index 00000000..2cfd0927 Binary files /dev/null and b/find/broom/broom_functions.RData differ diff --git a/find/broom/index.qmd b/find/broom/index.qmd new file mode 100644 index 00000000..c9ced3be --- /dev/null +++ b/find/broom/index.qmd @@ -0,0 +1,34 @@ +--- +subtitle: Broom +title: Search broom methods +weight: 3 +description: | + Find `tidy()`, `augment()`, and `glance()` methods for different objects. +toc: true +toc-depth: 0 +include-after-body: ../../resources.html +--- + +Here are all the broom functions available across CRAN packages. Click on the link in the topic column to find more information. 
+ + + +```{r} +#| include: false + +library(tidymodels) +library(DT) +tidymodels_prefer() +load("broom_functions.RData") +``` + +```{r} +#| label: table-display +#| echo: false +broom_functions %>% + datatable(rownames = FALSE, + class = 'cell-border stripe', + filter = 'top', + escape = FALSE, + options = list(pageLength = 25)) +``` diff --git a/find/index.qmd b/find/index.qmd new file mode 100644 index 00000000..3816fc15 --- /dev/null +++ b/find/index.qmd @@ -0,0 +1,23 @@ +--- +title: "Explore tidymodels" +description: "Explore searchable tables of all tidymodels packages and functions." +toc: true +toc-depth: 0 +include-after-body: ../resources.html +--- + +Below you'll find searchable tables to help you explore the tidymodels packages and functions. The tables also include links to the relevant reference page to help you navigate the package documentation. Use the following categories to guide you: + +* [Search all of tidymodels](all/index.qmd) + +* [Search parsnip models](parsnip/index.qmd) + +* [Search recipe steps](recipes/index.qmd) + +* [Search broom methods](broom/index.qmd) + +# Books + +If you want to read more long form there are a number of books written about and using tidymodels + +* [List of books](../books/index.qmd) diff --git a/find/parsnip/index.qmd b/find/parsnip/index.qmd new file mode 100644 index 00000000..7d6773b8 --- /dev/null +++ b/find/parsnip/index.qmd @@ -0,0 +1,31 @@ +--- +title: Search parsnip models +weight: 2 +description: | + Find model types, engines, and arguments to fit and predict in the tidymodels framework. +toc: true +toc-depth: 0 +include-after-body: ../../resources.html +--- + +To learn about the parsnip package, see [*Get Started: Build a Model*](/start/models/). Use the tables below to find [model types and engines](#models). 
+ +```{r} +#| include: false + +library(tidymodels) +library(DT) +tidymodels_prefer() +load("parsnip_models.RData") +``` + +```{r} +#| label: table-display +#| echo: false +parsnip_models %>% + datatable(rownames = FALSE, + class = 'cell-border stripe', + filter = 'top', + escape = FALSE, + options = list(pageLength = 25)) +``` diff --git a/find/parsnip/parsnip_models.RData b/find/parsnip/parsnip_models.RData new file mode 100644 index 00000000..f94c67d0 Binary files /dev/null and b/find/parsnip/parsnip_models.RData differ diff --git a/content/find/parsnip/parsnip_thumbnail.png b/find/parsnip/parsnip_thumbnail.png similarity index 100% rename from content/find/parsnip/parsnip_thumbnail.png rename to find/parsnip/parsnip_thumbnail.png diff --git a/find/recipes/index.qmd b/find/recipes/index.qmd new file mode 100644 index 00000000..800e78f6 --- /dev/null +++ b/find/recipes/index.qmd @@ -0,0 +1,34 @@ +--- +subtitle: Recipes +title: Search recipe steps +weight: 3 +description: | + Find recipe steps in the tidymodels framework to help you prep your data for modeling. +toc: true +toc-depth: 0 +include-after-body: ../../resources.html +--- + + +To learn about the recipes package, see [*Get Started: Preprocess your data with recipes*](/start/recipes/). The table below allows you to search for recipe steps across tidymodels packages. 
+ + +```{r} +#| include: false + +library(tidymodels) +library(DT) +tidymodels_prefer() +load("recipe_functions.RData") +``` + +```{r} +#| label: table-display +#| echo: false +recipe_functions %>% + datatable(rownames = FALSE, + class = 'cell-border stripe', + filter = 'top', + escape = FALSE, + options = list(pageLength = 25)) +``` diff --git a/find/recipes/recipe_functions.RData b/find/recipes/recipe_functions.RData new file mode 100644 index 00000000..06e5fe51 Binary files /dev/null and b/find/recipes/recipe_functions.RData differ diff --git a/content/find/recipes/recipes_thumbnail.jpg b/find/recipes/recipes_thumbnail.jpg similarity index 100% rename from content/find/recipes/recipes_thumbnail.jpg rename to find/recipes/recipes_thumbnail.jpg diff --git a/help/index.qmd b/help/index.qmd new file mode 100644 index 00000000..5426102e --- /dev/null +++ b/help/index.qmd @@ -0,0 +1,45 @@ +--- +title: Get Help +toc: true +toc-depth: 1 +include-after-body: ../resources.html +--- + +## Asking for help + +If you're asking for R help, reporting a bug, or requesting a new feature, you're more likely to succeed if you include a good reproducible example, which is precisely what the [reprex](https://reprex.tidyverse.org/) package is built for. You can learn more about reprex, along with other tips on how to help others help you in the [tidyverse.org help section](https://www.tidyverse.org/help/). + +## Where to ask + + + +Now that you've made a reprex, you need to share it in an appropriate forum. Here are some options: + +* [__community.rstudio.com__](https://community.rstudio.com/c/ml/15): This is a warm + and welcoming place to ask any questions you might have about + tidymodels or more generally about modeling, machine learning, and deep learning. (You can also ask questions about the tidyverse and RStudio there, too!) + +* [__Stack Overflow__](https://stackoverflow.com/questions/tagged/tidymodels). 
You're probably already familiar + with Stack Overflow from googling; it's a frequent source of answers to + coding related questions. Asking a question on Stack Overflow can be + intimidating, but if you've taken the time to create a reprex, you're much + more likely to get a useful answer. Make sure to [tag your question](https://stackoverflow.com/help/tagging) with `r` + and `tidymodels` so that the right people are more likely to see it. + +* [__Twitter__][twitter-rstats] and [__Mastodon__](https://fosstodon.org/tags/tidymodels). These sites are great places to share a link to your reprex that's hosted elsewhere! The [#rstats twitter][twitter-rstats] and [#rstats fosstodon](https://fosstodon.org/tags/tidymodels) communities are extremely friendly and active, and have great crowds to be a part of. Make sure you tag your tweet with `#rstats` and `#tidymodels`. + +* If you think you've found a __bug__, please follow the instructions for filing an issue on + [contributing to tidymodels](/contribute/). + +[twitter-rstats]: https://twitter.com/search?q=%23rstats&src=typd + +## Resources + +- See what you need to know to [get started with tidymodels](/start/), and learn more about [using tidymodels for specific tasks](/learn/). + +- Each tidymodels package has its own documentation site, full of helpful information. Find [links to all package documentation sites](/packages/) and explore them! + +- Search [all tidymodels functions](/find/), and check out [our books on these topics](/books/). + +- Stay up to date with the latest news about tidymodels through our posts on the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/). 
+ diff --git a/images/broom.png b/images/broom.png new file mode 100644 index 00000000..41a04d5c Binary files /dev/null and b/images/broom.png differ diff --git a/images/cover.png b/images/cover.png new file mode 100644 index 00000000..c9d15402 Binary files /dev/null and b/images/cover.png differ diff --git a/images/dials.png b/images/dials.png new file mode 100644 index 00000000..724c65bd Binary files /dev/null and b/images/dials.png differ diff --git a/static/images/error.png b/images/error.png similarity index 100% rename from static/images/error.png rename to images/error.png diff --git a/static/images/favicons/android-chrome-192x192.png b/images/favicons/android-chrome-192x192.png similarity index 100% rename from static/images/favicons/android-chrome-192x192.png rename to images/favicons/android-chrome-192x192.png diff --git a/static/images/favicons/android-chrome-512x512.png b/images/favicons/android-chrome-512x512.png similarity index 100% rename from static/images/favicons/android-chrome-512x512.png rename to images/favicons/android-chrome-512x512.png diff --git a/static/images/favicons/apple-touch-icon.png b/images/favicons/apple-touch-icon.png similarity index 100% rename from static/images/favicons/apple-touch-icon.png rename to images/favicons/apple-touch-icon.png diff --git a/static/images/favicons/favicon-16x16.png b/images/favicons/favicon-16x16.png similarity index 100% rename from static/images/favicons/favicon-16x16.png rename to images/favicons/favicon-16x16.png diff --git a/static/images/favicons/favicon-32x32.png b/images/favicons/favicon-32x32.png similarity index 100% rename from static/images/favicons/favicon-32x32.png rename to images/favicons/favicon-32x32.png diff --git a/static/images/favicons/favicon.ico b/images/favicons/favicon.ico similarity index 100% rename from static/images/favicons/favicon.ico rename to images/favicons/favicon.ico diff --git a/static/images/favicons/site.webmanifest b/images/favicons/site.webmanifest similarity 
index 100% rename from static/images/favicons/site.webmanifest rename to images/favicons/site.webmanifest diff --git a/static/images/feature_summary_large_image.jpg b/images/feature_summary_large_image.jpg similarity index 100% rename from static/images/feature_summary_large_image.jpg rename to images/feature_summary_large_image.jpg diff --git a/static/images/giraffe.jpg b/images/giraffe.jpg similarity index 100% rename from static/images/giraffe.jpg rename to images/giraffe.jpg diff --git a/static/images/help.png b/images/help.png similarity index 100% rename from static/images/help.png rename to images/help.png diff --git a/static/css/images/menuToggler.svg b/images/menuToggler.svg similarity index 100% rename from static/css/images/menuToggler.svg rename to images/menuToggler.svg diff --git a/images/parsnip-flagger.jpg b/images/parsnip-flagger.jpg new file mode 100644 index 00000000..f010c873 Binary files /dev/null and b/images/parsnip-flagger.jpg differ diff --git a/images/parsnip.png b/images/parsnip.png new file mode 100644 index 00000000..db6dd34c Binary files /dev/null and b/images/parsnip.png differ diff --git a/static/images/placeholder.png b/images/placeholder.png similarity index 100% rename from static/images/placeholder.png rename to images/placeholder.png diff --git a/images/recipes.png b/images/recipes.png new file mode 100644 index 00000000..0471bdb6 Binary files /dev/null and b/images/recipes.png differ diff --git a/static/images/resampling.svg b/images/resampling.svg similarity index 100% rename from static/images/resampling.svg rename to images/resampling.svg diff --git a/static/images/robot.png b/images/robot.png similarity index 100% rename from static/images/robot.png rename to images/robot.png diff --git a/images/rsample.png b/images/rsample.png new file mode 100644 index 00000000..121bc043 Binary files /dev/null and b/images/rsample.png differ diff --git a/static/images/rstudio.png b/images/rstudio.png similarity index 100% rename from 
static/images/rstudio.png rename to images/rstudio.png diff --git a/images/tidy-back-01.svg b/images/tidy-back-01.svg new file mode 100644 index 00000000..7a0f5339 --- /dev/null +++ b/images/tidy-back-01.svg @@ -0,0 +1,28 @@ + + + + +tidy-back + + + + + + + + + + + + diff --git a/static/css/images/tidy-back-01a.svg b/images/tidy-back-01a.svg similarity index 100% rename from static/css/images/tidy-back-01a.svg rename to images/tidy-back-01a.svg diff --git a/images/tidy-back-02.svg b/images/tidy-back-02.svg new file mode 100755 index 00000000..a22dba31 --- /dev/null +++ b/images/tidy-back-02.svg @@ -0,0 +1,59 @@ + + + + +tidy-back + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/static/css/images/tidy-back-02a.svg b/images/tidy-back-02a.svg similarity index 100% rename from static/css/images/tidy-back-02a.svg rename to images/tidy-back-02a.svg diff --git a/images/tidy-back-03.svg b/images/tidy-back-03.svg new file mode 100644 index 00000000..09d37ecd --- /dev/null +++ b/images/tidy-back-03.svg @@ -0,0 +1,34 @@ + + + + +tidy-back + + + + + + + + + + + + + + diff --git a/static/css/images/tidy-back-03a.svg b/images/tidy-back-03a.svg similarity index 100% rename from static/css/images/tidy-back-03a.svg rename to images/tidy-back-03a.svg diff --git a/static/css/images/tidy-event-back-conf.svg b/images/tidy-event-back-conf.svg similarity index 100% rename from static/css/images/tidy-event-back-conf.svg rename to images/tidy-event-back-conf.svg diff --git a/static/css/images/tidy-event-back-meetup.svg b/images/tidy-event-back-meetup.svg similarity index 100% rename from static/css/images/tidy-event-back-meetup.svg rename to images/tidy-event-back-meetup.svg diff --git a/images/tidy-packages-back-01.svg b/images/tidy-packages-back-01.svg new file mode 100755 index 00000000..99e2704f --- /dev/null +++ b/images/tidy-packages-back-01.svg @@ -0,0 +1,14 @@ + + + + +tidy-packages-back + + + + diff --git a/images/tidy-packages-back-02.svg 
b/images/tidy-packages-back-02.svg new file mode 100755 index 00000000..8cf61949 --- /dev/null +++ b/images/tidy-packages-back-02.svg @@ -0,0 +1,13 @@ + + + + +tidy-packages-back + + + diff --git a/images/tidy-packages-back-03.svg b/images/tidy-packages-back-03.svg new file mode 100755 index 00000000..a0f2de6e --- /dev/null +++ b/images/tidy-packages-back-03.svg @@ -0,0 +1,12 @@ + + + + +tidy-packages-back + + + diff --git a/images/tidymodels.png b/images/tidymodels.png new file mode 100644 index 00000000..a92aa4f3 Binary files /dev/null and b/images/tidymodels.png differ diff --git a/images/tune.png b/images/tune.png new file mode 100644 index 00000000..e921cd13 Binary files /dev/null and b/images/tune.png differ diff --git a/images/workflows.png b/images/workflows.png new file mode 100644 index 00000000..e6f38d4e Binary files /dev/null and b/images/workflows.png differ diff --git a/images/yardstick.png b/images/yardstick.png new file mode 100644 index 00000000..e4d132ac Binary files /dev/null and b/images/yardstick.png differ diff --git a/index.Rmd b/index.Rmd deleted file mode 100644 index b8510951..00000000 --- a/index.Rmd +++ /dev/null @@ -1,3 +0,0 @@ ---- -site: blogdown:::blogdown_site ---- diff --git a/index.qmd b/index.qmd new file mode 100644 index 00000000..29fbe548 --- /dev/null +++ b/index.qmd @@ -0,0 +1,96 @@ +--- +page-layout: custom +--- + +::::: {#FrontPage} + +:::: {.band .first} +:::: {.bandContent} + +::: {.hexBadges} + +
    + tidymodels hex sticker +
    + + +
    + rsample hex sticker +
    + + +
    + parsnip hex sticker +
    + + + +
    + recipes hex sticker +
    + + +
    + tune hex sticker +
    + + +
    + yardstick hex sticker +
    +::: + +::: {.blurb} +[TIDYMODELS]{.tagline} + +The tidymodels framework is a collection of packages for modeling and machine learning using [tidyverse](https://www.tidyverse.org/) principles. + +Install tidymodels with: + +```r +install.packages("tidymodels") +``` +::: + +:::: +:::: + +:::: {.band .second} +:::: {.bandContent} +::: {.blurb} +[LEARN TIDYMODELS]{.tagline} + +Whether you are just starting out today or have years of experience with modeling, tidymodels offers a consistent, flexible framework for your work. + +![](images/cover.png){.bookCover fig-alt="Parsnip set your engine, illustrated by Allison Horst"} +::: + +::: {.blurb} +
    + +
    What do you need to know to start using tidymodels? Learn what you need in 5 articles, starting with how to create a model and ending with a beginning-to-end modeling case study.
    +
    + +
    + +
    After you are comfortable with the basics, you can learn how to go farther with tidymodels in your modeling and machine learning projects.
    +
    + +::: +:::: +:::: + +:::: {.band .third} +:::: {.bandContent} + +
    + +::: {.blurb} +[STAY UP TO DATE]{.tagline} + +Hear about the latest tidymodels news at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/). +::: +:::: +:::: + +::::: diff --git a/layouts/_default/list.html b/layouts/_default/list.html deleted file mode 100644 index b928e504..00000000 --- a/layouts/_default/list.html +++ /dev/null @@ -1,91 +0,0 @@ -{{ define "main" }} - - -
    -
    -
    -
    -
    -
    - - {{ if eq .Section "tags" }} - -  Back to Learn - - {{ end }} - - {{ if not (eq .Section "tags" "books") }} - -  Back to {{ .Section | title }} - - {{ end }} -
    -
    - -
    {{ .Title }}
    - -
    {{ range .Pages }} - {{ $post := . }} -
    - - - -
    - {{ $cover := (.Resources.ByType "image").GetMatch "*cover*" }} - {{ with $cover }} - {{ $fit_thumb := .Fill "500x500 Top" }} - - {{ end }} -
    - -
    - -
    -
    {{ default .Description | markdownify }} - Read more » -
    - - {{ if .Params.tags }} -
    -
    -   - {{ range $i, $tags := (.GetTerms "tags") }} - {{- if $i -}}, {{ end -}} - {{ with $tags }} - {{- .LinkTitle -}} - {{- end -}} - {{- end -}} -
    -
    - {{ end }} - -
    -
    -
    - {{ end }}
    - {{ template "_internal/pagination.html" . }} -
    - -
    - {{ if not (eq .Section "tags" "books") }} - {{ partial "topics-sidebar.html" . }} - {{ end }} - -
    -
    Resources
    - {{ partial "resources_sticky.html" .}} -
    -
    - - - -
    -
    -
    - -{{ end }} diff --git a/layouts/_default/single.html b/layouts/_default/single.html deleted file mode 100644 index a7bd7ca4..00000000 --- a/layouts/_default/single.html +++ /dev/null @@ -1,33 +0,0 @@ -{{ define "main" }} - -
    -
    -
    -
    - -

    {{ .Title | markdownify }}

    - -
    - {{ .Content }} -
    - -
    - -
    - {{ if ge (len .TableOfContents) 50 }} -
    -
    Contents
    - {{ .TableOfContents }} -
    - {{ end }} -
    -
    Resources
    - {{ partial "resources_sticky.html" .}} -
    -
    - -
    -
    -
    - -{{ end }} diff --git a/layouts/find/list.html b/layouts/find/list.html deleted file mode 100644 index 90cdfc8c..00000000 --- a/layouts/find/list.html +++ /dev/null @@ -1,89 +0,0 @@ -{{ define "main" }} - - -
    -
    -
    -
    -
    {{ .Title }}
    - -

    {{ .Content | markdownify }}

    - - -
    - -
    - - -
    {{ range $index, $element := .Pages }} - {{ $post := . }} - - -
    -
    -
    - -
    - {{ if eq .Weight 1 }} - -

    {{ .Content }}

    - {{ end }} -
    - - {{ if ne .Weight 1 }} -
    - - -
    - {{ $thumb := (.Resources.ByType "image").GetMatch "*thumbnail*" }} - {{ with $thumb }} - - {{ end }} -
    - - -
    - - -
    -
    {{ default .Description | markdownify }} - Read more ... -
    -
    -
    - -
    - {{ end }} - - -
    -
    - {{ end }}
    -
    - -
    -
    -
    Resources
    - {{ partial "resources_sticky.html" .}} -
    -
    - -
    -
    -
    - -{{ end }} - - - diff --git a/layouts/find/single.html b/layouts/find/single.html deleted file mode 100644 index 8a5e15b6..00000000 --- a/layouts/find/single.html +++ /dev/null @@ -1,31 +0,0 @@ -{{ define "main" }} - -
    -
    -
    -
    - -

    {{ .Title | markdownify }}

    - -
    - {{ .Content }} -
    - -
    - -
    -
    -
    Explore tidymodels
    - {{ partial "find-sidebar.html" .}} -
    -
    -
    Resources
    - {{ partial "resources_sticky.html" .}} -
    -
    - -
    -
    -
    - -{{ end }} diff --git a/layouts/index.html b/layouts/index.html deleted file mode 100644 index e629abb2..00000000 --- a/layouts/index.html +++ /dev/null @@ -1,86 +0,0 @@ -{{ define "main" }} - - -
    - - -{{ with .Site.GetPage "/home/band_one.md" }} -
    -
    - - -
    - {{ $col0 := .Params.col0 }} - {{ with $col0 }} - {{ with .row1 }}
    {{ . }} hex sticker
    {{ end }} - {{ with .row2 }}
    {{ . }} hex sticker
    {{ end }} - {{ with .row3 }}
    {{ . }} hex sticker
    {{ end }} - {{ end }} - {{ $col2 := .Params.col2 }} - {{ with $col2 }} - {{ with .row2 }}
    {{ . }} hex sticker
    {{ end }} - {{ with .row3 }}
    {{ . }} hex sticker
    {{ end }} - {{ with .row4 }}
    {{ . }} hex sticker
    {{ end }} - {{ end }} -
    - - -
    -
    - {{ .Title }} -
    -
    - {{ .Content | markdownify }} -
    -
    - -
    -
    -{{ end }} - - -{{ with .Site.GetPage "/home/band_two.md" }} - -
    -
    -
    -
    - {{ .Title }} -
    -
    - {{ .Content | markdownify }} -
    - Parsnip set your engine, illustrated by Allison Horst -
    -
    -
    - {{ partial "homepage_sticky" . }} -
    -
    -
    - -{{ end }} - - - -{{ with .Site.GetPage "/home/band_three.md" }} - -
    -
    -
    -
    -
    - {{ .Title }} -
    -
    - {{.Content | markdownify }} -
    -
    -
    -
    -
    -{{ end }} - -{{ end }} - - diff --git a/layouts/learn-subsection/list.html b/layouts/learn-subsection/list.html deleted file mode 100644 index a0b7abd1..00000000 --- a/layouts/learn-subsection/list.html +++ /dev/null @@ -1,76 +0,0 @@ -{{ define "main" }} - - -
    -
    -
    -
    -
    -
    - - {{ if eq .Section "tags" }} - -  Back to Learn - - {{ end }} - - {{ if ne .Section "tags" }} - -  Back to {{ .Section | title }} - - {{ end }} -
    -
    - -
    {{ .Title }}
    - -
    {{ range .Pages }} - {{ $post := . }} -
    -
    - -
    -
    {{ default .Description | markdownify }} - Read article » -
    -
    -
    -   - {{ range $i, $tags := (.GetTerms "tags") }} - {{- if $i -}}, {{ end -}} - {{ with $tags }} - {{- .LinkTitle -}} - {{- end -}} - {{- end -}} -
    -
    -
    -
    -
    - {{ end }}
    - {{ template "_internal/pagination.html" . }} -
    - -
    - {{ if ne .Section "tags" }} - {{ partial "topics-sidebar.html" . }} - {{ end }} - -
    -
    Resources
    - {{ partial "resources_sticky.html" .}} -
    -
    - - - -
    -
    -
    - -{{ end }} diff --git a/layouts/learn-subsection/single.html b/layouts/learn-subsection/single.html deleted file mode 100644 index 30232381..00000000 --- a/layouts/learn-subsection/single.html +++ /dev/null @@ -1,90 +0,0 @@ -{{ define "main" }} - -{{ $page := . }} -{{ $banner := ($page.Resources.ByType "image").GetMatch "*-wd*" }} - -
    -
    -
    -
    - - - -

    {{ .Title | markdownify }}

    - - {{ $slug := .Param "slug" }} -
    - {{ with $banner }}
    {{ end }}
    - {{ with $.Params.photo }}
    Photo by {{ .author }}
    -
    - {{ end }} - - {{ if or (eq .Section "articles") (eq .Section "blog") }} - - {{ end }} - - {{ $show_authors := $.Site.Params.authors.show_single | default true }} - {{ if and $show_authors (or (eq .Section "articles") (eq .Section "blog")) }} -

    - - {{ partial "pretty_author_names" . }} - {{ end }} -

    - - {{ with $.Params.Description }} -
    -

    {{ print "Learning objective" }}

    -

    {{ . | markdownify }}

    -
    - {{ end }} - -
    - {{ .Content }} -
    - - - - -
    - -
    -
    -
    Contents
    - {{ .TableOfContents }} -
    - -
    -
    Resources
    - {{ partial "resources_sticky.html" .}} -
    -
    - - - -
    -
    -
    - -{{ end }} diff --git a/layouts/learn/list.html b/layouts/learn/list.html deleted file mode 100644 index a4a3f332..00000000 --- a/layouts/learn/list.html +++ /dev/null @@ -1,109 +0,0 @@ -{{ define "main" }} - - -
    -
    -
    -
    -
    -

    {{ .Title }}

    -
    - -

    {{ .Content | markdownify }}

    - - -
    - -
    - - -
    {{ range $index, $element := .Sections }} - {{ $post := . }} - - -
    -
    -
    -
    - - - - {{ if ne $index 0 }} - - {{ end }} -
    - -
    -
    - - -
    -
    - {{ $thumb := (.Resources.ByType "image").GetMatch "*thumbnail*" }} - {{ with $thumb }} - - {{ end }} -
    -
    - -
    -
      - - {{ range first 5 .Pages.ByWeight }} -
    • - {{.Title}} - {{ with .Description }} -

      {{ . | markdownify }}

      - {{ end }} -
    • - {{ end }} -
    -
    {{ default .Description | markdownify }} - See all » -
    -
    -
    - -
    -
    - -
    -
    -
    -
    -
    - {{ end }}
    -
    - -{{ partial "learn-sidebar.html" . }} - -
    -

    {{ .Params.Icon_attribution | markdownify }}

    -
    -
    - -{{ end }} - - - diff --git a/layouts/partials/find-sidebar.html b/layouts/partials/find-sidebar.html deleted file mode 100644 index 4dfd3aa6..00000000 --- a/layouts/partials/find-sidebar.html +++ /dev/null @@ -1,10 +0,0 @@ -
    -{{ $page := . }} -{{ range (where .Site.RegularPages.ByWeight "Section" "find" ) }} -
    - diff --git a/layouts/partials/footer.html b/layouts/partials/footer.html deleted file mode 100755 index 192291c9..00000000 --- a/layouts/partials/footer.html +++ /dev/null @@ -1,39 +0,0 @@ -
    -
    - -
    - {{ with .Param "footer.github_url"}} - - {{ end }} - {{ with .Param "footer.twitter_url"}} - - {{ end }} -
    - - - -
    -
    - -
    - - - {{ partial "footer_highlightjs" . }} - {{ partial "footer_mathjax" . }} - {{ template "_internal/google_analytics.html" . }} - - - diff --git a/layouts/partials/hanchor.html b/layouts/partials/hanchor.html deleted file mode 100644 index c473ef02..00000000 --- a/layouts/partials/hanchor.html +++ /dev/null @@ -1 +0,0 @@ -{{ . | replaceRE "()" "${1} 🔗︎ ${3}" | safeHTML }} diff --git a/layouts/partials/head_custom.html b/layouts/partials/head_custom.html deleted file mode 100644 index e7c47daf..00000000 --- a/layouts/partials/head_custom.html +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/layouts/partials/help.html b/layouts/partials/help.html deleted file mode 100644 index 7ca27753..00000000 --- a/layouts/partials/help.html +++ /dev/null @@ -1 +0,0 @@ -
    Stuck? Confused? Ask for help.
    diff --git a/layouts/partials/homepage_sticky.html b/layouts/partials/homepage_sticky.html deleted file mode 100644 index c38ec1c3..00000000 --- a/layouts/partials/homepage_sticky.html +++ /dev/null @@ -1,11 +0,0 @@ -{{ range $i, $element := $.Site.Data.homepage.bandtwo }} - -{{ $pack := .icon_pack }} -{{ $icon := .icon }} - -
    - -
    {{ .what | markdownify }}
    -
    - -{{ end }} diff --git a/layouts/partials/learn-sidebar.html b/layouts/partials/learn-sidebar.html deleted file mode 100644 index e3f55ca6..00000000 --- a/layouts/partials/learn-sidebar.html +++ /dev/null @@ -1,10 +0,0 @@ -
    -
    -
    Filter by package
    - {{ range $key, $value := $.Site.Taxonomies.tags }} - - {{ end }} -
    -
    diff --git a/layouts/partials/meta.html b/layouts/partials/meta.html deleted file mode 100644 index 9e9f709f..00000000 --- a/layouts/partials/meta.html +++ /dev/null @@ -1,52 +0,0 @@ - - - - -{{ $og_image := "" }} -{{ $og_image = printf "images/%s" .Site.Params.sharing_image | absURL }} -{{- with $og_image -}} - -{{- end -}} - - -{{ $twitter_card := "summary_large_image" }} - -{{ $twitter_image := "" }} -{{ $twitter_image = printf "images/%s" .Site.Params.twitter_image | absURL }} -{{- with $twitter_image -}} - -{{- end -}} - -{{ hugo.Generator }} - - -{{- if .IsHome -}} -{{ .Site.Title }} - - - - -{{- end }} - -{{- if not .IsHome -}} - - {{- if eq .Title .FirstSection.Title -}} - {{ .Title }} - - {{ $page_description := "" }} - {{ $page_description := cond (isset .Params "description") .Params.Description .Site.Params.description }} - - - {{- end -}} - - {{- if ne .Title .FirstSection.Title -}} - {{ .FirstSection.Title }} - {{ .Title }} - - {{ $section_description := "" }} - {{ $section_description := cond (ne .FirstSection.Description "") .FirstSection.Description .Site.Params.description }} - {{ $page_description := "" }} - {{ $page_description := cond (isset .Params "description") .Params.Description $section_description }} - - - {{- end }} -{{- end }} diff --git a/layouts/partials/resources_sticky.html b/layouts/partials/resources_sticky.html deleted file mode 100644 index 97316cd1..00000000 --- a/layouts/partials/resources_sticky.html +++ /dev/null @@ -1,14 +0,0 @@ -{{ range $i, $element := $.Site.Data.resources.resources }} - -{{ $pack := .icon_pack }} -{{ $icon := .icon }} - -
    - - -
    {{ .what | markdownify }}
    -
    - -{{ end }} diff --git a/layouts/partials/start-sidebar.html b/layouts/partials/start-sidebar.html deleted file mode 100644 index f7974aee..00000000 --- a/layouts/partials/start-sidebar.html +++ /dev/null @@ -1,26 +0,0 @@ -{{ $page := . }} -
    -
    -
    -
    - {{ with .Site.GetPage "section" "start" }} - - {{ .Title }} - {{ end }} - -
    -
    - {{ range (where .Site.RegularPages.ByWeight "Section" "start" ) }} -
    -
    {{ .Weight }}  {{ .Title }} -
    -
    - {{ end }} -
    - - - {{ if .IsPage }} - {{ partial "help.html" . }} - {{ end }} - -
    diff --git a/layouts/partials/topics-sidebar.html b/layouts/partials/topics-sidebar.html deleted file mode 100644 index 071048dd..00000000 --- a/layouts/partials/topics-sidebar.html +++ /dev/null @@ -1,12 +0,0 @@ -
    -
    Explore topics -
    - {{ range $index, $element := .FirstSection.Sections }} - - {{ end }} -
    - diff --git a/layouts/shortcodes/blogdown/postref.html b/layouts/shortcodes/blogdown/postref.html deleted file mode 100644 index 858ad3ec..00000000 --- a/layouts/shortcodes/blogdown/postref.html +++ /dev/null @@ -1 +0,0 @@ -{{ if eq (getenv "HUGO_BLOGDOWN_POST_RELREF") "true" }}{{ .Page.RelPermalink }}{{ else }}{{ .Page.Permalink }}{{ end }} \ No newline at end of file diff --git a/layouts/shortcodes/note.html b/layouts/shortcodes/note.html deleted file mode 100644 index 7cb86cf7..00000000 --- a/layouts/shortcodes/note.html +++ /dev/null @@ -1 +0,0 @@ -
    {{ .Inner | markdownify }}
    diff --git a/layouts/shortcodes/rstudio-tip.html b/layouts/shortcodes/rstudio-tip.html deleted file mode 100644 index 0b112b4d..00000000 --- a/layouts/shortcodes/rstudio-tip.html +++ /dev/null @@ -1 +0,0 @@ -
    {{ .Inner | markdownify }}
    diff --git a/layouts/shortcodes/test-drive.html b/layouts/shortcodes/test-drive.html deleted file mode 100644 index 940bbfcd..00000000 --- a/layouts/shortcodes/test-drive.html +++ /dev/null @@ -1,5 +0,0 @@ -

    Alternatively, open an interactive version of this article in your browser:

    - - - - diff --git a/layouts/shortcodes/warning.html b/layouts/shortcodes/warning.html deleted file mode 100644 index 45b7cbc3..00000000 --- a/layouts/shortcodes/warning.html +++ /dev/null @@ -1 +0,0 @@ -
    {{ .Inner | markdownify }}
    diff --git a/layouts/start/list.html b/layouts/start/list.html deleted file mode 100644 index cb65c129..00000000 --- a/layouts/start/list.html +++ /dev/null @@ -1,19 +0,0 @@ -{{ define "main" }} - - -
    -
    -
    - - {{ partial "start-sidebar.html" . }} - -
    -
    {{ .Params.subtitle }}
    -
    - {{ .Content }} -
    -
    -
    -
    -
    -{{ end }} diff --git a/layouts/start/single.html b/layouts/start/single.html deleted file mode 100644 index 52261851..00000000 --- a/layouts/start/single.html +++ /dev/null @@ -1,67 +0,0 @@ -{{ define "main" }} - -{{ $page := . }} -{{ $banner := ($page.Resources.ByType "image").GetMatch "*-wd*" }} - -
    -
    -
    - - {{ partial "start-sidebar.html" . }} - -
    - -

    {{ .Weight }}   {{ .Title | markdownify }}

    - -
    -   -

    Tidymodels packages:

    - {{ range $i, $tags := (.GetTerms "tags") }} - {{- if $i -}}, {{ end -}} - {{ with $tags }} - {{- .LinkTitle -}} - {{- end -}} - {{- end -}} -
    - -
    -
    -
    {{ .TableOfContents }}
    -
    -
    - -
    - {{ partial "hanchor.html" .Content }} -
    - - - - -
    - -
    -
    -
    - -{{ end }} diff --git a/learn/develop/broom/index.qmd b/learn/develop/broom/index.qmd new file mode 100644 index 00000000..5539b4df --- /dev/null +++ b/learn/develop/broom/index.qmd @@ -0,0 +1,408 @@ +--- +title: "Create your own broom tidier methods" +categories: + - developer tools +type: learn-subsection +weight: 5 +description: | + Write tidy(), glance(), and augment() methods for new model objects. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(tidymodels) +library(tidyverse) +library(generics) +library(DT) +pkgs <- c("tidymodels", "tidyverse", "generics", "usethis") +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +The broom package provides tools to summarize key information about models in tidy `tibble()`s. The package provides three verbs, or "tidiers," to help make model objects easier to work with: + +* `tidy()` summarizes information about model components +* `glance()` reports information about the entire model +* `augment()` adds information about observations to a dataset + +Each of the three verbs above are _generic_, in that they do not define a procedure to tidy a given model object, but instead redirect to the relevant _method_ implemented to tidy a specific type of model object. The broom package provides methods for model objects from over 100 modeling packages along with nearly all of the model objects in the stats package that comes with base R. However, for maintainability purposes, the broom package authors now ask that requests for new methods be first directed to the parent package (i.e. the package that supplies the model object) rather than to broom. 
New methods will generally only be integrated into broom in the case that the requester has already asked the maintainers of the model-owning package to implement tidier methods in the parent package. + +We'd like to make implementing external tidier methods as painless as possible. The general process for doing so is: + +* re-export the tidier generics +* implement tidying methods +* document the new methods + +In this article, we'll walk through each of the above steps in detail, giving examples and pointing out helpful functions when possible. + +## Re-export the tidier generics + +The first step is to re-export the generic functions for `tidy()`, `glance()`, and/or `augment()`. You could do so from `broom` itself, but we've provided an alternative, much lighter dependency called `generics`. + +First you'll need to add the [generics](https://github.com/r-lib/generics) package to `Imports`. We recommend using the [usethis](https://github.com/r-lib/usethis) package for this: + +```{r} +#| eval: false +usethis::use_package("generics", "Imports") +``` + +Next, you'll need to re-export the appropriate tidying methods. If you plan to implement a `glance()` method, for example, you can re-export the `glance()` generic by adding the following somewhere inside the `/R` folder of your package: + +```{r} +#| eval: false +#' @importFrom generics glance +#' @export +generics::glance +``` + +Oftentimes it doesn't make sense to define one or more of these methods for a particular model. In this case, only implement the methods that do make sense. + +::: {.callout-warning} + Please do not define `tidy()`, `glance()`, or `augment()` generics in your package. This will result in namespace conflicts whenever your package is used along other packages that also export tidying methods. +::: + +## Implement tidying methods + +You'll now need to implement specific tidying methods for each of the generics you've re-exported in the above step. 
For each of `tidy()`, `glance()`, and `augment()`, we'll walk through the big picture, an example, and helpful resources. + +In this article, we'll use the base R dataset `trees`, giving the tree girth (in inches), height (in feet), and volume (in cubic feet), to fit an example linear model using the base R `lm()` function. + +```{r} +# load in the trees dataset +data(trees) + +# take a look! +str(trees) + +# fit the timber volume as a function of girth and height +trees_model <- lm(Volume ~ Girth + Height, data = trees) +``` + +Let's take a look at the `summary()` of our `trees_model` fit. + +```{r} +summary(trees_model) +``` + +This output gives some summary statistics on the residuals (which would be described more fully in an `augment()` output), model coefficients (which, in this case, make up the `tidy()` output), and some model-level summarizations such as RSE, $R^2$, etc. (which make up the `glance()` output.) + +### Implementing the `tidy()` method + +The `tidy(x, ...)` method will return a tibble where each row contains information about a component of the model. The `x` input is a model object, and the dots (`...`) are an optional argument to supply additional information to any calls inside your method. New `tidy()` methods can take additional arguments, but _must_ include the `x` and `...` arguments to be compatible with the generic function. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) Examples of model components include regression coefficients (for regression models), clusters (for classification/clustering models), etc. These `tidy()` methods are useful for inspecting model details and creating custom model visualizations. + +Returning to the example of our linear model on timber volume, we'd like to extract information on the model components. In this example, the components are the regression coefficients. 
After taking a look at the model object and its `summary()`, you might notice that you can extract the regression coefficients as follows: + +```{r} +summary(trees_model)$coefficients +``` + +This object contains the model coefficients as a table, where the information giving which coefficient is being described in each row is given in the row names. Converting to a tibble where the row names are contained in a column, you might write: + +```{r} +trees_model_tidy <- summary(trees_model)$coefficients %>% + as_tibble(rownames = "term") + +trees_model_tidy +``` + +The broom package standardizes common column names used to describe coefficients. In this case, the column names are: + +```{r} +colnames(trees_model_tidy) <- c("term", "estimate", "std.error", "statistic", "p.value") +``` + +A glossary giving the currently acceptable column names outputted by `tidy()` methods can be found [at the end of this article](#glossary). As a rule of thumb, column names resulting from `tidy()` methods should be all lowercase and contain only alphanumerics or periods (though there are plenty of exceptions). + +Finally, it is common for `tidy()` methods to include an option to calculate confidence/credible intervals for each component based on the model, when possible. In this example, the `confint()` function can be used to calculate confidence intervals from a model object resulting from `lm()`: + +```{r} +confint(trees_model) +``` + +With these considerations in mind, a reasonable `tidy()` method for `lm()` might look something like: + +```{r} +#| eval: false +tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) { + + result <- summary(x)$coefficients %>% + tibble::as_tibble(rownames = "term") %>% + dplyr::rename(estimate = Estimate, + std.error = `Std. 
Error`, + statistic = `t value`, + p.value = `Pr(>|t|)`) + + if (conf.int) { + ci <- confint(x, level = conf.level) + result <- dplyr::left_join(result, ci, by = "term") + } + + result +} +``` + +::: {.callout-note} + If you're interested, the actual `tidy.lm()` source can be found [here](https://github.com/tidymodels/broom/blob/master/R/stats-lm-tidiers.R)! It's not too different from the version above except for some argument checking and additional columns. +::: + +With this method exported, then, if a user calls `tidy(fit)`, where `fit` is an output from `lm()`, the `tidy()` generic would "redirect" the call to the `tidy.lm()` function above. + +Some things to keep in mind while writing your `tidy()` method: + +* Sometimes a model will have several different types of components. For example, in mixed models, there is different information associated with fixed effects and random effects. Since this information doesn't have the same interpretation, it doesn't make sense to summarize the fixed and random effects in the same table. In cases like this you should add an argument that allows the user to specify which type of information they want. For example, you might implement an interface along the lines of: + +```{r} +#| eval: false +model <- mixed_model(...) +tidy(model, effects = "fixed") +tidy(model, effects = "random") +``` + +* How are missing values encoded in the model object and its `summary()`? Ensure that rows are included even when the associated model component is missing or rank deficient. +* Are there other measures specific to each component that could reasonably be expected to be included in their summarizations? Some common arguments to `tidy()` methods include: + - `conf.int`: A logical indicating whether or not to calculate confidence/credible intervals. This should default to `FALSE`. + - `conf.level`: The confidence level to use for the interval when `conf.int = TRUE`. Typically defaults to `.95`. 
+ - `exponentiate`: A logical indicating whether or not model terms should be presented on an exponential scale (typical for logistic regression). + +### Implementing the `glance()` method + +`glance()` returns a one-row tibble providing model-level summarizations (e.g. goodness of fit measures and related statistics). This is useful to check for model misspecification and to compare many models. Again, the `x` input is a model object, and the `...` is an optional argument to supply additional information to any calls inside your method. New `glance()` methods can also take additional arguments and _must_ include the `x` and `...` arguments. (For a glossary of currently acceptable additional arguments, see [the end of this article](#glossary).) + +Returning to the `trees_model` example, we could pull out the $R^2$ value with the following code: + +```{r} +summary(trees_model)$r.squared +``` + +Similarly, for the adjusted $R^2$: + +```{r} +summary(trees_model)$adj.r.squared +``` + +Unfortunately, for many model objects, the extraction of model-level information is largely a manual process. You will likely need to build a `tibble()` element-by-element by subsetting the `summary()` object repeatedly. The `with()` function, however, can help make this process a bit less tedious by evaluating expressions inside of the `summary(trees_model)` environment. To grab those those same two model elements from above using `with()`: + +```{r} +with(summary(trees_model), + tibble::tibble(r.squared = r.squared, + adj.r.squared = adj.r.squared)) +``` + +A reasonable `glance()` method for `lm()`, then, might look something like: + +```{r} +#| eval: false +glance.lm <- function(x, ...) 
{ + with( + summary(x), + tibble::tibble( + r.squared = r.squared, + adj.r.squared = adj.r.squared, + sigma = sigma, + statistic = fstatistic["value"], + p.value = pf( + fstatistic["value"], + fstatistic["numdf"], + fstatistic["dendf"], + lower.tail = FALSE + ), + df = fstatistic["numdf"], + logLik = as.numeric(stats::logLik(x)), + AIC = stats::AIC(x), + BIC = stats::BIC(x), + deviance = stats::deviance(x), + df.residual = df.residual(x), + nobs = stats::nobs(x) + ) + ) +} +``` + +::: {.callout-note} +This is the actual definition of `glance.lm()` provided by broom! +::: + +Some things to keep in mind while writing `glance()` methods: +* Output should not include the name of the modeling function or any arguments given to the modeling function. +* In some cases, you may wish to provide model-level diagnostics not returned by the original object. For example, the above `glance.lm()` calculates `AIC` and `BIC` from the model fit. If these are easy to compute, feel free to add them. However, tidier methods are generally not an appropriate place to implement complex or time consuming calculations. +* The `glance` method should always return the same columns in the same order when given an object of a given model class. If a summary metric (such as `AIC`) is not defined in certain circumstances, use `NA`. + +### Implementing the `augment()` method + +`augment()` methods add columns to a dataset containing information such as fitted values, residuals or cluster assignments. All columns added to a dataset have a `.` prefix to prevent existing columns from being overwritten. (Currently acceptable column names are given in [the glossary](#glossary).) The `x` and `...` arguments share their meaning with the two functions described above. `augment` methods also optionally accept a `data` argument that is a `data.frame` (or `tibble`) to add observation-level information to, returning a `tibble` object with the same number of rows as `data`. 
Many `augment()` methods also accept a `newdata` argument, following the same conventions as the `data` argument, except with the underlying assumption that the model has not "seen" the data yet. As a result, `newdata` arguments need not contain the response columns in `data`. Only one of `data` or `newdata` should be supplied. A full glossary of acceptable arguments to `augment()` methods can be found at [the end of this article](#glossary). + +If a `data` argument is not specified, `augment()` should try to reconstruct the original data as much as possible from the model object. This may not always be possible, and often it will not be possible to recover columns not used by the model. + +With this is mind, we can look back to our `trees_model` example. For one, the `model` element inside of the `trees_model` object will allow us to recover the original data: + +```{r} +#| rows.print = 5 +trees_model$model +``` + +Similarly, the fitted values and residuals can be accessed with the following code: + +```{r} +head(trees_model$fitted.values) +head(trees_model$residuals) +``` + +As with `glance()` methods, it's fine (and encouraged!) to include common metrics associated with observations if they are not computationally intensive to compute. A common metric associated with linear models, for example, is the standard error of fitted values: + +```{r} +se.fit <- predict(trees_model, newdata = trees, se.fit = TRUE)$se.fit %>% + unname() + +head(se.fit) +``` + +Thus, a reasonable `augment()` method for `lm` might look something like this: + +```{r} +augment.lm <- function(x, data = x$model, newdata = NULL, ...) 
{
+  if (is.null(newdata)) {
+    dplyr::bind_cols(tibble::as_tibble(data),
+                     tibble::tibble(.fitted = x$fitted.values,
+                                    .se.fit = predict(x, 
+                                                      newdata = data, 
+                                                      se.fit = TRUE)$se.fit,
+                                    .resid = x$residuals))
+  } else {
+    predictions <- predict(x, newdata = newdata, se.fit = TRUE)
+    dplyr::bind_cols(tibble::as_tibble(newdata),
+                     tibble::tibble(.fitted = predictions$fit,
+                                    .se.fit = predictions$se.fit))
+  }
+}
+```
+
+Some other things to keep in mind while writing `augment()` methods:
+* The `newdata` argument should default to `NULL`. Users should only ever specify one of `data` or `newdata`. Providing both `data` and `newdata` should result in an error. The `newdata` argument should accept both `data.frame`s and `tibble`s.
+* Data given to the `data` argument must have both the original predictors and the original response. Data given to the `newdata` argument only needs to have the original predictors. This is important because there may be important information associated with training data that is not associated with test data. This means that the `original_data` object in `augment(model, data = original_data)` should provide `.fitted` and `.resid` columns (in most cases), whereas `test_data` in `augment(model, newdata = test_data)` only needs a `.fitted` column, even if the response is present in `test_data`.
+* If the `data` or `newdata` is specified as a `data.frame` with rownames, `augment` should return them in a column called `.rownames`.
+* For observations where no fitted values or summaries are available (where there's missing data, for example), return `NA`.
+* *The `augment()` method should always return as many rows as were in `data` or `newdata`*, depending on which is supplied.
+
+::: {.callout-note}
+The recommended interface and functionality for `augment()` methods may change soon.
+:::
+
+## Document the new methods
+
+The only remaining step is to integrate the new methods into the parent package! 
To do so, just drop the methods into a `.R` file inside of the `/R` folder and document them using roxygen2. If you're unfamiliar with the process of documenting objects, you can read more about it [here](http://r-pkgs.had.co.nz/man.html). Here's an example of how our `tidy.lm()` method might be documented: + +```{r} +#| eval: false +#' Tidy a(n) lm object +#' +#' @param x A `lm` object. +#' @param conf.int Logical indicating whether or not to include +#' a confidence interval in the tidied output. Defaults to FALSE. +#' @param conf.level The confidence level to use for the confidence +#' interval if conf.int = TRUE. Must be strictly greater than 0 +#' and less than 1. Defaults to 0.95, which corresponds to a +#' 95 percent confidence interval. +#' @param ... Unused, included for generic consistency only. +#' @return A tidy [tibble::tibble()] summarizing component-level +#' information about the model +#' +#' @examples +#' # load the trees dataset +#' data(trees) +#' +#' # fit a linear model on timber volume +#' trees_model <- lm(Volume ~ Girth + Height, data = trees) +#' +#' # summarize model coefficients in a tidy tibble! +#' tidy(trees_model) +#' +#' @export +tidy.lm <- function(x, conf.int = FALSE, conf.level = 0.95, ...) { + + # ... the rest of the function definition goes here! +``` + +Once you've documented each of your new methods and executed `devtools::document()`, you're done! Congrats on implementing your own broom tidier methods for a new model object! 
+ +## Glossaries + + +```{r} +#| include: false +# grab the argument glossary: make a temporary file, write the +# data to it, load it, and then delete it +args_url <- "https://github.com/alexpghayes/modeltests/blob/master/data/argument_glossary.rda?raw=true" +args_file <- tempfile() +args_get <- httr::GET(args_url) +httr::stop_for_status(args_get) +writeBin(httr::content(args_get, type = "raw"), args_file) +load(args_file) +unlink(args_file) + +# do the same thing for the columns +cols_url <- "https://github.com/alexpghayes/modeltests/blob/master/data/column_glossary.rda?raw=true" +cols_file <- tempfile() +cols_get <- httr::GET(cols_url) +httr::stop_for_status(cols_get) +writeBin(httr::content(cols_get, type = "raw"), cols_file) +load(cols_file) +unlink(cols_file) +``` + +### Arguments + +Tidier methods have a standardized set of acceptable argument and output column names. The currently acceptable argument names by tidier method are: + +```{r} +#| echo: false +argument_glossary %>% + select(Method = method, Argument = argument) %>% + mutate(Method = as.factor(Method)) %>% + datatable(rownames = FALSE, + class = 'cell-border stripe', + filter = 'top', + escape = FALSE, + options = list(pageLength = 5)) +``` + +### Column Names + +The currently acceptable column names by tidier method are: + +```{r} +#| echo: false +column_glossary %>% + select(Method = method, Column = column) %>% + mutate(Method = as.factor(Method)) %>% + datatable(rownames = FALSE, + class = 'cell-border stripe', + filter = 'top', + escape = FALSE, + options = list(pageLength = 5)) +``` + +The [alexpghayes/modeltests](https://github.com/alexpghayes/modeltests) package provides unit testing infrastructure to check your new tidier methods. Please file an issue there to request new arguments/columns to be added to the glossaries! 
+ +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/learn/develop/metrics/index.qmd b/learn/develop/metrics/index.qmd new file mode 100644 index 00000000..b6786c62 --- /dev/null +++ b/learn/develop/metrics/index.qmd @@ -0,0 +1,495 @@ +--- +title: "Custom performance metrics" +categories: + - developer tools +type: learn-subsection +weight: 3 +description: | + Create a new performance metric and integrate it with yardstick functions. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(tidymodels) +library(rlang) + +pkgs <- c("tidymodels", "rlang") +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +The [yardstick](https://yardstick.tidymodels.org/) package already includes a large number of metrics, but there's obviously a chance that you might have a custom metric that hasn't been implemented yet. In that case, you can use a few of the tools yardstick exposes to create custom metrics. + +Why create custom metrics? With the infrastructure yardstick provides, you get: + +- Standardization between your metric and other preexisting metrics +- Automatic error handling for types and lengths +- Automatic selection of binary / multiclass metric implementations +- Automatic `NA` handling +- Support for grouped data frames +- Support for use alongside other metrics in `metric_set()` + +The implementation for metrics differ slightly depending on whether you are implementing a numeric, class, or class probability metric. Examples for numeric and classification metrics are given below. We would encourage you to look into the implementation of `roc_auc()` after reading this vignette if you want to work on a class probability metric. 
+ +## Numeric example: MSE + +Mean squared error (sometimes MSE or from here on, `mse()`) is a numeric metric that measures the average of the squared errors. Numeric metrics are generally the simplest to create with yardstick, as they do not have multiclass implementations. The formula for `mse()` is: + +$$ MSE = \frac{1}{N} \sum_{i=1}^{N} (truth_i - estimate_i) ^ 2 = mean( (truth - estimate) ^ 2) $$ + +All metrics should have a data frame version, and a vector version. The data frame version here will be named `mse()`, and the vector version will be `mse_vec()`. + +### Vector implementation + +To start, create the vector version. Generally, all metrics have the same arguments unless the metric requires an extra parameter (such as `beta` in `f_meas()`). To create the vector function, you need to do two things: + +1) Create an internal implementation function, `mse_impl()`. +2) Pass on that implementation function to `metric_vec_template()`. + +Below, `mse_impl()` contains the actual implementation of the metric, and takes `truth` and `estimate` as arguments along with any metric specific arguments. + +The yardstick function `metric_vec_template()` accepts the implementation function along with the other arguments to `mse_vec()` and actually executes `mse_impl()`. Additionally, it has a `cls` argument to specify the allowed class type of `truth` and `estimate`. If the classes are the same, a single character class can be passed, and if they are different a character vector of length 2 can be supplied. + +The `metric_vec_template()` helper handles the removal of `NA` values in your metric, so your implementation function does not have to worry about them. It performs type checking using `cls` and also checks that the `estimator` is valid, the second of which is covered in the classification example. This way, all you have to worry about is the core implementation. + +```{r} +library(tidymodels) + +mse_vec <- function(truth, estimate, na_rm = TRUE, ...) 
{ + + mse_impl <- function(truth, estimate) { + mean((truth - estimate) ^ 2) + } + + metric_vec_template( + metric_impl = mse_impl, + truth = truth, + estimate = estimate, + na_rm = na_rm, + cls = "numeric", + ... + ) + +} +``` + +At this point, you've created the vector version of the mean squared error metric. + +```{r} +data("solubility_test") + +mse_vec( + truth = solubility_test$solubility, + estimate = solubility_test$prediction +) +``` + +Intelligent error handling is immediately available. + +```{r} +#| error: true +mse_vec(truth = "apple", estimate = 1) + +mse_vec(truth = 1, estimate = factor("xyz")) +``` + +`NA` values are removed if `na_rm = TRUE` (the default). If `na_rm = FALSE` and any `NA` values are detected, then the metric automatically returns `NA`. + +```{r} +# NA values removed +mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5)) + +# NA returned +mse_vec(truth = c(NA, .5, .4), estimate = c(1, .6, .5), na_rm = FALSE) +``` + +### Data frame implementation + +The data frame version of the metric should be fairly simple. It is a generic function with a `data.frame` method that calls the yardstick helper, `metric_summarizer()`, and passes along the `mse_vec()` function to it along with versions of `truth` and `estimate` that have been wrapped in `rlang::enquo()` and then unquoted with `!!` so that non-standard evaluation can be supported. + +```{r} +library(rlang) + +mse <- function(data, ...) { + UseMethod("mse") +} + +mse <- new_numeric_metric(mse, direction = "minimize") + +mse.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) { + + metric_summarizer( + metric_nm = "mse", + metric_fn = mse_vec, + data = data, + truth = !! enquo(truth), + estimate = !! enquo(estimate), + na_rm = na_rm, + ... + ) + +} +``` + +And that's it. The yardstick package handles the rest with an internal call to `summarise()`. 
+ +```{r} +#| error: false +#| eval: false +mse(solubility_test, truth = solubility, estimate = prediction) + +# Error handling +mse(solubility_test, truth = solubility, estimate = factor("xyz")) +``` + +Let's test it out on a grouped data frame. + +```{r} +#| message: false +library(dplyr) + +set.seed(1234) +size <- 100 +times <- 10 + +# create 10 resamples +solubility_resampled <- bind_rows( + replicate( + n = times, + expr = sample_n(solubility_test, size, replace = TRUE), + simplify = FALSE + ), + .id = "resample" +) + +solubility_resampled %>% + group_by(resample) %>% + mse(solubility, prediction) +``` + +## Class example: miss rate + +Miss rate is another name for the false negative rate, and is a classification metric in the same family as `sens()` and `spec()`. It follows the formula: + +$$ miss\_rate = \frac{FN}{FN + TP} $$ + +This metric, like other classification metrics, is more easily computed when expressed as a confusion matrix. As you will see in the example, you can achieve this with a call to `base::table(estimate, truth)` which correctly puts the "correct" result in the columns of the confusion matrix. + +Classification metrics are more complicated than numeric ones because you have to think about extensions to the multiclass case. For now, let's start with the binary case. + +### Vector implementation + +The vector implementation for classification metrics initially has the same setup as numeric metrics, but has an additional argument, `estimator` that determines the type of estimator to use (binary or some kind of multiclass implementation or averaging). This argument is auto-selected for the user, so default it to `NULL`. Additionally, pass it along to `metric_vec_template()` so that it can check the provided `estimator` against the classes of `truth` and `estimate` to see if they are allowed. 
+ +```{r} +# Logic for `event_level` +event_col <- function(xtab, event_level) { + if (identical(event_level, "first")) { + colnames(xtab)[[1]] + } else { + colnames(xtab)[[2]] + } +} + +miss_rate_vec <- function(truth, + estimate, + estimator = NULL, + na_rm = TRUE, + event_level = "first", + ...) { + estimator <- finalize_estimator(truth, estimator) + + miss_rate_impl <- function(truth, estimate) { + # Create + xtab <- table(estimate, truth) + col <- event_col(xtab, event_level) + col2 <- setdiff(colnames(xtab), col) + + tp <- xtab[col, col] + fn <- xtab[col2, col] + + fn / (fn + tp) + } + + metric_vec_template( + metric_impl = miss_rate_impl, + truth = truth, + estimate = estimate, + na_rm = na_rm, + cls = "factor", + estimator = estimator, + ... + ) +} +``` + +Another change from the numeric metric is that a call to `finalize_estimator()` is made. This is the infrastructure that auto-selects the type of estimator to use. + +```{r} +data("two_class_example") +miss_rate_vec(two_class_example$truth, two_class_example$predicted) +``` + +What happens if you try and pass in a multiclass result? + +```{r} +data("hpc_cv") +fold1 <- filter(hpc_cv, Resample == "Fold01") +miss_rate_vec(fold1$obs, fold1$pred) +``` + +This isn't great, as currently multiclass `miss_rate()` isn't supported and it would have been better to throw an error if the `estimator` was not `"binary"`. Currently, `finalize_estimator()` uses its default implementation which selected `"macro"` as the `estimator` since `truth` was a factor with more than 2 classes. When we implement multiclass averaging, this is what you want, but if your metric only works with a binary implementation (or has other specialized multiclass versions), you might want to guard against this. + +To fix this, a generic counterpart to `finalize_estimator()`, called `finalize_estimator_internal()`, exists that helps you restrict the input types. 
If you provide a method to `finalize_estimator_internal()` where the method name is the same as your metric name, and then set the `metric_class` argument in `finalize_estimator()` to be the same thing, you can control how the auto-selection of the `estimator` is handled. + +Don't worry about the `metric_dispatcher` argument. This is handled for you and just exists as a dummy argument to dispatch off of. + +It is also good practice to call `validate_estimator()` which handles the case where a user passed in the estimator themselves. This validates that the supplied `estimator` is one of the allowed types and error otherwise. + +```{r} +#| error: false +finalize_estimator_internal.miss_rate <- function(metric_dispatcher, x, estimator) { + + validate_estimator(estimator, estimator_override = "binary") + if (!is.null(estimator)) { + return(estimator) + } + + lvls <- levels(x) + if (length(lvls) > 2) { + stop("A multiclass `truth` input was provided, but only `binary` is supported.") + } + "binary" +} + +miss_rate_vec <- function(truth, + estimate, + estimator = NULL, + na_rm = TRUE, + event_level = "first", + ...) { + # calls finalize_estimator_internal() internally + estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate") + + miss_rate_impl <- function(truth, estimate) { + # Create + xtab <- table(estimate, truth) + col <- event_col(xtab, event_level) + col2 <- setdiff(colnames(xtab), col) + + tp <- xtab[col, col] + fn <- xtab[col2, col] + + fn / (fn + tp) + + } + + metric_vec_template( + metric_impl = miss_rate_impl, + truth = truth, + estimate = estimate, + na_rm = na_rm, + cls = "factor", + estimator = estimator, + ... 
+ ) +} + +# Error thrown by our custom handler +# miss_rate_vec(fold1$obs, fold1$pred) + +# Error thrown by validate_estimator() +# miss_rate_vec(fold1$obs, fold1$pred, estimator = "macro") +``` + +### Supporting multiclass miss rate + +Like many other classification metrics such as `precision()` or `recall()`, miss rate does not have a natural multiclass extension, but one can be created using methods such as macro, weighted macro, and micro averaging. If you have not, I encourage you to read `vignette("multiclass", "yardstick")` for more information about how these methods work. + +Generally, they require more effort to get right than the binary case, especially if you want to have a performant version. Luckily, a somewhat standard template is used in yardstick and can be used here as well. + +Let's first remove the "binary" restriction we created earlier. + +```{r} +rm(finalize_estimator_internal.miss_rate) +``` + +The main changes below are: + +- The binary implementation is moved to `miss_rate_binary()`. + +- `miss_rate_estimator_impl()` is a helper function for switching between binary and multiclass implementations. It also applies the weighting required for multiclass estimators. It is called from `miss_rate_impl()` and also accepts the `estimator` argument using R's function scoping rules. + +- `miss_rate_multiclass()` provides the implementation for the multiclass case. It calculates the true positive and false negative values as vectors with one value per class. For the macro case, it returns a vector of miss rate calculations, and for micro, it first sums the individual pieces and returns a single miss rate calculation. In the macro case, the vector is then weighted appropriately in `miss_rate_estimator_impl()` depending on whether or not it was macro or weighted macro. + +```{r} +miss_rate_vec <- function(truth, + estimate, + estimator = NULL, + na_rm = TRUE, + event_level = "first", + ...) 
{ + # calls finalize_estimator_internal() internally + estimator <- finalize_estimator(truth, estimator, metric_class = "miss_rate") + + miss_rate_impl <- function(truth, estimate) { + xtab <- table(estimate, truth) + # Rather than implement the actual method here, we rely on + # an *_estimator_impl() function that can handle binary + # and multiclass cases + miss_rate_estimator_impl(xtab, estimator, event_level) + } + + metric_vec_template( + metric_impl = miss_rate_impl, + truth = truth, + estimate = estimate, + na_rm = na_rm, + cls = "factor", + estimator = estimator, + ... + ) +} + + +# This function switches between binary and multiclass implementations +miss_rate_estimator_impl <- function(data, estimator, event_level) { + if(estimator == "binary") { + miss_rate_binary(data, event_level) + } else { + # Encapsulates the macro, macro weighted, and micro cases + wt <- get_weights(data, estimator) + res <- miss_rate_multiclass(data, estimator) + weighted.mean(res, wt) + } +} + + +miss_rate_binary <- function(data, event_level) { + col <- event_col(data, event_level) + col2 <- setdiff(colnames(data), col) + + tp <- data[col, col] + fn <- data[col2, col] + + fn / (fn + tp) +} + +miss_rate_multiclass <- function(data, estimator) { + + # We need tp and fn for all classes individually + # we can get this by taking advantage of the fact + # that tp + fn = colSums(data) + tp <- diag(data) + tpfn <- colSums(data) + fn <- tpfn - tp + + # If using a micro estimator, we sum the individual + # pieces before performing the miss rate calculation + if (estimator == "micro") { + tp <- sum(tp) + fn <- sum(fn) + } + + # return the vector + tp / (tp + fn) +} +``` + +For the macro case, this separation of weighting from the core implementation might seem strange, but there is good reason for it. Some metrics are combinations of other metrics, and it is nice to be able to reuse code when calculating more complex metrics. 
For example, `f_meas()` is a combination of `recall()` and `precision()`. When calculating a macro averaged `f_meas()`, the weighting must be applied 1 time, at the very end of the calculation. `recall_multiclass()` and `precision_multiclass()` are defined similarly to how `miss_rate_multiclass()` is defined and returns the unweighted vector of calculations. This means we can directly use this in `f_meas()`, and then weight everything once at the end of that calculation. + +Let's try it out now: + +```{r} +# two class +miss_rate_vec(two_class_example$truth, two_class_example$predicted) + +# multiclass +miss_rate_vec(fold1$obs, fold1$pred) +``` + +#### Data frame implementation + +Luckily, the data frame implementation is as simple as the numeric case, we just need to add an extra `estimator` argument and pass that through. + +```{r} +miss_rate <- function(data, ...) { + UseMethod("miss_rate") +} + +miss_rate <- new_class_metric(miss_rate, direction = "minimize") + +miss_rate.data.frame <- function(data, + truth, + estimate, + estimator = NULL, + na_rm = TRUE, + event_level = "first", + ...) { + metric_summarizer( + metric_nm = "miss_rate", + metric_fn = miss_rate_vec, + data = data, + truth = !! enquo(truth), + estimate = !! enquo(estimate), + estimator = estimator, + na_rm = na_rm, + event_level = event_level, + ... + ) +} +``` + +```{r} +#| error: false +#| eval: false +# Macro weighted automatically selected +fold1 %>% + miss_rate(obs, pred) + +# Switch to micro +fold1 %>% + miss_rate(obs, pred, estimator = "micro") + +# Macro weighted by resample +hpc_cv %>% + group_by(Resample) %>% + miss_rate(obs, pred, estimator = "macro_weighted") + +# Error handling +miss_rate(hpc_cv, obs, VF) +``` + +## Using custom metrics + +The `metric_set()` function validates that all metric functions are of the same metric type by checking the class of the function. If any metrics are not of the right class, `metric_set()` fails. 
By using `new_numeric_metric()` and `new_class_metric()` in the above custom metrics, they work out of the box without any additional adjustments. + +```{r} +numeric_mets <- metric_set(mse, rmse) + +numeric_mets(solubility_test, solubility, prediction) +``` + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session("yardstick") +``` diff --git a/learn/develop/models/index.qmd b/learn/develop/models/index.qmd new file mode 100644 index 00000000..5695104d --- /dev/null +++ b/learn/develop/models/index.qmd @@ -0,0 +1,658 @@ +--- +title: "How to build a parsnip model" +categories: + - developer tools +type: learn-subsection +weight: 2 +description: | + Create a parsnip model function from an existing model implementation. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(mda) +library(tidymodels) + +pkgs <- c("tidymodels", "mda", "modeldata") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +The parsnip package constructs models and predictions by representing those actions in expressions. There are a few reasons for this: + + * It eliminates a lot of duplicate code. + * Since the expressions are not evaluated until fitting, it eliminates many package dependencies. + +A parsnip model function is itself very general. For example, the `logistic_reg()` function itself doesn't have any model code within it. Instead, each model function is associated with one or more computational _engines_. These might be different R packages or some function in another language (that can be evaluated by R). + +This article describes the process of creating a new model function. 
Before proceeding, take a minute and read our [guidelines on creating modeling packages](https://tidymodels.github.io/model-implementation-principles/) to understand the general themes and conventions that we use. 
+
+## An example model
+
+As an example, we'll create a function for _mixture discriminant analysis_. There are [a few packages](http://search.r-project.org/cgi-bin/namazu.cgi?query=%22mixture+discriminant%22&max=100&result=normal&sort=score&idxname=functions) that implement this but we'll focus on `mda::mda`:
+
+```{r}
+#| label: "mda-str"
+str(mda::mda)
+```
+
+The main hyperparameter is the number of subclasses. We'll name our function `discrim_mixture`. 
+
+## Aspects of models
+
+Before proceeding, it helps to review how parsnip categorizes models:
+
+* The model _type_ is related to the structural aspect of the model. For example, the model type `linear_reg` represents linear models (slopes and intercepts) that model a numeric outcome. Other model types in the package are `nearest_neighbor`, `decision_tree`, and so on. 
+
+* Within a model type is the _mode_, related to the modeling goal. Currently the two modes in the package are regression and classification. Some models have methods for both modes (e.g. nearest neighbors) while others have only a single mode (e.g. logistic regression). 
+
+* The computation _engine_ is a combination of the estimation method and the implementation. For example, for linear regression, one engine is `"lm"` which uses ordinary least squares analysis via the `lm()` function. Another engine is `"stan"` which uses the Stan infrastructure to estimate parameters using Bayes rule. 
+
+When adding a model into parsnip, the user has to specify which modes and engines are used. The package also enables users to add a new mode or engine to an existing model. 
+
+## The general process
+
+The parsnip package stores information about the models in an internal environment object. 
The environment can be accessed via the function `get_model_env()`. The package includes a variety of functions that can get or set the different aspects of the models. + +If you are adding a new model from your own package, you can use these functions to add new entries into the model environment. + +### Step 1. Register the model, modes, and arguments + +We will add the MDA model using the model type `discrim_mixture`. Since this is a classification method, we only have to register a single mode: + +```{r} +#| label: "mda-reg" +library(tidymodels) +set_new_model("discrim_mixture") +set_model_mode(model = "discrim_mixture", mode = "classification") +set_model_engine( + "discrim_mixture", + mode = "classification", + eng = "mda" +) +set_dependency("discrim_mixture", eng = "mda", pkg = "mda") +``` + +These functions should silently finish. There is also a function that can be used to show what aspects of the model have been added to parsnip: + +```{r} +#| label: "mda-show-1" +show_model_info("discrim_mixture") +``` + +The next step would be to declare the main arguments to the model. These are declared independent of the mode. To specify the argument, there are a few slots to fill in: + + * The name that parsnip uses for the argument. In general, we try to use non-jargony names for arguments (e.g. "penalty" instead of "lambda" for regularized regression). We recommend consulting [the model argument table available here](/find/parsnip/) to see if an existing argument name can be used before creating a new one. + + * The argument name that is used by the underlying modeling function. + + * A function reference for a _constructor_ that will be used to generate tuning parameter values. This should be a character vector with a named element called `fun` that is the constructor function. There is an optional element `pkg` that can be used to call the function using its namespace. 
If referencing functions from the dials package, quantitative parameters can have additional arguments in the list for `trans` and `range` while qualitative parameters can pass `values` via this list. + + * A logical value for whether the argument can be used to generate multiple predictions for a single R object. For example, for boosted trees, if a model is fit with 10 boosting iterations, many modeling packages allow the model object to make predictions for any iterations less than the one used to fit the model. In general this is not the case so one would use `has_submodels = FALSE`. + +For `mda::mda()`, the main tuning parameter is `subclasses` which we will rewrite as `sub_classes`. + +```{r} +#| label: "mda-args" +set_model_arg( + model = "discrim_mixture", + eng = "mda", + parsnip = "sub_classes", + original = "subclasses", + func = list(pkg = "foo", fun = "bar"), + has_submodel = FALSE +) +show_model_info("discrim_mixture") +``` + +### Step 2. Create the model function + +This is a fairly simple function that can follow a basic template. The main arguments to our function will be: + + * The mode. If the model can do more than one mode, you might default this to "unknown". In our case, since it is only a classification model, it makes sense to default it to that mode so that the users won't have to specify it. + + * The argument names (`sub_classes` here). These should be defaulted to `NULL`. 
+
+A basic version of the function is:
+
+```{r}
+#| label: "model-fun"
+discrim_mixture <-
+  function(mode = "classification", sub_classes = NULL) {
+    # Check for correct mode
+    if (mode != "classification") {
+      rlang::abort("`mode` should be 'classification'")
+    }
+    
+    # Capture the arguments in quosures
+    args <- list(sub_classes = rlang::enquo(sub_classes))
+    
+    # Save some empty slots for future parts of the specification
+    new_model_spec(
+      "discrim_mixture",
+      args = args,
+      eng_args = NULL,
+      mode = mode,
+      method = NULL,
+      engine = NULL
+    )
+  }
+```
+
+This is pretty simple since the data are not exposed to this function. 
+
+::: {.callout-warning}
+ We strongly suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former return better traceback results and have safer defaults for handling call objects. 
+:::
+
+### Step 3. Add a fit module
+
+Now that parsnip knows about the model, mode, and engine, we can give it the information on fitting the model for our engine. The information needed to fit the model is contained in another list. The elements are:
+
+ * `interface` is a single character value that could be "formula", "data.frame", or "matrix". This defines the type of interface used by the underlying fit function (`mda::mda`, in this case). This helps the translation of the data to be in an appropriate format for that function.
+
+ * `protect` is an optional list of function arguments that **should not be changeable** by the user. In this case, we probably don't want users to pass data values to these arguments (until the `fit()` function is called).
+
+ * `func` is the package and name of the function that will be called. If you are using a locally defined function, only `fun` is required.
+
+ * `defaults` is an optional list of arguments to the fit function that the user can change, but whose defaults can be set here. This isn't needed in this case, but is described later in this document. 
+ +For the first engine: + +```{r} +#| label: "fit-mod" +set_fit( + model = "discrim_mixture", + eng = "mda", + mode = "classification", + value = list( + interface = "formula", + protect = c("formula", "data"), + func = c(pkg = "mda", fun = "mda"), + defaults = list() + ) +) + +show_model_info("discrim_mixture") +``` + +We also set up the information on how the predictors should be handled. These options ensure that the data that parsnip gives to the underlying model allows for a model fit that is as similar as possible to what it would have produced directly. + + * `predictor_indicators` describes whether and how to create indicator/dummy variables from factor predictors. There are three options: `"none"` (do not expand factor predictors), `"traditional"` (apply the standard `model.matrix()` encodings), and `"one_hot"` (create the complete set including the baseline level for all factors). + + * `compute_intercept` controls whether `model.matrix()` should include the intercept in its formula. This affects more than the inclusion of an intercept column. With an intercept, `model.matrix()` computes dummy variables for all but one factor level. Without an intercept, `model.matrix()` computes a full set of indicators for the first factor variable, but an incomplete set for the remainder. + + * `remove_intercept` removes the intercept column *after* `model.matrix()` is finished. This can be useful if the model function (e.g. `lm()`) automatically generates an intercept. + +* `allow_sparse_x` specifies whether the model can accommodate a sparse representation for predictors during fitting and tuning. + +```{r} +set_encoding( + model = "discrim_mixture", + eng = "mda", + mode = "classification", + options = list( + predictor_indicators = "traditional", + compute_intercept = TRUE, + remove_intercept = TRUE, + allow_sparse_x = FALSE + ) +) +``` + + +### Step 4. 
Add modules for prediction + +Similar to the fitting module, we specify the code for making different types of predictions. To make hard class predictions, the `class` object contains the details. The elements of the list are: + + * `pre` and `post` are optional functions that can preprocess the data being fed to the prediction code and to postprocess the raw output of the predictions. These won't be needed for this example, but a section below has examples of how these can be used when the model code is not easy to use. If the data being predicted has a simple type requirement, you can avoid using a `pre` function with the `args` below. + * `func` is the prediction function (in the same format as above). In many cases, packages have a predict method for their model's class but this is typically not exported. In this case (and the example below), it is simple enough to make a generic call to `predict()` with no associated package. + * `args` is a list of arguments to pass to the prediction function. These will most likely be wrapped in `rlang::expr()` so that they are not evaluated when defining the method. For mda, the code would be `predict(object, newdata, type = "class")`. What is actually given to the function is the parsnip model fit object, which includes a sub-object called `fit()` that houses the mda model object. If the data need to be a matrix or data frame, you could also use `newdata = quote(as.data.frame(newdata))` or similar. + +The parsnip prediction code will expect the result to be an unnamed character string or factor. This will be coerced to a factor with the same levels as the original data. 
+ +To add this method to the model environment, a similar `set()` function is used: + +```{r} +#| label: "mds-class" +class_info <- + list( + pre = NULL, + post = NULL, + func = c(fun = "predict"), + args = + # These lists should be of the form: + # {predict.mda argument name} = {values provided from parsnip objects} + list( + # We don't want the first two arguments evaluated right now + # since they don't exist yet. `type` is a simple object that + # doesn't need to have its evaluation deferred. + object = quote(object$fit), + newdata = quote(new_data), + type = "class" + ) + ) + +set_pred( + model = "discrim_mixture", + eng = "mda", + mode = "classification", + type = "class", + value = class_info +) +``` + +A similar call can be used to define the class probability module (if they can be computed). The format is identical to the `class` module but the output is expected to be a tibble with columns for each factor level. + +As an example of the `post` function, the data frame created by `mda:::predict.mda()` will be converted to a tibble. The arguments are `x` (the raw results coming from the predict method) and `object` (the parsnip model fit object). The latter has a sub-object called `lvl` which is a character string of the outcome's factor levels (if any). + +We register the probability module. There is a template function that makes this slightly easier to format the objects: + +```{r} +#| label: "mda-prob" +prob_info <- + pred_value_template( + post = function(x, object) { + tibble::as_tibble(x) + }, + func = c(fun = "predict"), + # Now everything else is put into the `args` slot + object = quote(object$fit), + newdata = quote(new_data), + type = "posterior" + ) + +set_pred( + model = "discrim_mixture", + eng = "mda", + mode = "classification", + type = "prob", + value = prob_info +) + +show_model_info("discrim_mixture") +``` + +If this model could be used for regression situations, we could also add a "numeric" module. 
For `pred`, the model requires an unnamed numeric vector output (usually). + +Examples are [here](https://github.com/tidymodels/parsnip/blob/master/R/linear_reg_data.R) and [here](https://github.com/tidymodels/parsnip/blob/master/R/rand_forest_data.R). + + +### Does it work? + +As a developer, one thing that may come in handy is the `translate()` function. This will tell you what the model's eventual syntax will be. + +For example: + +```{r} +#| label: "mda-code" +discrim_mixture(sub_classes = 2) %>% + translate(engine = "mda") +``` + +Let's try it on a data set from the modeldata package: + +```{r} +#| label: "mda-data" +data("two_class_dat", package = "modeldata") +set.seed(4622) +example_split <- initial_split(two_class_dat, prop = 0.99) +example_train <- training(example_split) +example_test <- testing(example_split) + +mda_spec <- discrim_mixture(sub_classes = 2) %>% + set_engine("mda") + +mda_fit <- mda_spec %>% + fit(Class ~ ., data = example_train, engine = "mda") +mda_fit + +predict(mda_fit, new_data = example_test, type = "prob") %>% + bind_cols(example_test %>% select(Class)) + +predict(mda_fit, new_data = example_test) %>% + bind_cols(example_test %>% select(Class)) +``` + + +## Add an engine + +The process for adding an engine to an existing model is _almost_ the same as building a new model but simpler with fewer steps. You only need to add the engine-specific aspects of the model. For example, if we wanted to fit a linear regression model using M-estimation, we could only add a new engine. 
The code for the `rlm()` function in MASS is pretty similar to `lm()`, so we can copy that code and change the package/function names: + +```{r} +#| label: "rlm" +set_model_engine("linear_reg", "regression", eng = "rlm") +set_dependency("linear_reg", eng = "rlm", pkg = "MASS") + +set_fit( + model = "linear_reg", + eng = "rlm", + mode = "regression", + value = list( + interface = "formula", + protect = c("formula", "data", "weights"), + func = c(pkg = "MASS", fun = "rlm"), + defaults = list() + ) +) + +set_encoding( + model = "linear_reg", + eng = "rlm", + mode = "regression", + options = list( + predictor_indicators = "traditional", + compute_intercept = TRUE, + remove_intercept = TRUE, + allow_sparse_x = FALSE + ) +) + +set_pred( + model = "linear_reg", + eng = "rlm", + mode = "regression", + type = "numeric", + value = list( + pre = NULL, + post = NULL, + func = c(fun = "predict"), + args = + list( + object = expr(object$fit), + newdata = expr(new_data), + type = "response" + ) + ) +) + +# testing: +linear_reg() %>% + set_engine("rlm") %>% + fit(mpg ~ ., data = mtcars) +``` + +## Add parsnip models to another package + +The process here is almost the same. All of the previous functions are still required but their execution is a little different. + +For parsnip to register them, that package must already be loaded. For this reason, it makes sense to have parsnip in the "Depends" category. + +The first difference is that the functions that define the model must be inside of a wrapper function that is called when your package is loaded. For our example here, this might look like: + +```{r} +#| eval: false +make_discrim_mixture_mda <- function() { + parsnip::set_new_model("discrim_mixture") + + parsnip::set_model_mode("discrim_mixture", "classification") + + # and so one... 
+} +``` + +This function is then executed when your package is loaded: + +```{r} +#| eval: false +.onLoad <- function(libname, pkgname) { + # This defines discrim_mixture in the model database + make_discrim_mixture_mda() +} +``` + +For an example package that uses parsnip definitions, take a look at the [discrim](https://github.com/tidymodels/discrim) package. + +::: {.callout-warning} + To use a new model and/or engine in the broader tidymodels infrastructure, we recommend your model definition declarations (e.g. `set_new_model()` and similar) reside in a package. If these definitions are in a script only, the new model may not work with the tune package, for example for parallel processing. +::: + +It is also important for parallel processing support to **list the home package as a dependency**. If the `discrim_mixture()` function lived in a package called `mixedup`, include the line: + +```r +set_dependency("discrim_mixture", eng = "mda", pkg = "mixedup") +``` + +Parallel processing requires this explicit dependency setting. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux will load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a parsnip model is not loaded in the worker processes, the model will not have an entry in parsnip's internal database (and produce an error). + + +## Your model, tuning parameters, and you + +The tune package can be used to find reasonable values of model arguments via tuning. There are some S3 methods that are useful to define for your model. `discrim_mixture()` has one main tuning parameter: `sub_classes`. To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. 
+ +`tunable()` takes the model specification as its argument and returns a tibble with columns: + +* `name`: The name of the argument. + +* `call_info`: A list that describes how to call a function that returns a dials parameter object. + +* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `"model_spec"`. + +* `component`: A character string with more information about the source. For models, this is just the name of the function (e.g. `"discrim_mixture"`). + +* `component_id`: A character string to indicate where a unique identifier is for the object. For a model, this indicates the type of model argument (e.g. "main"). + +The main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. + +For example, for a nearest-neighbors `neighbors` parameter, this value is just: + +```{r} +#| label: "mtry" +info <- list(pkg = "dials", fun = "neighbors") + +# FYI: how it is used under-the-hood: +new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg) +rlang::eval_tidy(new_param_call) +``` + +For `discrim_mixture()`, a dials object is needed that returns an integer that is the number of sub-classes that should be created. We can create a dials parameter function for this: + +```{r} +#| label: "sub-classes" +sub_classes <- function(range = c(1L, 10L), trans = NULL) { + new_quant_param( + type = "integer", + range = range, + inclusive = c(TRUE, TRUE), + trans = trans, + label = c(sub_classes = "# Sub-Classes"), + finalize = NULL + ) +} +``` + +If this were in the dials package, we could use: + +```{r} +#| label: "tunable" +tunable.discrim_mixture <- function(x, ...) 
{ + tibble::tibble( + name = c("sub_classes"), + call_info = list(list(pkg = NULL, fun = "sub_classes")), + source = "model_spec", + component = "discrim_mixture", + component_id = "main" + ) +} +``` + +Once this method is in place, the tuning functions can be used: + +```{r} +#| label: "tune-mda" +#| message: false +mda_spec <- + discrim_mixture(sub_classes = tune()) %>% + set_engine("mda") + +set.seed(452) +cv <- vfold_cv(example_train) +mda_tune_res <- mda_spec %>% + tune_grid(Class ~ ., cv, grid = 4) +show_best(mda_tune_res, metric = "roc_auc") +``` + + + +## Pro-tips, what-ifs, exceptions, FAQ, and minutiae + +There are various things that came to mind while developing this resource. + +**Do I have to return a simple vector for `predict` and `predict_class`?** + +Previously, when discussing the `pred` information: + +> For `pred`, the model requires an unnamed numeric vector output **(usually)**. + +There are some models (e.g. `glmnet`, `plsr`, `Cubist`, etc.) that can make predictions for different models from the same fitted model object. We want to facilitate that here so, for these cases, the current convention is to return a tibble with the prediction in a column called `values` and have extra columns for any parameters that define the different sub-models. + +For example, if I fit a linear regression model via `glmnet` and get four values of the regularization parameter (`lambda`): + +```{r} +#| label: "glmnet" +#| eval: false +linear_reg() %>% + set_engine("glmnet", nlambda = 4) %>% + fit(mpg ~ ., data = mtcars) %>% + multi_predict(new_data = mtcars[1:3, -1]) +``` + +_However_, the API is still being developed. Currently, there is not an interface in the prediction functions to pass in the values of the parameters to make predictions with (`lambda`, in this case). 
+ +**What do I do about how my model handles factors or categorical data?** + +Some modeling functions in R create indicator/dummy variables from categorical data when you use a model formula (typically using `model.matrix()`), and some do not. Some examples of models that do _not_ create indicator variables include tree-based models, naive Bayes models, and multilevel or hierarchical models. The tidymodels ecosystem assumes a `model.matrix()`-like default encoding for categorical data used in a model formula, but you can change this encoding using `set_encoding()`. For example, you can set predictor encodings that say, "leave my data alone," and keep factors as is: + +```{r} +#| label: "encodinginfo" +#| eval: false +set_encoding( + model = "decision_tree", + eng = "rpart", + mode = "regression", + options = list( + predictor_indicators = "none", + compute_intercept = FALSE, + remove_intercept = FALSE + ) +) +``` + +::: {.callout-note} +There are three options for `predictor_indicators`: +- "none" (do not expand factor predictors) +- "traditional" (apply the standard `model.matrix()` encoding) +- "one_hot" (create the complete set including the baseline level for all factors) +::: + +To learn more about encoding categorical predictors, check out [this blog post](https://www.tidyverse.org/blog/2020/07/parsnip-0-1-2/#predictor-encoding-consistency). + +**What is the `defaults` slot and why do I need it?** + +You might want to set defaults that can be overridden by the user. For example, for logistic regression with `glm`, it makes sense to default `family = binomial`. However, if someone wants to use a different link function, they should be able to do that. 
For that model/engine definition, it has: + +```{r} +#| label: "glm-alt" +#| eval: false +defaults = list(family = expr(binomial)) +``` + +So that is the default: + +```{r} +#| label: "glm-alt-show" +#| eval: false +logistic_reg() %>% translate(engine = "glm") + +# but you can change it: + +logistic_reg() %>% + set_engine("glm", family = expr(binomial(link = "probit"))) %>% + translate() +``` + +That's what `defaults` are for. + +Note that we wrapped `binomial` inside of `expr()`. If we didn't, it would substitute the results of executing `binomial()` inside of the expression (and that's a mess). + +**What if I want more complex defaults?** + +The `translate` function can be used to check values or set defaults once the model's mode is known. To do this, you can create a model-specific S3 method that first calls the general method (`translate.model_spec()`) and then makes modifications or conducts error traps. + +For example, the ranger and randomForest package functions have arguments for calculating importance. One is a logical and the other is a string. Since this is likely to lead to a bunch of frustration and GitHub issues, we can put in a check: + +```{r} +#| label: "rf-trans" +#| eval: false +# Simplified version +translate.rand_forest <- function (x, engine, ...){ + # Run the general method to get the real arguments in place + x <- translate.default(x, engine, ...) + + # Check and see if they make sense for the engine and/or mode: + if (x$engine == "ranger") { + if (any(names(x$method$fit$args) == "importance")) + if (is.logical(x$method$fit$args$importance)) + rlang::abort("`importance` should be a character value. See ?ranger::ranger.") + } + x +} +``` + +As another example, `nnet::nnet()` has an option for the final layer to be linear (called `linout`). If `mode = "regression"`, that should probably be set to `TRUE`. You couldn't do this with the `args` (described above) since you need the function translated first. 
+ + +**My model fit requires more than one function call. So....?** + +The best course of action is to write a wrapper so that it can be one call. This was the case with xgboost and keras. + +**Why would I preprocess my data?** + +There might be non-trivial transformations that the model prediction code requires (such as converting to a sparse matrix representation, etc.) + +This would **not** include making dummy variables and `model.matrix` stuff. The parsnip infrastructure already does that for you. + + +**Why would I post-process my predictions?** + +What comes back from some R functions may be somewhat... arcane or problematic. As an example, for xgboost, if you fit a multi-class boosted tree, you might expect the class probabilities to come back as a matrix (*narrator: they don't*). If you have four classes and make predictions on three samples, you get a vector of 12 probability values. You need to convert these to a rectangular data set. + +Another example is the predict method for ranger, which encapsulates the actual predictions in a more complex object structure. + +These are the types of problems that the post-processor will solve. + +**Are there other modes?** + +Not yet but there will be. For example, it might make sense to have a different mode when doing risk-based modeling via Cox regression models. That would enable different classes of objects and those might be needed since the types of models don't make direct predictions of the outcome. + +If you have a suggestion, please add a [GitHub issue](https://github.com/tidymodels/parsnip/issues) to discuss it. 
+ + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + + + diff --git a/learn/develop/parameters/index.qmd b/learn/develop/parameters/index.qmd new file mode 100644 index 00000000..c29fb8f2 --- /dev/null +++ b/learn/develop/parameters/index.qmd @@ -0,0 +1,225 @@ +--- +title: "How to create a tuning parameter function" +categories: + - developer tools +type: learn-subsection +weight: 4 +description: | + Build functions to use in tuning both quantitative and qualitative parameters. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(tidymodels) + +pkgs <- c("dials", "scales") +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +Some models and recipe steps contain parameters that dials does not know about. You can construct new quantitative and qualitative parameters using `new_quant_param()` or `new_qual_param()`, respectively. This article is a guide to creating new parameters. + +## Quantitative parameters + +As an example, let's consider the multivariate adaptive regression spline ([MARS](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline)) model, which creates nonlinear features from predictors and adds them to a linear regression models. The earth package is an excellent implementation of this method. + +MARS creates an initial set of features and then prunes them back to an appropriate size. This can be done automatically by `earth::earth()` or the number of final terms can be set by the user. The parsnip function `mars()` has a parameter called `num_terms` that defines this. + +What if we want to create a parameter for the number of *initial terms* included in the model. 
There is no argument in `parsnip::mars()` for this but we will make one now. The argument name in `earth::earth()` is `nk`, which is not very descriptive. Our parameter will be called `num_initial_terms`. + +We use the `new_quant_param()` function since this is a numeric parameter. The main two arguments to a numeric parameter function are `range` and `trans`. + +The `range` specifies the possible values of the parameter. For our example, a minimal value might be one or two. What is the upper limit? The default in the earth package is + +```{r} +#| label: "eart" +#| eval: false +min(200, max(20, 2 * ncol(x))) + 1 +``` + +where `x` is the predictor matrix. We often put in values that are either sensible defaults or are minimal enough to work for the majority of data sets. For now, let's specify an upper limit of 10 but this will be discussed more in the next section. + +The other argument is `trans`, which represents a transformation that should be applied to the parameter values when working with them. For example, many regularization methods have a `penalty` parameter that tends to range between zero and some upper bound (let's say 1). The effect of going from a penalty value of 0.01 to 0.1 is much more impactful than going from 0.9 to 1.0. In such a case, it might make sense to work with this parameter in transformed units (such as the log, in this example). If new parameter values are generated at random, it helps if they are uniformly simulated in the transformed units and then converted back to the original units. + +The `trans` parameter accepts a transformation object from the scales package. For example: + +```{r} +#| label: "scales" +library(scales) +lsf.str("package:scales", pattern = "_trans$") +scales::log10_trans() +``` + +A value of `NULL` means that no transformation should be used. + +A quantitative parameter function should have these two arguments and, in the function body, a call `new_quant_param()`. 
There are a few arguments to this function: + +```{r} +#| label: "new_quant_param" +library(tidymodels) +args(new_quant_param) +``` + +- Possible types are double precision and integers. The value of `type` should agree with the values of `range` in the function definition. + +- It's OK for our tuning to include the minimum or maximum, so we'll use `c(TRUE, TRUE)` for `inclusive`. If the value cannot include one end of the range, set one or both of these values to `FALSE`. + +- The `label` should be a named character string where the name is the parameter name and the value represents what will be printed automatically. + +- `finalize` is an argument that can set parts of the range. This is discussed more below. + +Here's an example of a basic quantitative parameter object: + +```{r} +#| label: "num-initial-terms" +num_initial_terms <- function(range = c(1L, 10L), trans = NULL) { + new_quant_param( + type = "integer", + range = range, + inclusive = c(TRUE, TRUE), + trans = trans, + label = c(num_initial_terms = "# Initial MARS Terms"), + finalize = NULL + ) +} + +num_initial_terms() + +# Sample from the parameter: +set.seed(4832856) +num_initial_terms() %>% value_sample(5) +``` + +### Finalizing parameters + +It might be the case that the range of the parameter is unknown. For example, parameters that are related to the number of columns in a data set cannot be exactly specified in the absence of data. In those cases, a placeholder of `unknown()` can be added. This will force the user to "finalize" the parameter object for their particular data set. 
Let's redefine our function with an `unknown()` value: + +```{r} +#| label: "num-initial-terms-unk" +#| error: false +#| eval: false +num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) { + new_quant_param( + type = "integer", + range = range, + inclusive = c(TRUE, TRUE), + trans = trans, + label = c(num_initial_terms = "# Initial MARS Terms"), + finalize = NULL + ) +} +num_initial_terms() + +# Can we sample? +num_initial_terms() %>% value_sample(5) +``` + +The `finalize` argument of `num_initial_terms()` can take a function that uses data to set the range. For example, the package already includes a few functions for finalization: + +```{r} +#| label: "dials-final-funcs" +lsf.str("package:dials", pattern = "^get_") +``` + +These functions generally take a data frame of predictors (in an argument called `x`) and add the range of the parameter object. Using the formula in the earth package, we might use: + +```{r} +#| label: "earth-range" +get_initial_mars_terms <- function(object, x) { + upper_bound <- min(200, max(20, 2 * ncol(x))) + 1 + upper_bound <- as.integer(upper_bound) + bounds <- range_get(object) + bounds$upper <- upper_bound + range_set(object, bounds) +} + +# Use the mtcars data to finalize the upper bound: +num_initial_terms() %>% get_initial_mars_terms(x = mtcars[, -1]) +``` + +Once we add this function to the object, the general `finalize()` method can be used: + +```{r} +#| label: "final-obj" +num_initial_terms <- function(range = c(1L, unknown()), trans = NULL) { + new_quant_param( + type = "integer", + range = range, + inclusive = c(TRUE, TRUE), + trans = trans, + label = c(num_initial_terms = "# Initial MARS Terms"), + finalize = get_initial_mars_terms + ) +} + +num_initial_terms() %>% finalize(x = mtcars[, -1]) +``` + +## Qualitative parameters + +Now let's look at an example of a qualitative parameter. If a model includes a data aggregation step, we want to allow users to tune how our parameters are aggregated. 
For example, in embedding methods, possible values might be `min`, `max`, `mean`, `sum`, or to not aggregate at all ("none"). Since these cannot be put on a numeric scale, they are possible values of a qualitative parameter. We'll take "character" input (not "logical"), and we must specify the allowed values. By default we won't aggregate. + +```{r} +#| label: "aggregation" +aggregation <- function(values = c("none", "min", "max", "mean", "sum")) { + new_qual_param( + type = "character", + values = values, + # By default, the first value is selected as default. We'll specify that to + # make it clear. + default = "none", + label = c(aggregation = "Aggregation Method") + ) +} +``` + +Within the dials package, the convention is to have the values contained in a separate vector whose name starts with `values_`. For example: + +```{r} +#| label: "aggregation-vec" +values_aggregation <- c("none", "min", "max", "mean", "sum") +aggregation <- function(values = values_aggregation) { + new_qual_param( + type = "character", + values = values, + # By default, the first value is selected as default. We'll specify that to + # make it clear. + default = "none", + label = c(aggregation = "Aggregation Method") + ) +} +``` + +This step may not make sense if you are using the function in a script and not keeping it within a package. + +We can use our `aggregation` parameters with dials functions. 
+ +```{r} +#| label: "aggregation-use" +aggregation() +aggregation() %>% value_sample(3) +``` + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/learn/develop/recipes/figs/carbon_dist-1.svg b/learn/develop/recipes/figs/carbon_dist-1.svg new file mode 100644 index 00000000..0edc3462 --- /dev/null +++ b/learn/develop/recipes/figs/carbon_dist-1.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +50 +100 +150 + + + + + + + + +25 +50 +75 +100 +carbon +count + + diff --git a/content/learn/develop/recipes/figs/cdf_plot-1.svg b/learn/develop/recipes/figs/cdf_plot-1.svg similarity index 100% rename from content/learn/develop/recipes/figs/cdf_plot-1.svg rename to learn/develop/recipes/figs/cdf_plot-1.svg diff --git a/learn/develop/recipes/index.qmd b/learn/develop/recipes/index.qmd new file mode 100644 index 00000000..ab2e0848 --- /dev/null +++ b/learn/develop/recipes/index.qmd @@ -0,0 +1,550 @@ +--- +title: "Create your own recipe step function" +categories: + - developer tools +type: learn-subsection +weight: 1 +description: | + Write a new recipe step for data preprocessing. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "ex_setup" +#| include: false +library(tidymodels) +library(modeldata) +pkgs <- c("tidymodels", "modeldata") +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +There are many existing recipe steps in packages like recipes, themis, textrecipes, and others. A full list of steps in CRAN packages [can be found here](/find/recipes/). However, you might need to define your own preprocessing operations; this article describes how to do that. 
If you are looking for good examples of existing steps, we suggest looking at the code for [centering](https://github.com/tidymodels/recipes/blob/master/R/center.R) or [PCA](https://github.com/tidymodels/recipes/blob/master/R/pca.R) to start. + +For check operations (e.g. `check_class()`), the process is very similar. Notes on this are available at the end of this article. + +The general process to follow is to: + +1. Define a step constructor function. + +2. Create the minimal S3 methods for `prep()`, `bake()`, and `print()`. + +3. Optionally add some extra methods to work with other tidymodels packages, such as `tunable()` and `tidy()`. + +As an example, we will create a step for converting data into percentiles. + +## A new step definition + +Let's create a step that replaces the value of a variable with its percentile from the training set. The example data we'll use is from the modeldata package: + +```{r} +#| label: "initial" +library(modeldata) +data(biomass) +str(biomass) + +biomass_tr <- biomass[biomass$dataset == "Training",] +biomass_te <- biomass[biomass$dataset == "Testing",] +``` + +To illustrate the transformation with the `carbon` variable, note the training set distribution of this variable with a vertical line below for the first value of the test set. + +```{r} +#| label: "carbon_dist" +#| fig-width: 6 +#| fig-height: 4.25 +#| out-width: "100%" +library(ggplot2) +theme_set(theme_bw()) +ggplot(biomass_tr, aes(x = carbon)) + + geom_histogram(binwidth = 5, col = "blue", fill = "blue", alpha = .5) + + geom_vline(xintercept = biomass_te$carbon[1], lty = 2) +``` + +Based on the training set, `r round(mean(biomass_tr$carbon <= biomass_te$carbon[1])*100, 1)`% of the data are less than a value of `r biomass_te$carbon[1]`. There are some applications where it might be advantageous to represent the predictor values as percentiles rather than their original values. + +Our new step will do this computation for any numeric variables of interest. 
We will call this new recipe step `step_percentile()`. The code below is designed for illustration and not speed or best practices. We've left out a lot of error trapping that we would want in a real implementation. + +## Create the function + +To start, there is a _user-facing_ function. Let's call that `step_percentile()`. This is just a simple wrapper around a _constructor function_, which defines the rules for any step object that defines a percentile transformation. We'll call this constructor `step_percentile_new()`. + +The function `step_percentile()` takes the same arguments as your function and simply adds it to a new recipe. The `...` signifies the variable selectors that can be used. + +```{r} +#| label: "initial_def" +step_percentile <- function( + recipe, + ..., + role = NA, + trained = FALSE, + ref_dist = NULL, + options = list(probs = (0:100)/100, names = TRUE), + skip = FALSE, + id = rand_id("percentile") + ) { + + ## The variable selectors are not immediately evaluated by using + ## the `quos()` function in `rlang`. `ellipse_check()` captures + ## the values and also checks to make sure that they are not empty. + terms <- ellipse_check(...) + + add_step( + recipe, + step_percentile_new( + terms = terms, + trained = trained, + role = role, + ref_dist = ref_dist, + options = options, + skip = skip, + id = id + ) + ) +} +``` + +You should always keep the first four arguments (`recipe` through `trained`) the same as listed above. Some notes: + + * the `role` argument is used when you either 1) create new variables and want their role to be pre-set or 2) replace the existing variables with new values. The latter is what we will be doing and using `role = NA` will leave the existing role intact. + * `trained` is set by the package when the estimation step has been run. You should default your function definition's argument to `FALSE`. + * `skip` is a logical. Whenever a recipe is prepped, each step is trained and then baked. 
However, there are some steps that should not be applied when a call to `bake()` is used. For example, if a step is applied to the variables with roles of "outcomes", these data would not be available for new samples. + * `id` is a character string that can be used to identify steps in package code. `rand_id()` will create an ID that has the prefix and a random character sequence. + +We can estimate the percentiles of new data points based on the percentiles from the training set with `approx()`. Our `step_percentile` contains a `ref_dist` object to store these percentiles (pre-computed from the training set in `prep()`) for later use in `bake()`. + +We will use `stats::quantile()` to compute the grid. However, we might also want to have control over the granularity of this grid, so the `options` argument will be used to define how that calculation is done. We could use the ellipses (aka `...`) so that any options passed to `step_percentile()` that are not one of its arguments will then be passed to `stats::quantile()`. However, we recommend making a separate list object with the options and use these inside the function because `...` is already used to define the variable selection. + +It is also important to consider if there are any _main arguments_ to the step. For example, for spline-related steps such as `step_ns()`, users typically want to adjust the argument for the degrees of freedom in the spline (e.g. `splines::ns(x, df)`). Rather than letting users add `df` to the `options` argument: + +* Allow the important arguments to be main arguments to the step function. + +* Follow the tidymodels [conventions for naming arguments](https://tidymodels.github.io/model-implementation-principles/standardized-argument-names.html). Whenever possible, avoid jargon and keep common argument names. + +There are benefits to following these principles (as shown below). + +## Initialize a new object + +Now, the constructor function can be created. 
+ +The function cascade is: + +``` +step_percentile() calls recipes::add_step() +└──> recipes::add_step() calls step_percentile_new() + └──> step_percentile_new() calls recipes::step() +``` + +`step()` is a general constructor for recipes that mainly makes sure that the resulting step object is a list with an appropriate S3 class structure. Using `subclass = "percentile"` will set the class of new objects to `"step_percentile"`. + +```{r} +#| label: "initialize" +step_percentile_new <- + function(terms, role, trained, ref_dist, options, skip, id) { + step( + subclass = "percentile", + terms = terms, + role = role, + trained = trained, + ref_dist = ref_dist, + options = options, + skip = skip, + id = id + ) + } +``` + +This constructor function should have no default argument values. Defaults should be set in the user-facing step object. + +## Create the `prep` method + +You will need to create a new `prep()` method for your step's class. To do this, three arguments that the method should have are: + +```r +function(x, training, info = NULL) +``` + +where + + * `x` will be the `step_percentile` object, + * `training` will be a _tibble_ that has the training set data, and + * `info` will also be a tibble that has information on the current set of data available. This information is updated as each step is evaluated by its specific `prep()` method so it may not have the variables from the original data. The columns in this tibble are `variable` (the variable name), `type` (currently either "numeric" or "nominal"), `role` (defining the variable's role), and `source` (either "original" or "derived" depending on where it originated). + +You can define other arguments as well. + +The first thing that you might want to do in the `prep()` function is to translate the specification listed in the `terms` argument to column names in the current data. There is a function called `recipes_eval_select()` that can be used to obtain this. 
+ +::: {.callout-warning} + The `recipes_eval_select()` function is not one you interact with as a typical recipes user, but it is helpful if you develop your own custom recipe steps. +::: + +```{r} +#| label: "prep_1" +#| eval: false +prep.step_percentile <- function(x, training, info = NULL, ...) { + col_names <- recipes_eval_select(x$terms, training, info) + # TODO finish the rest of the function +} +``` + +After this function call, it is a good idea to check that the selected columns have the appropriate type (e.g. numeric for this example). See `recipes::check_type()` to do this for basic types. + +Once we have this, we can save the approximation grid. For the grid, we will use a helper function that enables us to run `rlang::exec()` to splice in any extra arguments contained in the `options` list to the call to `quantile()`: + +```{r} +#| label: "splice" +get_train_pctl <- function(x, args = NULL) { + res <- rlang::exec("quantile", x = x, !!!args) + # Remove duplicate percentile values + res[!duplicated(res)] +} + +# For example: +get_train_pctl(biomass_tr$carbon, list(probs = 0:1)) +get_train_pctl(biomass_tr$carbon) +``` + +Now, the `prep()` method can be created: + +```{r} +#| label: "prep-2" +prep.step_percentile <- function(x, training, info = NULL, ...) { + col_names <- recipes_eval_select(x$terms, training, info) + ## You can add error trapping for non-numeric data here and so on. + + ## We'll use the names later so make sure they are available + if (x$options$names == FALSE) { + rlang::abort("`names` should be set to TRUE") + } + + if (!any(names(x$options) == "probs")) { + x$options$probs <- (0:100)/100 + } else { + x$options$probs <- sort(unique(x$options$probs)) + } + + # Compute percentile grid + ref_dist <- purrr::map(training[, col_names], get_train_pctl, args = x$options) + + ## Use the constructor function to return the updated object. 
+ ## Note that `trained` is now set to TRUE + + step_percentile_new( + terms = x$terms, + trained = TRUE, + role = x$role, + ref_dist = ref_dist, + options = x$options, + skip = x$skip, + id = x$id + ) +} +``` + +We suggest favoring `rlang::abort()` and `rlang::warn()` over `stop()` and `warning()`. The former can be used for better traceback results. + + +## Create the `bake` method + +Remember that the `prep()` function does not _apply_ the step to the data; it only estimates any required values such as `ref_dist`. We will need to create a new method for our `step_percentile()` class. The minimum arguments for this are + +```r +function(object, new_data, ...) +``` + +where `object` is the updated step function that has been through the corresponding `prep()` code and `new_data` is a tibble of data to be processed. + +Here is the code to convert the new data to percentiles. The input data (`x` below) comes in as a numeric vector and the output is a vector of approximate percentiles: + +```{r} +#| label: "bake-helpers" +pctl_by_approx <- function(x, ref) { + # In case duplicates were removed, get the percentiles from + # the names of the reference object + grid <- as.numeric(gsub("%$", "", names(ref))) + approx(x = ref, y = grid, xout = x)$y/100 +} +``` + +These computations are done column-wise using `purrr::map2_dfc()` to modify the new data in-place: + +```{r} +#| label: "bake-method" +bake.step_percentile <- function(object, new_data, ...) { + ## For illustration (and not speed), we will loop through the affected variables + ## and do the computations + vars <- names(object$ref_dist) + + new_data[, vars] <- + purrr::map2_dfc(new_data[, vars], object$ref_dist, pctl_by_approx) + + ## Always convert to tibbles on the way out + tibble::as_tibble(new_data) +} +``` + +::: {.callout-note} +You need to import `recipes::prep()` and `recipes::bake()` to create your own step function in a package. 
+::: + +## Run the example + +Let's use the example data to make sure that it works: + +```{r} +#| label: "example" +#| eval: false +rec_obj <- + recipe(HHV ~ ., data = biomass_tr) %>% + step_percentile(ends_with("gen")) %>% + prep(training = biomass_tr) + +biomass_te %>% select(ends_with("gen")) %>% slice(1:2) +bake(rec_obj, biomass_te %>% slice(1:2), ends_with("gen")) + +# Checking to get approximate result: +mean(biomass_tr$hydrogen <= biomass_te$hydrogen[1]) +mean(biomass_tr$oxygen <= biomass_te$oxygen[1]) +``` + +The plot below shows how the original hydrogen percentiles line up with the estimated values: + +```{r} +#| label: "cdf_plot" +#| eval: false +hydrogen_values <- + bake(rec_obj, biomass_te, hydrogen) %>% + bind_cols(biomass_te %>% select(original = hydrogen)) + +ggplot(biomass_tr, aes(x = hydrogen)) + + # Plot the empirical distribution function of the + # hydrogen training set values as a black line + stat_ecdf() + + # Overlay the estimated percentiles for the new data: + geom_point(data = hydrogen_values, + aes(x = original, y = hydrogen), + col = "red", alpha = .5, cex = 2) + + labs(x = "New Hydrogen Values", y = "Percentile Based on Training Set") +``` + +These line up very nicely! + +## Custom check operations + +The process here is exactly the same as steps; the internal functions have a similar naming convention: + + * `add_check()` instead of `add_step()` + * `check()` instead of `step()`, and so on. + +It is strongly recommended that: + + 1. The operations start with `check_` (i.e. `check_range()` and `check_range_new()`) + 1. The check uses `rlang::abort(paste0(...))` when the conditions are not met + 1. The original data are returned (unaltered) by the check when the conditions are satisfied. + +## Other step methods + +There are a few other S3 methods that can be created for your step function. They are not required unless you plan on using your step in the broader tidymodels package set. 
+ +### A print method + +If you don't add a print method for `step_percentile`, it will still print but it will be printed as a list of (potentially large) objects and look a bit ugly. The recipes package contains a helper function called `printer()` that should be useful in most cases. We are using it here for the custom print method for `step_percentile`. It requires the original terms specification and the column names this specification is evaluated to by `prep()`. For the former, our step object is structured so that the list object `ref_dist` has the names of the selected variables: + +```{r} +#| label: "print-method" +#| eval: false +print.step_percentile <- + function(x, width = max(20, options()$width - 35), ...) { + cat("Percentile transformation on ", sep = "") + printer( + # Names before prep (could be selectors) + untr_obj = x$terms, + # Names after prep: + tr_obj = names(x$ref_dist), + # Has it been prepped? + trained = x$trained, + # An estimate of how many characters to print on a line: + width = width + ) + invisible(x) + } + +# Results before `prep()`: +recipe(HHV ~ ., data = biomass_tr) %>% + step_percentile(ends_with("gen")) + +# Results after `prep()`: +rec_obj +``` + +### Methods for declaring required packages + +Some recipe steps use functions from other packages. When this is the case, the `step_*()` function should check to see if the package is installed. The function `recipes::recipes_pkg_check()` will do this. For example: + +``` +> recipes::recipes_pkg_check("some_package") +1 package is needed for this step and is not installed. (some_package). Start +a clean R session then run: install.packages("some_package") +``` + +There is an S3 method that can be used to declare what packages should be loaded when using the step. For a hypothetical step that relies on the `hypothetical` package, this might look like: + +```{r} +#| eval: false +required_pkgs.step_hypothetical <- function(x, ...) 
{ + c("hypothetical", "myrecipespkg") +} +``` + +In this example, `myrecipespkg` is the package where the step resides (if it is in a package). + +The reason to declare what packages should be loaded is parallel processing. When parallel worker processes are created, there is heterogeneity across technologies regarding which packages are loaded. Multicore methods on macOS and Linux load all of the packages that were loaded in the main R process. However, parallel processing using psock clusters have no additional packages loaded. If the home package for a recipe step is not loaded in the worker processes, the `prep()` methods cannot be found and an error occurs. + +If this S3 method is used for your step, you can rely on this for checking the installation: + +```{r} +#| eval: false +recipes::recipes_pkg_check(required_pkgs.step_hypothetical()) +``` + +If you'd like an example of this in a package, please take a look at the [embed](https://github.com/tidymodels/embed/) or [themis](https://github.com/tidymodels/themis/) package. + +### A tidy method + +The `broom::tidy()` method is a means to return information about the step in a usable format. For our step, it would be helpful to know the reference values. + +When the recipe has been prepped, those data are in the list `ref_dist`. A small function can be used to reformat that data into a tibble. It is customary to return the main values as `value`: + +```{r} +#| label: "tidy-calcs" +#| eval: false +format_pctl <- function(x) { + tibble::tibble( + value = unname(x), + percentile = as.numeric(gsub("%$", "", names(x))) + ) +} + +# For example: +pctl_step_object <- rec_obj$steps[[1]] +pctl_step_object +format_pctl(pctl_step_object$ref_dist[["hydrogen"]]) +``` + +The tidy method could return these values for each selected column. Before `prep()`, missing values can be used as placeholders. + +```{r} +#| label: "tidy" +#| eval: false +tidy.step_percentile <- function(x, ...) 
{ + if (is_trained(x)) { + res <- map_dfr(x$ref_dist, format_pctl, .id = "term") + } + else { + term_names <- sel2char(x$terms) + res <- + tibble( + terms = term_names, + value = rlang::na_dbl, + percentile = rlang::na_dbl + ) + } + # Always return the step id: + res$id <- x$id + res +} + +tidy(rec_obj, number = 1) +``` + +### Methods for tuning parameters + +The tune package can be used to find reasonable values of step arguments by model tuning. There are some S3 methods that are useful to define for your step. The percentile example doesn't really have any tunable parameters, so we will demonstrate using `step_poly()`, which returns a polynomial expansion of selected columns. Its function definition has the arguments: + +```{r} +#| label: "poly-args" +#| eval: false +args(step_poly) +``` + +The argument `degree` is tunable. + +To work with tune it is _helpful_ (but not required) to use an S3 method called `tunable()` to define which arguments should be tuned and how values of those arguments should be generated. + +`tunable()` takes the step object as its argument and returns a tibble with columns: + +* `name`: The name of the argument. + +* `call_info`: A list that describes how to call a function that returns a dials parameter object. + +* `source`: A character string that indicates where the tuning value comes from (i.e., a model, a recipe etc.). Here, it is just `"recipe"`. + +* `component`: A character string with more information about the source. For recipes, this is just the name of the step (e.g. `"step_poly"`). + +* `component_id`: A character string to indicate where a unique identifier is for the object. For recipes, this is just the `id` value of the step object. + +The main piece of information that requires some detail is `call_info`. This is a list column in the tibble. Each element of the list is a list that describes the package and function that can be used to create a dials parameter object. 
+ +For example, for a nearest-neighbors `neighbors` parameter, this value is just: + +```{r} +#| label: "mtry" +#| eval: false +info <- list(pkg = "dials", fun = "neighbors") + +# FYI: how it is used under-the-hood: +new_param_call <- rlang::call2(.fn = info$fun, .ns = info$pkg) +rlang::eval_tidy(new_param_call) +``` + +For `step_poly()`, a dials object is needed that returns an integer that is the number of new columns to create. It turns out that there are a few different types of tuning parameters related to degree: + +```r +> lsf.str("package:dials", pattern = "degree") +degree : function (range = c(1, 3), trans = NULL) +degree_int : function (range = c(1L, 3L), trans = NULL) +prod_degree : function (range = c(1L, 2L), trans = NULL) +spline_degree : function (range = c(3L, 10L), trans = NULL) +``` + +Looking at the `range` values, some return doubles and others return integers. For our problem, `degree_int()` would be a good choice. + +For `step_poly()` the `tunable()` S3 method could be: + +```{r} +#| label: "tunable" +#| eval: false +tunable.step_poly <- function (x, ...) 
{ + tibble::tibble( + name = c("degree"), + call_info = list(list(pkg = "dials", fun = "degree_int")), + source = "recipe", + component = "step_poly", + component_id = x$id + ) +} +``` + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + + diff --git a/content/learn/develop/thumbnail.png b/learn/develop/thumbnail.png similarity index 100% rename from content/learn/develop/thumbnail.png rename to learn/develop/thumbnail.png diff --git a/content/learn/develop/toolbox.svg b/learn/develop/toolbox.svg similarity index 100% rename from content/learn/develop/toolbox.svg rename to learn/develop/toolbox.svg diff --git a/learn/index.qmd b/learn/index.qmd new file mode 100644 index 00000000..fd771516 --- /dev/null +++ b/learn/index.qmd @@ -0,0 +1,20 @@ +--- +title: Learn +description: "Learn how to go farther with tidymodels in your modeling and machine learning projects." +icon_attribution: "Icons made by [Becris](https://www.flaticon.com/authors/becris) from www.flaticon.com" +listing: + - id: articles-links + categories: unnumbered + type: grid + page-size: 40 + contents: + - "*.qmd" + - "../start/models/index.qmd" + - "../start/recipes/index.qmd" + - "../start/resampling/index.qmd" + - "../start/tuning/index.qmd" + - "../start/case-study/index.qmd" +--- + +After you know [what you need to get started](/start/) with tidymodels, you can learn more and go further. Find articles here to help you solve specific problems using the tidymodels framework. 
+ diff --git a/learn/models/coefficients/figs/glmnet-plot-1.svg b/learn/models/coefficients/figs/glmnet-plot-1.svg new file mode 100644 index 00000000..745b368e --- /dev/null +++ b/learn/models/coefficients/figs/glmnet-plot-1.svg @@ -0,0 +1,212 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Austin + + + + + + + + + + +Clark_Lake + + + + + + + + + + +Harlem + + + + + + +0.001 +0.010 +0.100 +1.000 + + + + +0.001 +0.010 +0.100 +1.000 + + + + +0.001 +0.010 +0.100 +1.000 +0 +1 +2 +3 + + + + +penalty +coefficient + +mixture + + + + +0.1 +1.0 + + diff --git a/learn/models/coefficients/figs/lm-plot-1.svg b/learn/models/coefficients/figs/lm-plot-1.svg new file mode 100644 index 00000000..72b01d66 --- /dev/null +++ b/learn/models/coefficients/figs/lm-plot-1.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-0.5 +0.0 +0.5 +1.0 +1.5 + + + + + + + + +Austin +Clark_Lake +Harlem +Coefficient + +id + + + + + + + + + + +Bootstrap1 +Bootstrap2 +Bootstrap3 +Bootstrap4 +Bootstrap5 + + diff --git a/learn/models/coefficients/index.qmd b/learn/models/coefficients/index.qmd new file mode 100644 index 00000000..e2f4b8ee --- /dev/null +++ b/learn/models/coefficients/index.qmd @@ -0,0 +1,402 @@ +--- +title: "Working with model coefficients" +categories: + - model fitting + - tidying results + - linear regression + - model tuning +type: learn-subsection +weight: 5 +description: | + Create models that use coefficients, extract them from fitted models, and visualize them. 
+toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +pkgs <- c("tidymodels", "glmnet") +library(Matrix) +library(glmnet) +``` + +## Introduction + +There are many types of statistical models with diverse kinds of structure. Some models have coefficients (a.k.a. weights) for each term in the model. Familiar examples of such models are linear or logistic regression, but more complex models (e.g. neural networks, MARS) can also have model coefficients. When we work with models that use weights or coefficients, we often want to examine the estimated coefficients. + +This article describes how to retrieve the estimated coefficients from models fit using tidymodels. `r article_req_pkgs(pkgs)` + +## Linear regression + +Let's start with a linear regression model: + +$$\hat{y} = \hat{\beta}_0 + \hat{\beta}_1x_1 + \ldots + \hat{\beta}_px_p$$ + +The $\beta$ values are the coefficients and the $x_j$ are model predictors, or features. + +Let's use the [Chicago train data](https://bookdown.org/max/FES/chicago-intro.html) where we predict the ridership at the Clark and Lake station (column name: `ridership`) with the previous ridership data 14 days prior at three of the stations. + +The data are in the modeldata package: + +```{r} +#| label: "setup-tm" +#| message: false +#| warning: false +library(tidymodels) +tidymodels_prefer() +theme_set(theme_bw()) + +data(Chicago) + +Chicago <- Chicago %>% select(ridership, Clark_Lake, Austin, Harlem) +``` + +### A single model + +Let's start by fitting only a single parsnip model object. We'll create a model specification using `linear_reg()`. + +::: {.callout-note} +The default engine is `"lm"` so no call to `set_engine()` is required. +::: + +The `fit()` function estimates the model coefficients, given a formula and data set. 
+ + +```{r} +#| label: "lm-single" +lm_spec <- linear_reg() +lm_fit <- fit(lm_spec, ridership ~ ., data = Chicago) +lm_fit +``` + +The best way to retrieve the fitted parameters is to use the `tidy()` method. This function, in the broom package, returns the coefficients and their associated statistics in a data frame with standardized column names: + +```{r} +#| label: "lm-tidy" +tidy(lm_fit) +``` + +We'll use this function in subsequent sections. + +### Resampled or tuned models + +The tidymodels framework emphasizes the use of resampling methods to evaluate and characterize how well a model works. While time series resampling methods are appropriate for these data, we can also use the [bootstrap](https://www.tmwr.org/resampling.html#bootstrap) to resample the data. This is a standard resampling approach when evaluating the uncertainty in statistical estimates. + +We'll use five bootstrap resamples of the data to simplify the plots and output (normally, we would use a larger number of resamples for more reliable estimates). + +```{r} +#| label: "bootstraps" +set.seed(123) +bt <- bootstraps(Chicago, times = 5) +``` + +With resampling, we fit the same model to the different simulated versions of the data set produced by resampling. The tidymodels function [`fit_resamples()`](https://www.tmwr.org/resampling.html#resampling-performance) is the recommended approach for doing so. + +::: {.callout-warning} + The `fit_resamples()` function does not automatically save the model objects for each resample since these can be quite large and its main purpose is estimating performance. However, we can pass a function to `fit_resamples()` that _can_ save the model object or any other aspect of the fit. +::: + +This function takes a single argument that represents the fitted [workflow object](https://www.tmwr.org/workflows.html) (even if you don't give `fit_resamples()` a workflow). + +From this, we can extract the model fit. 
There are two "levels" of model objects that are available: + +* The parsnip model object, which wraps the underlying model object. We retrieve this using the `extract_fit_parsnip()` function. + +* The underlying model object (a.k.a. the engine fit) via the `extract_fit_engine()`. + +We'll use the latter option and then tidy this model object as we did in the previous section. Let's add this to the control function so that we can re-use it. + +```{r} +#| label: "lm-ctrl" +get_lm_coefs <- function(x) { + x %>% + # get the lm model object + extract_fit_engine() %>% + # transform its format + tidy() +} +tidy_ctrl <- control_grid(extract = get_lm_coefs) +``` + +This argument is then passed to `fit_resamples()`: + +```{r} +#| label: "lm-resampled" +lm_res <- + lm_spec %>% + fit_resamples(ridership ~ ., resamples = bt, control = tidy_ctrl) +lm_res +``` + +Note that there is a `.extracts` column in our resampling results. This object contains the output of our `get_lm_coefs()` function for each resample. The structure of the elements of this column is a little complex. Let's start by looking at the first element (which corresponds to the first resample): + + +```{r} +#| label: "lm-extract-ex" +lm_res$.extracts[[1]] +``` + +There is _another_ column in this element called `.extracts` that has the results of the `tidy()` function call: + +```{r} +#| label: "lm-extract-again" +lm_res$.extracts[[1]]$.extracts[[1]] +``` + +These nested columns can be flattened via the purrr `unnest()` function: + +```{r} +#| label: "lm-extract-almost" +lm_res %>% + select(id, .extracts) %>% + unnest(.extracts) +``` + +We still have a column of nested tibbles, so we can run the same command again to get the data into a more useful format: + +```{r} +#| label: "lm-extract-final" +lm_coefs <- + lm_res %>% + select(id, .extracts) %>% + unnest(.extracts) %>% + unnest(.extracts) + +lm_coefs %>% select(id, term, estimate, p.value) +``` + +That's better! 
Now, let's plot the model coefficients for each resample: + +```{r} +#| label: "lm-plot" +lm_coefs %>% + filter(term != "(Intercept)") %>% + ggplot(aes(x = term, y = estimate, group = id, col = id)) + + geom_hline(yintercept = 0, lty = 3) + + geom_line(alpha = 0.3, lwd = 1.2) + + labs(y = "Coefficient", x = NULL) + + theme(legend.position = "top") +``` + +There seems to be a lot of uncertainty in the coefficient for the Austin station data, but less for the other two. + +Looking at the code for unnesting the results, you may find the double-nesting structure excessive or cumbersome. However, the extraction functionality is flexible, and a simpler structure would prevent many use cases. + +## More complex: a glmnet model + +The glmnet model can fit the same linear regression model structure shown above. It uses regularization (a.k.a penalization) to estimate the model parameters. This has the benefit of shrinking the coefficients towards zero, important in situations where there are strong correlations between predictors or if some feature selection is required. Both of these cases are true for our Chicago train data set. + +There are two types of penalization that this model uses: + +* Lasso (a.k.a. $L_1$) penalties can shrink the model terms so much that they are absolute zero (i.e. their effect is entirely removed from the model). + +* Weight decay (a.k.a ridge regression or $L_2$) uses a different type of penalty that is most useful for highly correlated predictors. + +The glmnet model has two primary tuning parameters, the total amount of penalization and the mixture of the two penalty types. For example, this specification: + +```{r} +#| label: "glmnet-spec" +glmnet_spec <- + linear_reg(penalty = 0.1, mixture = 0.95) %>% + set_engine("glmnet") +``` + +has a penalty that is 95% lasso and 5% weight decay. The total amount of these two penalties is 0.1 (which is fairly high). 
+ +::: {.callout-note} +Models with regularization require that predictors are all on the same scale. The ridership at our three stations are very different, but glmnet [automatically centers and scales the data](https://parsnip.tidymodels.org/reference/details_linear_reg_glmnet.html). You can use recipes to [center and scale your data yourself](https://recipes.tidymodels.org/reference/step_normalize.html). +::: + +Let's combine the model specification with a formula in a model `workflow()` and then fit the model to the data: + +```{r} +#| label: "glmnet-wflow" +glmnet_wflow <- + workflow() %>% + add_model(glmnet_spec) %>% + add_formula(ridership ~ .) + +glmnet_fit <- fit(glmnet_wflow, Chicago) +glmnet_fit +``` + +In this output, the term `lambda` is used to represent the penalty. + +Note that the output shows many values of the penalty despite our specification of `penalty = 0.1`. It turns out that this model fits a "path" of penalty values. Even though we are interested in a value of 0.1, we can get the model coefficients for many associated values of the penalty from the same model object. + +Let's look at two different approaches to obtaining the coefficients. Both will use the `tidy()` method. One will tidy a glmnet object and the other will tidy a tidymodels object. + +### Using glmnet penalty values + +This glmnet fit contains multiple penalty values which depend on the data set; changing the data (or the mixture amount) often produces a different set of values. For this data set, there are `r length(extract_fit_engine(glmnet_fit)$lambda)` penalties available. To get the set of penalties produced for this data set, we can extract the engine fit and tidy: + +```{r} +#| label: "glmnet-tidy" +glmnet_fit %>% + extract_fit_engine() %>% + tidy() %>% + rename(penalty = lambda) %>% # <- for consistent naming + filter(term != "(Intercept)") +``` + +This works well but, it turns out that our penalty value (0.1) is not in the list produced by the model! 
The underlying package has functions that use interpolation to produce coefficients for this specific value, but the `tidy()` method for glmnet objects does not use it. + +### Using specific penalty values + +If we run the `tidy()` method on the workflow or parsnip object, a different function is used that returns the coefficients for the penalty value that we specified: + +```{r} +#| label: "glmnet-tidy-parsnip" +tidy(glmnet_fit) +``` + +For any other (single) penalty, we can use an additional argument: + +```{r} +#| label: "glmnet-tidy-parsnip-alt" +tidy(glmnet_fit, penalty = 5.5620) # A value from above +``` + +The reason for having two `tidy()` methods is that, with tidymodels, the focus is on using a specific penalty value. + + +### Tuning a glmnet model + +If we know a priori acceptable values for penalty and mixture, we can use the `fit_resamples()` function as we did before with linear regression. Otherwise, we can tune those parameters with the tidymodels `tune_*()` functions. + +Let's tune our glmnet model over both parameters with this grid: + +```{r} +#| label: "glmnet-grid" +pen_vals <- 10^seq(-3, 0, length.out = 10) +grid <- crossing(penalty = pen_vals, mixture = c(0.1, 1.0)) +``` + +Here is where more glmnet-related complexity comes in: we know that each resample and each value of `mixture` will probably produce a different set of penalty values contained in the model object. _How can we look at the coefficients at the specific penalty values that we are using to tune?_ + +The approach that we suggest is to use the special `path_values` option for glmnet. Details are described in the [technical documentation about glmnet and tidymodels](https://parsnip.tidymodels.org/reference/glmnet-details.html#arguments) but in short, this parameter will assign the collection of penalty values used by each glmnet fit (regardless of the data or value of mixture). 
+ +We can pass these as an engine argument and then update our previous workflow object: + +```{r} +#| label: "glmnet-tune" +glmnet_tune_spec <- + linear_reg(penalty = tune(), mixture = tune()) %>% + set_engine("glmnet", path_values = pen_vals) + +glmnet_wflow <- + glmnet_wflow %>% + update_model(glmnet_tune_spec) +``` + +Now we will use an extraction function similar to when we used ordinary least squares. We add an additional argument to retain coefficients that are shrunk to zero by the lasso penalty: + +```{r} +#| label: "glmnet-tuning" +get_glmnet_coefs <- function(x) { + x %>% + extract_fit_engine() %>% + tidy(return_zeros = TRUE) %>% + rename(penalty = lambda) +} +parsnip_ctrl <- control_grid(extract = get_glmnet_coefs) + +glmnet_res <- + glmnet_wflow %>% + tune_grid( + resamples = bt, + grid = grid, + control = parsnip_ctrl + ) +glmnet_res +``` + +As noted before, the elements of the main `.extracts` column have an embedded list column with the results of `get_glmnet_coefs()`: + +```{r} +#| label: "glmnet-extract-single" +glmnet_res$.extracts[[1]] %>% head() + +glmnet_res$.extracts[[1]]$.extracts[[1]] %>% head() +``` + +As before, we'll have to use a double `unnest()`. Since the penalty value is in both the top-level and lower-level `.extracts`, we'll use `select()` to get rid of the first version (but keep `mixture`): + +```{r} +#| label: "glmnet-extract-1" +#| eval: false +glmnet_res %>% + select(id, .extracts) %>% + unnest(.extracts) %>% + select(id, mixture, .extracts) %>% # <- removes the first penalty column + unnest(.extracts) +``` + +But wait! We know that each glmnet fit contains all of the coefficients. 
This means, for a specific resample and value of `mixture`, the results are the same: + +```{r} +#| label: "glmnet-extract-dups" +all.equal( + # First bootstrap, first `mixture`, first `penalty` + glmnet_res$.extracts[[1]]$.extracts[[1]], + # First bootstrap, first `mixture`, second `penalty` + glmnet_res$.extracts[[1]]$.extracts[[2]] +) +``` + +For this reason, we'll add a `slice(1)` when grouping by `id` and `mixture`. This will get rid of the replicated results. + +```{r} +#| label: "glmnet-extract-final" +glmnet_coefs <- + glmnet_res %>% + select(id, .extracts) %>% + unnest(.extracts) %>% + select(id, mixture, .extracts) %>% + group_by(id, mixture) %>% # ┐ + slice(1) %>% # │ Remove the redundant results + ungroup() %>% # ┘ + unnest(.extracts) + +glmnet_coefs %>% + select(id, penalty, mixture, term, estimate) %>% + filter(term != "(Intercept)") +``` + +Now we have the coefficients. Let's look at how they behave as more regularization is used: + +```{r} +#| label: "glmnet-plot" +#| fig-height: 4 +#| fig-width: 8.5 +glmnet_coefs %>% + filter(term != "(Intercept)") %>% + mutate(mixture = format(mixture)) %>% + ggplot(aes(x = penalty, y = estimate, col = mixture, groups = id)) + + geom_hline(yintercept = 0, lty = 3) + + geom_line(alpha = 0.5, lwd = 1.2) + + facet_wrap(~ term) + + scale_x_log10() + + scale_color_brewer(palette = "Accent") + + labs(y = "coefficient") + + theme(legend.position = "top") +``` + +Notice a couple of things: + +* With a pure lasso model (i.e., `mixture = 1`), the Austin station predictor is selected out in each resample. With a mixture of both penalties, its influence increases. Also, as the penalty increases, the uncertainty in this coefficient decreases. + +* The Harlem predictor is either quickly selected out of the model or goes from negative to positive. 
+ +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/content/learn/models/linear-regression.svg b/learn/models/linear-regression.svg similarity index 100% rename from content/learn/models/linear-regression.svg rename to learn/models/linear-regression.svg diff --git a/learn/models/parsnip-nnet/figs/biv-boundary-1.svg b/learn/models/parsnip-nnet/figs/biv-boundary-1.svg new file mode 100644 index 00000000..34fda99a --- /dev/null +++ b/learn/models/parsnip-nnet/figs/biv-boundary-1.svg @@ -0,0 +1,587 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-2.5 +0.0 +2.5 +5.0 + + + + + + + + +-2 +0 +2 +4 +A +B + +class + + + + +Class1 +Class2 + + diff --git a/learn/models/parsnip-nnet/figs/biv-plot-1.svg b/learn/models/parsnip-nnet/figs/biv-plot-1.svg new file mode 100644 index 00000000..24c122c9 --- /dev/null +++ b/learn/models/parsnip-nnet/figs/biv-plot-1.svg @@ -0,0 +1,2089 
@@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-5.0 +-2.5 +0.0 +2.5 +5.0 + + + + + + + + + +-2 +0 +2 +4 +A +B + +class + + + + +Class1 +Class2 + + diff --git a/learn/models/parsnip-nnet/index.qmd b/learn/models/parsnip-nnet/index.qmd new file mode 100644 index 00000000..6a045d38 --- /dev/null +++ b/learn/models/parsnip-nnet/index.qmd @@ -0,0 +1,150 @@ +--- +title: "Classification models using a neural network" +categories: + - model fitting + - torch + - neural networks +type: learn-subsection +weight: 2 +description: | + Train a classification model and evaluate its performance. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +pkgs <- c("tidymodels", "brulee", "AppliedPredictiveModeling") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +`r article_req_pkgs(pkgs)` You will also need the python torch library installed (see `?torch::install_torch()`). + +We can create classification models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict categorical quantities or class labels. Here, let's fit a single classification model using a neural network and evaluate using a validation set. While the [tune](https://tune.tidymodels.org/) package has functionality to also do this, the parsnip package is the center of attention in this article so that we can better understand its usage. + +## Fitting a neural network + + +Let's fit a model to a small, two predictor classification data set. The data are in the modeldata package (part of tidymodels) and have been split into training, validation, and test data sets. 
In this analysis, the test set is left untouched; this article tries to emulate a good data usage methodology where the test set would only be evaluated once at the end after a variety of models have been considered. + + +```{r} +#| label: "biv--split" +library(AppliedPredictiveModeling) + +set.seed(321) +cls_train <- quadBoundaryFunc(2000) %>% select(A = X1, B = X2, class) +cls_val <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class) +cls_test <- quadBoundaryFunc( 500) %>% select(A = X1, B = X2, class) +``` + +A plot of the data shows two right-skewed predictors: + +```{r} +#| label: "biv-plot" +#| fig-width: 6 +#| fig-height: 6.1 +ggplot(cls_train, aes(x = A, y = B, col = class)) + + geom_point(alpha = 1 / 4, cex = 3) + + coord_fixed() +``` + +Let's use a single hidden layer neural network to predict the outcome. To do this, we transform the predictor columns to be on a common scale (using `step_normalize()`). We can use [recipes](https://recipes.tidymodels.org/) to do so: + +```{r} +#| label: "biv--proc" +biv_rec <- + recipe(class ~ ., data = cls_train) %>% + step_normalize(all_predictors()) +``` + +This recipe is not directly executed; the steps will be estimated when the model is fit. + +We can use the brulee package to fit a model with 10 hidden units and a small penalty value, to regularize the model: + +```{r} +#| label: "biv-nnet" +nnet_spec <- + mlp(epochs = 1000, hidden_units = 10, penalty = 0.01, learn_rate = 0.1) %>% + set_engine("brulee", validation = 0) %>% + set_mode("classification") + +nnet_wflow <- + biv_rec %>% + workflow(nnet_spec) + +set.seed(987) +nnet_fit <- fit(nnet_wflow, cls_train) +nnet_fit %>% extract_fit_engine() +``` + +## Model performance + +In parsnip, the `predict()` function can be used to characterize performance on the validation set. 
Since parsnip always produces tibble outputs, these can just be column bound to the original data: + +```{r} +#| label: "biv--perf" +val_results <- + cls_val %>% + bind_cols( + predict(nnet_fit, new_data = cls_val), + predict(nnet_fit, new_data = cls_val, type = "prob") + ) +val_results %>% slice(1:5) + +val_results %>% roc_auc(truth = class, .pred_Class1) + +val_results %>% accuracy(truth = class, .pred_class) + +val_results %>% conf_mat(truth = class, .pred_class) +``` + +Let's also create a grid to get a visual sense of the class boundary for the test set. + +```{r} +#| label: "biv-boundary" +#| fig-width: 6 +#| fig-height: 6.1 +a_rng <- range(cls_train$A) +b_rng <- range(cls_train$B) +x_grid <- + expand.grid(A = seq(a_rng[1], a_rng[2], length.out = 100), + B = seq(b_rng[1], b_rng[2], length.out = 100)) + + +# Make predictions using the transformed predictors but +# attach them to the predictors in the original units: +x_grid <- + x_grid %>% + bind_cols(predict(nnet_fit, x_grid, type = "prob")) + +ggplot(x_grid, aes(x = A, y = B)) + + geom_point(data = cls_test, aes(col = class), alpha = 1 / 2, cex = 3) + + geom_contour(aes(z = .pred_Class1), breaks = .5, col = "black", linewidth = 1) + + coord_fixed() +``` + + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg b/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg new file mode 100644 index 00000000..66600a8d --- /dev/null +++ b/learn/models/parsnip-ranger-glmnet/figs/glmn-pred-1.svg @@ -0,0 +1,1595 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +glmnet + + + + + + + + + + +random forest + + + + + +5.0 +5.5 +6.0 + + + +5.0 +5.5 +6.0 +4.5 +5.0 +5.5 + + + +prediction +Sale_Price + + diff --git a/learn/models/parsnip-ranger-glmnet/index.qmd b/learn/models/parsnip-ranger-glmnet/index.qmd new file mode 100644 index 00000000..5f8f624d --- /dev/null +++ b/learn/models/parsnip-ranger-glmnet/index.qmd @@ -0,0 +1,246 @@ +--- +title: "Regression models two ways" +categories: + - model fitting + - random forests + - linear regression +type: learn-subsection +weight: 1 +description: | + Create and train different kinds of regression models with different computational engines. 
+toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(ranger) +library(randomForest) +library(glmnet) + +preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold") +pred_names <- paste0("`", preds, "`") + +pkgs <- c("tidymodels", "ranger", "randomForest", "glmnet") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +`r article_req_pkgs(pkgs)` + +We can create regression models with the tidymodels package [parsnip](https://parsnip.tidymodels.org/) to predict continuous or numeric quantities. Here, let's first fit a random forest model, which does _not_ require all numeric input (see discussion [here](https://bookdown.org/max/FES/categorical-trees.html)) and discuss how to use `fit()` and `fit_xy()`, as well as _data descriptors_. + +Second, let's fit a regularized linear regression model to demonstrate how to move between different types of models using parsnip. + +## The Ames housing data + +We'll use the Ames housing data set to demonstrate how to create regression models using parsnip. First, set up the data set and create a simple training/test set split: + +```{r} +#| label: "ames-split" +library(tidymodels) + +data(ames) + +set.seed(4595) +data_split <- initial_split(ames, strata = "Sale_Price", prop = 0.75) + +ames_train <- training(data_split) +ames_test <- testing(data_split) +``` + +The use of the test set here is _only for illustration_; normally in a data analysis these data would be saved to the very end after many models have been evaluated. + +## Random forest + +We'll start by fitting a random forest model to a small set of parameters. Let's create a model with the predictors `r knitr::combine_words(pred_names)`. 
A simple random forest model can be specified via: + +```{r} +#| label: "rf-basic" +rf_defaults <- rand_forest(mode = "regression") +rf_defaults +``` + +The model will be fit with the ranger package by default. Since we didn't add any extra arguments to `fit`, _many_ of the arguments will be set to their defaults from the function `ranger::ranger()`. The help pages for the model function describe the default parameters and you can also use the `translate()` function to check out such details. + +The parsnip package provides two different interfaces to fit a model: + +- the formula interface (`fit()`), and +- the non-formula interface (`fit_xy()`). + +Let's start with the non-formula interface: + + +```{r} +#| label: "rf-basic-xy" +preds <- c("Longitude", "Latitude", "Lot_Area", "Neighborhood", "Year_Sold") + +rf_xy_fit <- + rf_defaults %>% + set_engine("ranger") %>% + fit_xy( + x = ames_train[, preds], + y = log10(ames_train$Sale_Price) + ) + +rf_xy_fit +``` + +The non-formula interface doesn't do anything to the predictors before passing them to the underlying model function. This particular model does _not_ require indicator variables (sometimes called "dummy variables") to be created prior to fitting the model. Note that the output shows "Number of independent variables: 5". + +For regression models, we can use the basic `predict()` method, which returns a tibble with a column named `.pred`: + +```{r} +#| label: "rf-basic-xy-pred" +test_results <- + ames_test %>% + select(Sale_Price) %>% + mutate(Sale_Price = log10(Sale_Price)) %>% + bind_cols( + predict(rf_xy_fit, new_data = ames_test[, preds]) + ) +test_results %>% slice(1:5) + +# summarize performance +test_results %>% metrics(truth = Sale_Price, estimate = .pred) +``` + +Note that: + + * If the model required indicator variables, we would have to create them manually prior to using `fit()` (perhaps using the recipes package). + * We had to manually log the outcome prior to modeling. 
+ +Now, for illustration, let's use the formula method using some new parameter values: + +```{r} +#| label: "rf-basic-form" +rand_forest(mode = "regression", mtry = 3, trees = 1000) %>% + set_engine("ranger") %>% + fit( + log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, + data = ames_train + ) +``` + +Suppose that we would like to use the randomForest package instead of ranger. To do so, the only part of the syntax that needs to change is the `set_engine()` argument: + + +```{r} +#| label: "rf-rf" +rand_forest(mode = "regression", mtry = 3, trees = 1000) %>% + set_engine("randomForest") %>% + fit( + log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, + data = ames_train + ) +``` + +Look at the formula code that was printed out; one function uses the argument name `ntree` and the other uses `num.trees`. The parsnip models don't require you to know the specific names of the main arguments. + +Now suppose that we want to modify the value of `mtry` based on the number of predictors in the data. Usually, a good default value is `floor(sqrt(num_predictors))` but a pure bagging model requires an `mtry` value equal to the total number of parameters. There may be cases where you may not know how many predictors are going to be present when the model will be fit (perhaps due to the generation of indicator variables or a variable filter) so this might be difficult to know exactly ahead of time when you write your code. + +When the model is being fit by parsnip, [_data descriptors_](https://parsnip.tidymodels.org/reference/descriptors.html) are made available. These attempt to let you know what you will have available when the model is fit. When a model object is created (say using `rand_forest()`), the values of the arguments that you give it are _immediately evaluated_ unless you delay them. To delay the evaluation of any argument, you can use `rlang::expr()` to make an expression. 
+ +Two relevant data descriptors for our example model are: + + * `.preds()`: the number of predictor _variables_ in the data set that are associated with the predictors **prior to dummy variable creation**. + * `.cols()`: the number of predictor _columns_ after dummy variables (or other encodings) are created. + +Since ranger won't create indicator values, `.preds()` would be appropriate for `mtry` for a bagging model. + +For example, let's use an expression with the `.preds()` descriptor to fit a bagging model: + +```{r} +#| label: "bagged" +rand_forest(mode = "regression", mtry = .preds(), trees = 1000) %>% + set_engine("ranger") %>% + fit( + log10(Sale_Price) ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, + data = ames_train + ) +``` + + +## Regularized regression + +A linear model might work for this data set as well. We can use the `linear_reg()` parsnip model. There are two engines that can perform regularization/penalization, the glmnet and sparklyr packages. Let's use the former here. The glmnet package only implements a non-formula method, but parsnip will allow either one to be used. + +When regularization is used, the predictors should first be centered and scaled before being passed to the model. The formula method won't do that automatically so we will need to do this ourselves. We'll use the [recipes](https://recipes.tidymodels.org/) package for these steps. 
+ +```{r} +#| label: "glmn-form" +norm_recipe <- + recipe( + Sale_Price ~ Longitude + Latitude + Lot_Area + Neighborhood + Year_Sold, + data = ames_train + ) %>% + step_other(Neighborhood) %>% + step_dummy(all_nominal()) %>% + step_center(all_predictors()) %>% + step_scale(all_predictors()) %>% + step_log(Sale_Price, base = 10) %>% + # estimate the means and standard deviations + prep(training = ames_train, retain = TRUE) + +# Now let's fit the model using the processed version of the data + +glmn_fit <- + linear_reg(penalty = 0.001, mixture = 0.5) %>% + set_engine("glmnet") %>% + fit(Sale_Price ~ ., data = bake(norm_recipe, new_data = NULL)) +glmn_fit +``` + +If `penalty` were not specified, all of the `lambda` values would be computed. + +To get the predictions for this specific value of `lambda` (aka `penalty`): + +```{r} +#| label: "glmn-pred" +# First, get the processed version of the test set predictors: +test_normalized <- bake(norm_recipe, new_data = ames_test, all_predictors()) + +test_results <- + test_results %>% + rename(`random forest` = .pred) %>% + bind_cols( + predict(glmn_fit, new_data = test_normalized) %>% + rename(glmnet = .pred) + ) +test_results + +test_results %>% metrics(truth = Sale_Price, estimate = glmnet) + +test_results %>% + gather(model, prediction, -Sale_Price) %>% + ggplot(aes(x = prediction, y = Sale_Price)) + + geom_abline(col = "green", lty = 2) + + geom_point(alpha = .4) + + facet_wrap(~model) + + coord_fixed() +``` + +This final plot compares the performance of the random forest and regularized regression models. 
+ +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/learn/models/pls/figs/plot-1.svg b/learn/models/pls/figs/plot-1.svg new file mode 100644 index 00000000..8481b4f5 --- /dev/null +++ b/learn/models/pls/figs/plot-1.svg @@ -0,0 +1,150 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.25 +0.50 +0.75 +1.00 + + + + + + + +4 +8 +12 +components +proportion + +source + + + + + + + + + + + + +fat +predictors +protein +water + + diff --git a/learn/models/pls/index.qmd b/learn/models/pls/index.qmd new file mode 100644 index 00000000..799a2d08 --- /dev/null +++ b/learn/models/pls/index.qmd @@ -0,0 +1,202 @@ +--- +title: "Multivariate analysis using partial least squares" +categories: + - pre-processing + - multivariate analysis + - partial least squares +type: learn-subsection +weight: 6 +description: | + Build and fit a predictive model with more than one outcome. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(pls) +library(tidymodels) +library(sessioninfo) +pkgs <- c("modeldata", "pls", "tidymodels") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +`r article_req_pkgs(pkgs)` + +"Multivariate analysis" usually refers to multiple _outcomes_ being modeled, analyzed, and/or predicted. There are multivariate versions of many common statistical tools. For example, suppose there was a data set with columns `y1` and `y2` representing two outcomes to be predicted. 
The `lm()` function would look something like: + +```{r} +#| label: "lm" +#| eval: false +lm(cbind(y1, y2) ~ ., data = dat) +``` + +This `cbind()` call is pretty awkward and is a consequence of how the traditional formula infrastructure works. The recipes package is a lot easier to work with! This article demonstrates how to model multiple outcomes. + +The data that we'll use has three outcomes. From `?modeldata::meats`: + +> "These data are recorded on a Tecator Infratec Food and Feed Analyzer working in the wavelength range 850 - 1050 nm by the Near Infrared Transmission (NIT) principle. Each sample contains finely chopped pure meat with different moisture, fat and protein contents. + +> "For each meat sample the data consists of a 100 channel spectrum of absorbances and the contents of moisture (water), fat and protein. The absorbance is `-log10` of the transmittance measured by the spectrometer. The three contents, measured in percent, are determined by analytic chemistry." + +The goal is to predict the proportion of the three substances using the chemistry test. There can often be a high degree of between-variable correlations in predictors, and that is certainly the case here. + +To start, let's take the two data matrices (called `endpoints` and `absorp`) and bind them together in a data frame: + +```{r} +#| label: "data" +library(modeldata) +data(meats) +``` + +The three _outcomes_ have fairly high correlations also. + +## Preprocessing the data + +If the outcomes can be predicted using a linear model, partial least squares (PLS) is an ideal method. PLS models the data as a function of a set of unobserved _latent_ variables that are derived in a manner similar to principal component analysis (PCA). + +PLS, unlike PCA, also incorporates the outcome data when creating the PLS components. 
Like PCA, it tries to maximize the variance of the predictors that are explained by the components but it also tries to simultaneously maximize the correlation between those components and the outcomes. In this way, PLS _chases_ variation of the predictors and outcomes. + +Since we are working with variances and covariances, we need to standardize the data. The recipe will center and scale all of the variables. + +Many base R functions that deal with multivariate outcomes using a formula require the use of `cbind()` on the left-hand side of the formula to work with the traditional formula methods. In tidymodels, recipes do not; the outcomes can be symbolically "added" together on the left-hand side: + +```{r} +#| label: "recipe" +norm_rec <- + recipe(water + fat + protein ~ ., data = meats) %>% + step_normalize(everything()) +``` + +Before we can finalize the PLS model, the number of PLS components to retain must be determined. This can be done using performance metrics such as the root mean squared error. However, we can also calculate the proportion of variance explained by the components for the _predictors and each of the outcomes_. This allows an informed choice to be made based on the level of evidence that the situation requires. + +Since the data set isn't large, let's use resampling to measure these proportions. With ten repeats of 10-fold cross-validation, we build the PLS model on 90% of the data and evaluate on the heldout 10%. For each of the 100 models, we extract and save the proportions. 
+ +The folds can be created using the [rsample](https://rsample.tidymodels.org/) package and the recipe can be estimated for each resample using the [`prepper()`](https://rsample.tidymodels.org/reference/prepper.html) function: + +```{r} +#| label: "cv" +set.seed(57343) +folds <- vfold_cv(meats, repeats = 10) + +folds <- + folds %>% + mutate(recipes = map(splits, prepper, recipe = norm_rec)) +``` + +## Partial least squares + +The complicated parts for moving forward are: + +1. Formatting the predictors and outcomes into the format that the pls package requires, and +2. Estimating the proportions. + +For the first part, the standardized outcomes and predictors need to be formatted into two separate matrices. Since we used `retain = TRUE` when prepping the recipes, we can `bake()` with `new_data = NULL` to get the processed data back out. To save the data as a matrix, the option `composition = "matrix"` will avoid saving the data as tibbles and use the required format. + +The pls package expects a simple formula to specify the model, but each side of the formula should _represent a matrix_. In other words, we need a data set with two columns where each column is a matrix. The secret to doing this is to "protect" the two matrices using `I()` when adding them to the data frame. + +The calculation for the proportion of variance explained is straightforward for the predictors; the function `pls::explvar()` will compute that. For the outcomes, the process is more complicated. A ready-made function to compute these is not obvious but there is some code inside of the summary function to do the computation (see below). + +The function `get_var_explained()` shown here will do all these computations and return a data frame with columns `components`, `source` (for the predictors, water, etc), and the `proportion` of variance that is explained by the components. + + +```{r} +#| label: "var-explained" +library(pls) + +get_var_explained <- function(recipe, ...) 
{ + + # Extract the predictors and outcomes into their own matrices + y_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_outcomes()) + x_mat <- bake(recipe, new_data = NULL, composition = "matrix", all_predictors()) + + # The pls package prefers the data in a data frame where the outcome + # and predictors are in _matrices_. To make sure this is formatted + # properly, use the `I()` function to inhibit `data.frame()` from making + # all the individual columns. `pls_format` should have two columns. + pls_format <- data.frame( + endpoints = I(y_mat), + measurements = I(x_mat) + ) + # Fit the model + mod <- plsr(endpoints ~ measurements, data = pls_format) + + # Get the proportion of the predictor variance that is explained + # by the model for different number of components. + xve <- explvar(mod)/100 + + # To do the same for the outcome, it is more complex. This code + # was extracted from pls:::summary.mvr. + explained <- + drop(pls::R2(mod, estimate = "train", intercept = FALSE)$val) %>% + # transpose so that components are in rows + t() %>% + as_tibble() %>% + # Add the predictor proportions + mutate(predictors = cumsum(xve) %>% as.vector(), + components = seq_along(xve)) %>% + # Put into a tidy format that is tall + pivot_longer( + cols = c(-components), + names_to = "source", + values_to = "proportion" + ) +} +``` + +We compute this data frame for each resample and save the results in the different columns. + +```{r} +#| label: "get-estimates" +folds <- + folds %>% + mutate(var = map(recipes, get_var_explained), + var = unname(var)) +``` + +To extract and aggregate these data, simple row binding can be used to stack the data vertically. Most of the action happens in the first 15 components so let's filter the data and compute the _average_ proportion. 
+ +```{r} +#| label: "collapse-and-average" +variance_data <- + bind_rows(folds[["var"]]) %>% + filter(components <= 15) %>% + group_by(components, source) %>% + summarize(proportion = mean(proportion)) +``` + +The plot below shows that, if the protein measurement is important, you might require 10 or so components to achieve a good representation of that outcome. Note that the predictor variance is captured extremely well using a single component. This is due to the high degree of correlation in those data. + +```{r} +#| label: "plot" +#| fig-width: 6 +#| fig-height: 4.25 +#| out-width: "100%" +ggplot(variance_data, aes(x = components, y = proportion, col = source)) + + geom_line(alpha = 0.5, size = 1.2) + + geom_point() +``` + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/learn/models/sub-sampling/figs/merge-metrics-1.svg b/learn/models/sub-sampling/figs/merge-metrics-1.svg new file mode 100644 index 00000000..97d00373 --- /dev/null +++ b/learn/models/sub-sampling/figs/merge-metrics-1.svg @@ -0,0 +1,215 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +j_index + + + + + + + + + + +roc_auc + + + + +no_sampling +rose + + +no_sampling +rose +0.85 +0.90 +0.95 +1.00 + + + + +0.00 +0.25 +0.50 +0.75 + + + + +sampling +.estimate + + diff --git a/learn/models/sub-sampling/index.qmd b/learn/models/sub-sampling/index.qmd new file mode 100644 index 00000000..75d6c412 --- /dev/null +++ b/learn/models/sub-sampling/index.qmd @@ -0,0 +1,197 @@ +--- +title: "Subsampling for class imbalances" +categories: + - model fitting + - pre-processing + - class imbalances + - discriminant analysis +type: learn-subsection +weight: 3 
+description: | + Improve model performance in imbalanced data sets through undersampling or oversampling. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(readr) +library(klaR) +library(tidymodels) +library(discrim) +library(themis) +library(ROSE) + +pkgs <- c("tidymodels", "klaR", "themis", "discrim", "readr", "ROSE") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +`r article_req_pkgs(pkgs)` + +Subsampling a training set, either undersampling or oversampling the appropriate class or classes, can be a helpful approach to dealing with classification data where one or more classes occur very infrequently. In such a situation (without compensating for it), most models will overfit to the majority class and produce very good statistics for the class containing the frequently occurring classes while the minority classes have poor performance. + +This article describes subsampling for dealing with class imbalances. For better understanding, some knowledge of classification metrics like sensitivity, specificity, and receiver operating characteristic curves is required. See Section 3.2.2 in [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/measuring-performance.html) for more information on these metrics. + +## Simulated data + +Consider a two-class problem where the first class has a very low rate of occurrence. 
The data were simulated and can be imported into R using the code below: + +```{r} +#| label: "load-data" +#| message: false +imbal_data <- + readr::read_csv("https://bit.ly/imbal_data") %>% + mutate(Class = factor(Class)) +dim(imbal_data) +table(imbal_data$Class) +``` + +If "Class1" is the event of interest, it is very likely that a classification model would be able to achieve very good _specificity_ since almost all of the data are of the second class. _Sensitivity_, however, would likely be poor since the models will optimize accuracy (or other loss functions) by predicting everything to be the majority class. + +One result of class imbalance when there are two classes is that the default probability cutoff of 50% is inappropriate; a different cutoff that is more extreme might be able to achieve good performance. + +## Subsampling the data + +One way to alleviate this issue is to _subsample_ the data. There are a number of ways to do this but the most simple one is to _sample down_ (undersample) the majority class data until it occurs with the same frequency as the minority class. While it may seem counterintuitive, throwing out a large percentage of your data can be effective at producing a useful model that can recognize both the majority and minority classes. In some cases, this even means that the overall performance of the model is better (e.g. improved area under the ROC curve). However, subsampling almost always produces models that are _better calibrated_, meaning that the distributions of the class probabilities are more well behaved. As a result, the default 50% cutoff is much more likely to produce better sensitivity and specificity values than they would otherwise. + +Let's explore subsampling using `themis::step_rose()` in a recipe for the simulated data. It uses the ROSE (random over sampling examples) method from [Menardi, G. and Torelli, N. 
(2014)](https://scholar.google.com/scholar?hl=en&q=%22training+and+assessing+classification+rules+with+imbalanced+data%22). This is an example of an oversampling strategy, rather than undersampling. + +In terms of workflow: + + * It is extremely important that subsampling occurs _inside of resampling_. Otherwise, the resampling process can produce [poor estimates of model performance](https://topepo.github.io/caret/subsampling-for-class-imbalances.html#resampling). + * The subsampling process should only be applied to the analysis set. The assessment set should reflect the event rates seen "in the wild" and, for this reason, the `skip` argument to `step_downsample()` and other subsampling recipes steps has a default of `TRUE`. + +Here is a simple recipe implementing oversampling: + +```{r} +#| label: "rec" +library(tidymodels) +library(themis) +imbal_rec <- + recipe(Class ~ ., data = imbal_data) %>% + step_rose(Class) +``` + +For a model, let's use a [quadratic discriminant analysis](https://en.wikipedia.org/wiki/Quadratic_classifier#Quadratic_discriminant_analysis) (QDA) model. From the discrim package, this model can be specified using: + +```{r} +#| label: "qda" +library(discrim) +qda_mod <- + discrim_regularized(frac_common_cov = 0, frac_identity = 0) %>% + set_engine("klaR") +``` + +To keep these objects bound together, they can be combined in a [workflow](https://workflows.tidymodels.org/): + +```{r} +#| label: "wflw" +qda_rose_wflw <- + workflow() %>% + add_model(qda_mod) %>% + add_recipe(imbal_rec) +qda_rose_wflw +``` + +## Model performance + +Stratified, repeated 10-fold cross-validation is used to resample the model: + +```{r} +#| label: "cv" +set.seed(5732) +cv_folds <- vfold_cv(imbal_data, strata = "Class", repeats = 5) +``` + +To measure model performance, let's use two metrics: + + * The area under the [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) is an overall assessment of performance across _all_ cutoffs. 
Values near one indicate very good results while values near 0.5 would imply that the model is very poor. + * The _J_ index (a.k.a. [Youden's _J_](https://en.wikipedia.org/wiki/Youden%27s_J_statistic) statistic) is `sensitivity + specificity - 1`. Values near one are once again best. + +If a model is poorly calibrated, the ROC curve value might not show diminished performance. However, the _J_ index would be lower for models with pathological distributions for the class probabilities. The yardstick package will be used to compute these metrics. + +```{r} +#| label: "metrics" +cls_metrics <- metric_set(roc_auc, j_index) +``` + +Now, we train the models and generate the results using `tune::fit_resamples()`: + +```{r} +#| label: "resample-rose" +#| message: false +set.seed(2180) +qda_rose_res <- fit_resamples( + qda_rose_wflw, + resamples = cv_folds, + metrics = cls_metrics +) + +collect_metrics(qda_rose_res) +``` + +What do the results look like without using ROSE? We can create another workflow and fit the QDA model along the same resamples: + +```{r} +#| label: "qda-only" +qda_wflw <- + workflow() %>% + add_model(qda_mod) %>% + add_formula(Class ~ .) + +set.seed(2180) +qda_only_res <- fit_resamples(qda_wflw, resamples = cv_folds, metrics = cls_metrics) +collect_metrics(qda_only_res) +``` + +It looks like ROSE helped a lot, especially with the J-index. Class imbalance sampling methods tend to greatly improve metrics based on the hard class predictions (i.e., the categorical predictions) because the default cutoff tends to be a better balance of sensitivity and specificity. + +Let's plot the metrics for each resample to see how the individual results changed. 
+ +```{r} +#| label: "merge-metrics" +no_sampling <- + qda_only_res %>% + collect_metrics(summarize = FALSE) %>% + dplyr::select(-.estimator) %>% + mutate(sampling = "no_sampling") + +with_sampling <- + qda_rose_res %>% + collect_metrics(summarize = FALSE) %>% + dplyr::select(-.estimator) %>% + mutate(sampling = "rose") + +bind_rows(no_sampling, with_sampling) %>% + mutate(label = paste(id2, id)) %>% + ggplot(aes(x = sampling, y = .estimate, group = label)) + + geom_line(alpha = .4) + + facet_wrap(~ .metric, scales = "free_y") +``` + +This visually demonstrates that the subsampling mostly affects metrics that use the hard class predictions. + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/content/learn/models/thumbnail.png b/learn/models/thumbnail.png similarity index 100% rename from content/learn/models/thumbnail.png rename to learn/models/thumbnail.png diff --git a/learn/models/time-series/figs/plot-1.svg b/learn/models/time-series/figs/plot-1.svg new file mode 100644 index 00000000..ba6b7724 --- /dev/null +++ b/learn/models/time-series/figs/plot-1.svg @@ -0,0 +1,200 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +3 +4 +5 + + + + + + + + +2012 +2013 +2014 +2015 +2016 +start_date +MAPE + +error + + + + + + +extrapolation +interpolation + + diff --git a/learn/models/time-series/index.qmd b/learn/models/time-series/index.qmd new file mode 100644 index 00000000..995b41ef --- /dev/null +++ b/learn/models/time-series/index.qmd @@ -0,0 +1,227 @@ +--- +title: "Modeling time series with tidy resampling" +categories: + - model fitting + - time series +type: learn-subsection +weight: 4 +description: | + Calculate performance 
estimates for time series forecasts using resampling. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(timetk) +library(forecast) +library(tidymodels) +library(sweep) +library(zoo) +pkgs <- c("tidymodels", "timetk", "forecast", "sweep", "zoo") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +`r article_req_pkgs(pkgs)` + +"[Demo Week: Tidy Forecasting with sweep](https://www.business-science.io/code-tools/2017/10/25/demo_week_sweep.html)" is an excellent article that uses tidy methods with time series. This article uses their analysis with rsample to find performance estimates for future observations using [rolling forecast origin resampling](https://robjhyndman.com/hyndsight/crossvalidation/). + +## Example data + +The data for this article are sales of alcoholic beverages originally from [the Federal Reserve Bank of St. Louis website](https://fred.stlouisfed.org/series/S4248SM144NCEN). + +```{r} +#| label: "read-data" +library(tidymodels) +library(modeldata) +data("drinks") +glimpse(drinks) +``` + +Each row represents one month of sales (in millions of US dollars). + +## Time series resampling + +Suppose that we need predictions for one year ahead and our model should use the most recent data from the last 20 years. 
To set up this resampling scheme: + +```{r} +#| label: "rof" +roll_rs <- rolling_origin( + drinks, + initial = 12 * 20, + assess = 12, + cumulative = FALSE + ) + +nrow(roll_rs) + +roll_rs +``` + +Each `split` element contains the information about that resample: + +```{r} +#| label: "split" +roll_rs$splits[[1]] +``` + +For plotting, let's index each split by the first day of the assessment set: + +```{r} +#| label: "labels" +get_date <- function(x) { + min(assessment(x)$date) +} + +start_date <- map(roll_rs$splits, get_date) +roll_rs$start_date <- do.call("c", start_date) +head(roll_rs$start_date) +``` + +This resampling scheme has `r nrow(roll_rs)` splits of the data so that there will be `r nrow(roll_rs)` ARIMA models that are fit. To create the models, we use the `auto.arima()` function from the forecast package. The rsample functions `analysis()` and `assessment()` return a data frame, so another step converts the data to a `ts` object called `mod_dat` using a function in the timetk package. + +```{r} +#| label: "model-fun" +library(forecast) # for `auto.arima` +library(timetk) # for `tk_ts` +library(zoo) # for `as.yearmon` + +fit_model <- function(x, ...) { + # suggested by Matt Dancho: + x %>% + analysis() %>% + # Since the first day changes over resamples, adjust it + # based on the first date value in the data frame + tk_ts(start = .$date[[1]] %>% as.yearmon(), + frequency = 12, + silent = TRUE) %>% + auto.arima(...) +} +``` + +Save each model in a new column: + +```{r} +#| label: "model-fit" +#| warning: false +#| message: false +roll_rs$arima <- map(roll_rs$splits, fit_model) + +# For example: +roll_rs$arima[[1]] +``` + +(There are some warnings produced by these regarding extra columns in the data that can be ignored.) + +## Model performance + +Using the model fits, let's measure performance in two ways: + + * _Interpolation_ error will measure how well the model fits to the data that were used to create the model. 
This is most likely optimistic since no holdout method is used. + * _Extrapolation_ or _forecast_ error evaluates the performance of the model on the data from the following year (that were not used in the model fit). + +In each case, the mean absolute percent error (MAPE) is the statistic used to characterize the model fits. The interpolation error can be computed from the `Arima` object. To make things easy, let's use the sweep package's `sw_glance()` function: + +```{r} +#| label: "interp" +library(sweep) + +roll_rs$interpolation <- map_dbl( + roll_rs$arima, + function(x) + sw_glance(x)[["MAPE"]] + ) + +summary(roll_rs$interpolation) +``` + +For the extrapolation error, the model and split objects are required. Using these: + +```{r} +#| label: "extrap" +get_extrap <- function(split, mod) { + n <- nrow(assessment(split)) + # Get assessment data + pred_dat <- assessment(split) %>% + mutate( + pred = as.vector(forecast(mod, h = n)$mean), + pct_error = ( S4248SM144NCEN - pred ) / S4248SM144NCEN * 100 + ) + mean(abs(pred_dat$pct_error)) +} + +roll_rs$extrapolation <- + map2_dbl(roll_rs$splits, roll_rs$arima, get_extrap) + +summary(roll_rs$extrapolation) +``` + +What do these error estimates look like over time? + +```{r} +#| label: "plot" +roll_rs %>% + select(interpolation, extrapolation, start_date) %>% + pivot_longer(cols = matches("ation"), names_to = "error", values_to = "MAPE") %>% + ggplot(aes(x = start_date, y = MAPE, col = error)) + + geom_point() + + geom_line() +``` + +It is likely that the interpolation error is an underestimate to some degree, as mentioned above. + +It is also worth noting that `rolling_origin()` can be used over calendar periods, rather than just over a fixed window size. This is especially useful for irregular series where a fixed window size might not make sense because of missing data points, or because of calendar features like different months having a different number of days. 
+ +The example below demonstrates this idea by splitting `drinks` into a nested set of 26 years, and rolling over years rather than months. Note that the end result accomplishes a different task than the original example; in this new case, each slice moves forward an entire year, rather than just one month. + +```{r} +#| label: "rof-annual" +# The idea is to nest by the period to roll over, +# which in this case is the year. +roll_rs_annual <- drinks %>% + mutate(year = as.POSIXlt(date)$year + 1900) %>% + nest(data = c(date, S4248SM144NCEN)) %>% + rolling_origin( + initial = 20, + assess = 1, + cumulative = FALSE + ) + +analysis(roll_rs_annual$splits[[1]]) +``` + +The workflow to access these calendar slices is to use `bind_rows()` to join +each analysis set together. + +```{r} +mutate( + roll_rs_annual, + extracted_slice = map(splits, ~ bind_rows(analysis(.x)$data)) +) +``` + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/content/learn/statistics/_index.md b/learn/statistics/_index.md similarity index 100% rename from content/learn/statistics/_index.md rename to learn/statistics/_index.md diff --git a/learn/statistics/bootstrap/figs/unnamed-chunk-11-1.svg b/learn/statistics/bootstrap/figs/unnamed-chunk-11-1.svg new file mode 100644 index 00000000..88e6dccd --- /dev/null +++ b/learn/statistics/bootstrap/figs/unnamed-chunk-11-1.svg @@ -0,0 +1,6668 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +10 +20 +30 + + + + + + + +2 +3 +4 +5 +wt +mpg + + diff --git a/learn/statistics/bootstrap/figs/unnamed-chunk-12-1.svg b/learn/statistics/bootstrap/figs/unnamed-chunk-12-1.svg new file mode 100644 index 00000000..0f242423 --- /dev/null +++ b/learn/statistics/bootstrap/figs/unnamed-chunk-12-1.svg @@ -0,0 +1,6679 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +10 +15 +20 +25 +30 +35 + + + + + + + + + + +2 +3 +4 +5 +x +y + + diff --git a/learn/statistics/bootstrap/figs/unnamed-chunk-3-1.svg b/learn/statistics/bootstrap/figs/unnamed-chunk-3-1.svg new file mode 100644 index 00000000..acd6e020 --- /dev/null +++ b/learn/statistics/bootstrap/figs/unnamed-chunk-3-1.svg @@ -0,0 +1,111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +2 +3 +4 +5 + + + + + + + + + + +10 +15 +20 +25 +30 +35 +mpg +wt + + diff --git a/learn/statistics/bootstrap/figs/unnamed-chunk-4-1.svg b/learn/statistics/bootstrap/figs/unnamed-chunk-4-1.svg new file mode 100644 index 00000000..50984ef5 --- /dev/null +++ b/learn/statistics/bootstrap/figs/unnamed-chunk-4-1.svg @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +10 +15 +20 +25 +30 +35 + + + + + + + + + + +2 +3 +4 +5 +wt +mpg + + diff --git a/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg b/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg new file mode 100644 index 00000000..56d7ffbb --- /dev/null +++ b/learn/statistics/bootstrap/figs/unnamed-chunk-9-1.svg @@ -0,0 +1,202 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +b + + + + + + + + + + +k + + + + + +0 +5 +10 + + + + + +30 +40 +50 +60 +70 +0 +50 +100 +150 + + 
+ + +0 +50 +100 +150 +200 + + + + + +estimate +count + + diff --git a/learn/statistics/bootstrap/index.qmd b/learn/statistics/bootstrap/index.qmd new file mode 100644 index 00000000..52ddae0c --- /dev/null +++ b/learn/statistics/bootstrap/index.qmd @@ -0,0 +1,175 @@ +--- +title: "Bootstrap resampling and tidy regression models" +categories: + - statistical analysis + - bootstraping + - tidying results + - confidence intervals +type: learn-subsection +weight: 3 +description: | + Apply bootstrap resampling to estimate uncertainty in model parameters. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +pkgs <- c("tidymodels") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +This article only requires the tidymodels package. + +Combining fitted models in a tidy way is useful for performing bootstrapping or permutation tests. These approaches have been explored before, for instance by [Andrew MacDonald here](https://rstudio-pubs-static.s3.amazonaws.com/19698_a4c472606e3c43e4b94720506e49bb7b.html), and [Hadley has explored efficient support for bootstrapping](https://github.com/hadley/dplyr/issues/269) as a potential enhancement to dplyr. The tidymodels package [broom](https://broom.tidyverse.org/) fits naturally with [dplyr](https://dplyr.tidyverse.org/) in performing these analyses. + +Bootstrapping consists of randomly sampling a data set with replacement, then performing the analysis individually on each bootstrapped replicate. The variation in the resulting estimate is then a reasonable approximation of the variance in our estimate. + +Let's say we want to fit a nonlinear model to the weight/mileage relationship in the `mtcars` data set. 
+ +```{r} +library(tidymodels) + +ggplot(mtcars, aes(mpg, wt)) + + geom_point() +``` + +We might use the method of nonlinear least squares (via the `nls()` function) to fit a model. + +```{r} +nlsfit <- nls(mpg ~ k / wt + b, mtcars, start = list(k = 1, b = 0)) +summary(nlsfit) + +ggplot(mtcars, aes(wt, mpg)) + + geom_point() + + geom_line(aes(y = predict(nlsfit))) +``` + +While this does provide a p-value and confidence intervals for the parameters, these are based on model assumptions that may not hold in real data. Bootstrapping is a popular method for providing confidence intervals and predictions that are more robust to the nature of the data. + +## Bootstrapping models + +We can use the `bootstraps()` function in the rsample package to sample bootstrap replications. First, we construct 2000 bootstrap replicates of the data, each of which has been randomly sampled with replacement. The resulting object is an `rset`, which is a data frame with a column of `rsplit` objects. + +An `rsplit` object has two main components: an analysis data set and an assessment data set, accessible via `analysis(rsplit)` and `assessment(rsplit)` respectively. For bootstrap samples, the analysis data set is the bootstrap sample itself, and the assessment data set consists of all the out-of-bag samples. + +```{r} +set.seed(27) +boots <- bootstraps(mtcars, times = 2000, apparent = TRUE) +boots +``` + +Let's create a helper function to fit an `nls()` model on each bootstrap sample, and then use `purrr::map()` to apply this function to all the bootstrap samples at once. Similarly, we create a column of tidy coefficient information by unnesting. 
+ +```{r} +fit_nls_on_bootstrap <- function(split) { + nls(mpg ~ k / wt + b, analysis(split), start = list(k = 1, b = 0)) +} + +boot_models <- + boots %>% + mutate(model = map(splits, fit_nls_on_bootstrap), + coef_info = map(model, tidy)) + +boot_coefs <- + boot_models %>% + unnest(coef_info) +``` + +The unnested coefficient information contains a summary of each replication combined in a single data frame: + +```{r} +boot_coefs +``` + +## Confidence intervals + +We can then calculate confidence intervals (using what is called the [percentile method](https://www.uvm.edu/~dhowell/StatPages/Randomization%20Tests/ResamplingWithR/BootstMeans/bootstrapping_means.html)): + +```{r} +#| label: "percentiles" +percentile_intervals <- int_pctl(boot_models, coef_info) +percentile_intervals +``` + +Or we can use histograms to get a more detailed idea of the uncertainty in each estimate: + +```{r} +ggplot(boot_coefs, aes(estimate)) + + geom_histogram(bins = 30) + + facet_wrap( ~ term, scales = "free") + + geom_vline(aes(xintercept = .lower), data = percentile_intervals, col = "blue") + + geom_vline(aes(xintercept = .upper), data = percentile_intervals, col = "blue") +``` + +The rsample package also has functions for [other types of confidence intervals](https://rsample.tidymodels.org/reference/int_pctl.html). + +## Possible model fits + +We can use `augment()` to visualize the uncertainty in the fitted curve. 
Since there are so many bootstrap samples, we'll only show a sample of the model fits in our visualization: + +```{r} +boot_aug <- + boot_models %>% + sample_n(200) %>% + mutate(augmented = map(model, augment)) %>% + unnest(augmented) + +boot_aug +``` + +```{r} +ggplot(boot_aug, aes(wt, mpg)) + + geom_line(aes(y = .fitted, group = id), alpha = .2, col = "blue") + + geom_point() +``` + +With only a few small changes, we could easily perform bootstrapping with other kinds of predictive or hypothesis testing models, since the `tidy()` and `augment()` functions works for many statistical outputs. As another example, we could use `smooth.spline()`, which fits a cubic smoothing spline to data: + +```{r} +fit_spline_on_bootstrap <- function(split) { + data <- analysis(split) + smooth.spline(data$wt, data$mpg, df = 4) +} + +boot_splines <- + boots %>% + sample_n(200) %>% + mutate(spline = map(splits, fit_spline_on_bootstrap), + aug_train = map(spline, augment)) + +splines_aug <- + boot_splines %>% + unnest(aug_train) + +ggplot(splines_aug, aes(x, y)) + + geom_line(aes(y = .fitted, group = id), alpha = 0.2, col = "blue") + + geom_point() +``` + + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + + diff --git a/learn/statistics/infer/figs/unnamed-chunk-22-1.svg b/learn/statistics/infer/figs/unnamed-chunk-22-1.svg new file mode 100644 index 00000000..02ac008b --- /dev/null +++ b/learn/statistics/infer/figs/unnamed-chunk-22-1.svg @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.0 +0.2 +0.4 +0.6 + + + + + + + +0 +2 +4 +F stat +density +Theoretical F Null Distribution + + diff --git a/learn/statistics/infer/figs/unnamed-chunk-23-1.svg b/learn/statistics/infer/figs/unnamed-chunk-23-1.svg new file mode 100644 index 00000000..92fb2ce6 --- /dev/null +++ b/learn/statistics/infer/figs/unnamed-chunk-23-1.svg @@ -0,0 +1,5090 @@ + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.0 +0.2 +0.4 +0.6 + + + + + + + + +0 +2 +4 +6 +F stat +density +Simulation-Based and Theoretical F Null Distributions + + diff --git a/learn/statistics/infer/figs/visualize-1.svg b/learn/statistics/infer/figs/visualize-1.svg new file mode 100644 index 00000000..7ff02340 --- /dev/null +++ b/learn/statistics/infer/figs/visualize-1.svg @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +250 +500 +750 +1000 + + + + + + + + + + + +38 +39 +40 +41 +42 +43 +stat +count +Simulation-Based Null Distribution + + diff --git a/learn/statistics/infer/figs/visualize2-1.svg b/learn/statistics/infer/figs/visualize2-1.svg new file mode 100644 index 00000000..89148058 --- /dev/null +++ b/learn/statistics/infer/figs/visualize2-1.svg @@ -0,0 +1,5102 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +250 +500 +750 +1000 + + + + + + + + + + + +38 +39 +40 +41 +42 +43 +stat +count +Simulation-Based Null Distribution + + diff --git a/learn/statistics/infer/index.qmd b/learn/statistics/infer/index.qmd new file mode 100644 index 00000000..1103d8c5 --- /dev/null +++ b/learn/statistics/infer/index.qmd @@ -0,0 +1,357 @@ +--- +title: "Hypothesis testing using resampling and tidy data" +categories: + - statistical analysis + - hypothesis testing + - bootstraping +type: learn-subsection +weight: 4 +description: | + Perform common hypothesis tests for statistical inference using flexible functions. 
+toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(sessioninfo) +pkgs <- c("tidymodels") +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +This article only requires the tidymodels package. + +The tidymodels package [infer](https://infer.tidymodels.org/) implements an expressive grammar to perform statistical inference that coheres with the `tidyverse` design framework. Rather than providing methods for specific statistical tests, this package consolidates the principles that are shared among common hypothesis tests into a set of 4 main verbs (functions), supplemented with many utilities to visualize and extract information from their outputs. + +Regardless of which hypothesis test we're using, we're still asking the same kind of question: + +>Is the effect or difference in our observed data real, or due to chance? + +To answer this question, we start by assuming that the observed data came from some world where "nothing is going on" (i.e. the observed effect was simply due to random chance), and call this assumption our **null hypothesis**. (In reality, we might not believe in the null hypothesis at all; the null hypothesis is in opposition to the **alternate hypothesis**, which supposes that the effect present in the observed data is actually due to the fact that "something is going on.") We then calculate a **test statistic** from our data that describes the observed effect. We can use this test statistic to calculate a **p-value**, giving the probability that our observed data could come about if the null hypothesis was true. If this probability is below some pre-defined **significance level** $\alpha$, then we can reject our null hypothesis. 
+ +If you are new to hypothesis testing, take a look at + +* [Section 9.2 of _Statistical Inference via Data Science_](https://moderndive.com/9-hypothesis-testing.html#understanding-ht) +* The American Statistical Association's recent [statement on p-values](https://doi.org/10.1080/00031305.2016.1154108) + +The workflow of this package is designed around these ideas. Starting from some data set, + ++ `specify()` allows you to specify the variable, or relationship between variables, that you're interested in, ++ `hypothesize()` allows you to declare the null hypothesis, ++ `generate()` allows you to generate data reflecting the null hypothesis, and ++ `calculate()` allows you to calculate a distribution of statistics from the generated data to form the null distribution. + +Throughout this vignette, we make use of `gss`, a data set available in infer containing a sample of 500 observations of 11 variables from the *General Social Survey*. + +```{r} +#| label: "load-gss" +#| warning: false +#| message: false +library(tidymodels) # Includes the infer package + +# load in the data set +data(gss) + +# take a look at its structure +dplyr::glimpse(gss) +``` + +Each row is an individual survey response, containing some basic demographic information on the respondent as well as some additional variables. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this data set is a representative sample of a population we want to learn about: American adults. + +## Specify variables + +The `specify()` function can be used to specify which of the variables in the data set you're interested in. 
If you're only interested in, say, the `age` of the respondents, you might write: + +```{r} +#| label: "specify-example" +#| warning: false +#| message: false +gss %>% + specify(response = age) +``` + +On the front end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. What do we see if we check the class of this object, though? + +```{r} +#| label: "specify-one" +#| warning: false +#| message: false +gss %>% + specify(response = age) %>% + class() +``` + +We can see that the infer class has been appended on top of the dataframe classes; this new class stores some extra metadata. + +If you're interested in two variables (`age` and `partyid`, for example) you can `specify()` their relationship in one of two (equivalent) ways: + +```{r} +#| label: "specify-two" +#| warning: false +#| message: false +# as a formula +gss %>% + specify(age ~ partyid) + +# with the named arguments +gss %>% + specify(response = age, explanatory = partyid) +``` + +If you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code: + +```{r} +#| label: "specify-success" +#| warning: false +#| message: false +# specifying for inference on proportions +gss %>% + specify(response = college, success = "degree") +``` + +## Declare the hypothesis + +The next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of "independence" or "point" to the `null` argument. 
If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`: + +```{r} +#| label: "hypothesize-independence" +#| warning: false +#| message: false +gss %>% + specify(college ~ partyid, success = "degree") %>% + hypothesize(null = "independence") +``` + +If you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write: + +```{r} +#| label: "hypothesize-40-hr-week" +#| warning: false +#| message: false +gss %>% + specify(response = hours) %>% + hypothesize(null = "point", mu = 40) +``` + +Again, from the front-end, the dataframe outputted from `hypothesize()` looks almost exactly the same as it did when it came out of `specify()`, but infer now "knows" your null hypothesis. + +## Generate the distribution + +Once we've asserted our null hypothesis using `hypothesize()`, we can construct a null distribution based on this hypothesis. We can do this using one of several methods, supplied in the `type` argument: + +* `bootstrap`: A bootstrap sample will be drawn for each replicate, where a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. +* `permute`: For each replicate, each input value will be randomly reassigned (without replacement) to a new output value in the sample. +* `simulate`: A value will be sampled from a theoretical distribution with parameters specified in `hypothesize()` for each replicate. (This option is currently only applicable for testing point estimates.) 
+ +Continuing on with our example above, about the average number of hours worked a week, we might write: + +```{r} +#| label: "generate-point" +#| warning: false +#| message: false +gss %>% + specify(response = hours) %>% + hypothesize(null = "point", mu = 40) %>% + generate(reps = 5000, type = "bootstrap") +``` + +In the above example, we take 5000 bootstrap samples to form our null distribution. + +To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 5000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age: + +```{r} +#| label: "generate-permute" +#| warning: false +#| message: false +gss %>% + specify(partyid ~ age) %>% + hypothesize(null = "independence") %>% + generate(reps = 5000, type = "permute") +``` + +## Calculate statistics + +Depending on whether you're carrying out computation-based inference or theory-based inference, you will either supply `calculate()` with the output of `generate()` or `hypothesize()`, respectively. The function, for one, takes in a `stat` argument, which is currently one of `"mean"`, `"median"`, `"sum"`, `"sd"`, `"prop"`, `"count"`, `"diff in means"`, `"diff in medians"`, `"diff in props"`, `"Chisq"`, `"F"`, `"t"`, `"z"`, `"slope"`, or `"correlation"`. For example, continuing our example above to calculate the null distribution of mean hours worked per week: + +```{r} +#| label: "calculate-point" +#| warning: false +#| message: false +gss %>% + specify(response = hours) %>% + hypothesize(null = "point", mu = 40) %>% + generate(reps = 5000, type = "bootstrap") %>% + calculate(stat = "mean") +``` + +The output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. 
If you're carrying out inference on differences in means, medians, or proportions, or $t$ and $z$ statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write: + +```{r} +#| label: "specify-diff-in-means" +#| warning: false +#| message: false +gss %>% + specify(age ~ college) %>% + hypothesize(null = "independence") %>% + generate(reps = 5000, type = "permute") %>% + calculate("diff in means", order = c("degree", "no degree")) +``` + +## Other utilities + +The infer package also offers several utilities to extract meaning out of summary statistics and null distributions; the package provides functions to visualize where a statistic is relative to a distribution (with `visualize()`), calculate p-values (with `get_p_value()`), and calculate confidence intervals (with `get_confidence_interval()`). + +To illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours. + +```{r} +#| label: "utilities-examples" +# find the point estimate +point_estimate <- gss %>% + specify(response = hours) %>% + calculate(stat = "mean") + +# generate a null distribution +null_dist <- gss %>% + specify(response = hours) %>% + hypothesize(null = "point", mu = 40) %>% + generate(reps = 5000, type = "bootstrap") %>% + calculate(stat = "mean") +``` + +(Notice the warning: `Removed 1244 rows containing missing values.` This would be worth noting if you were actually carrying out this hypothesis test.) + +Our point estimate `r point_estimate` seems *pretty* close to 40, but a little bit different. We might wonder if this difference is just due to random chance, or if the mean number of hours worked per week in the population really isn't 40. + +We could initially just visualize the null distribution. 
+ +```{r} +#| label: "visualize" +#| warning: false +#| message: false +null_dist %>% + visualize() +``` + +Where does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this. + +```{r} +#| label: "visualize2" +#| warning: false +#| message: false +null_dist %>% + visualize() + + shade_p_value(obs_stat = point_estimate, direction = "two_sided") +``` + +Notice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize()` outputs a plot object from ggplot2 instead of a dataframe, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of `r point_estimate` hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though? + +```{r} +#| label: "get_p_value" +#| warning: false +#| message: false +# get a two-tailed p-value +p_value <- null_dist %>% + get_p_value(obs_stat = point_estimate, direction = "two_sided") + +p_value +``` + +It looks like the p-value is `r p_value`, which is pretty small---if the true mean number of hours worked per week was actually 40, the probability of our sample mean being this far (`r abs(point_estimate-40)` hours) from 40 would be `r p_value`. This may or may not be statistically significantly different, depending on the significance level $\alpha$ you decided on *before* you ran this analysis. If you had set $\alpha = .05$, then this difference would be statistically significant, but if you had set $\alpha = .01$, then it would not be. 
+ +To get a confidence interval around our estimate, we can write: + +```{r} +#| label: "get_conf" +#| message: false +#| warning: false +# start with the null distribution +null_dist %>% + # calculate the confidence interval around the point estimate + get_confidence_interval(point_estimate = point_estimate, + # at the 95% confidence level + level = .95, + # using the standard error + type = "se") +``` + +As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level $\alpha = .05$. + +## Theoretical methods + +The infer package also provides functionality to use theoretical methods for `"Chisq"`, `"F"` and `"t"` test statistics. + +Generally, to find a null distribution using theory-based methods, use the same code that you would use to find the null distribution using randomization-based methods, but skip the `generate()` step. For example, if we wanted to find a null distribution for the relationship between age (`age`) and party identification (`partyid`) using randomization, we could write: + +```{r} +#| message: false +#| warning: false +null_f_distn <- gss %>% + specify(age ~ partyid) %>% + hypothesize(null = "independence") %>% + generate(reps = 5000, type = "permute") %>% + calculate(stat = "F") +``` + +To find the null distribution using theory-based methods, instead, skip the `generate()` step entirely: + +```{r} +#| message: false +#| warning: false +null_f_distn_theoretical <- gss %>% + specify(age ~ partyid) %>% + hypothesize(null = "independence") %>% + calculate(stat = "F") +``` + +We'll calculate the observed statistic to make use of in the following visualizations; this procedure is the same, regardless of the methods used to find the null distribution. 
+ +```{r} +#| message: false +#| warning: false +F_hat <- gss %>% + specify(age ~ partyid) %>% + calculate(stat = "F") +``` + +Now, instead of just piping the null distribution into `visualize()`, as we would do if we wanted to visualize the randomization-based null distribution, we also need to provide `method = "theoretical"` to `visualize()`. + +```{r} +#| message: false +#| warning: false +visualize(null_f_distn_theoretical, method = "theoretical") + + shade_p_value(obs_stat = F_hat, direction = "greater") +``` + +To get a sense of how the theory-based and randomization-based null distributions relate, we can pipe the randomization-based null distribution into `visualize()` and also specify `method = "both"` + +```{r} +#| message: false +#| warning: false +visualize(null_f_distn, method = "both") + + shade_p_value(obs_stat = F_hat, direction = "greater") +``` + +That's it! This vignette covers most all of the key functionality of infer. See `help(package = "infer")` for a full list of functions and vignettes. 
+ + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/learn/statistics/k-means/figs/unnamed-chunk-11-1.svg b/learn/statistics/k-means/figs/unnamed-chunk-11-1.svg new file mode 100644 index 00000000..1c39d9e5 --- /dev/null +++ b/learn/statistics/k-means/figs/unnamed-chunk-11-1.svg @@ -0,0 +1,3108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +7 + + + + + + + + + + +8 + + + + + + + + + + +9 + + + + + + + + + + +4 + + + + + + + + + + +5 + + + + + + + + + + +6 + + + + + + + + + + +1 + + + + + + + + + + +2 + + + + + + + + + + +3 + + + + + +-5 +0 +5 + + + +-5 +0 +5 + + + +-5 +0 +5 +-2.5 +0.0 +2.5 + + + +-2.5 +0.0 +2.5 + + + +-2.5 +0.0 +2.5 + + + +x1 +x2 + +.cluster + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 +8 +9 + + diff --git a/learn/statistics/k-means/figs/unnamed-chunk-12-1.svg b/learn/statistics/k-means/figs/unnamed-chunk-12-1.svg new file mode 100644 index 00000000..032575fd --- /dev/null +++ b/learn/statistics/k-means/figs/unnamed-chunk-12-1.svg @@ -0,0 +1,3153 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x +x +x +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x +x +x +x +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x +x +x +x + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +x +x +x +x +x +x +x +x +x + + + + + + + + + + + +7 + + + + + + + + + + +8 + + + + + + + + + + +9 + + + + + + + + + + +4 + + + + + + + + + + +5 + + + + + + + + + + +6 + + + + + + + + + + +1 + + + + + + + + + + +2 + + + + + + + + + + +3 + + + + + +-5 +0 +5 + + + +-5 +0 +5 + + + +-5 +0 +5 +-2.5 +0.0 +2.5 + + + +-2.5 +0.0 +2.5 + + + +-2.5 +0.0 +2.5 + + + +x1 +x2 + +.cluster + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 +8 +9 + + diff --git a/learn/statistics/k-means/figs/unnamed-chunk-13-1.svg b/learn/statistics/k-means/figs/unnamed-chunk-13-1.svg new file mode 100644 index 00000000..2018b7f4 --- /dev/null +++ b/learn/statistics/k-means/figs/unnamed-chunk-13-1.svg @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +1000 +2000 +3000 + + + + + + +2.5 +5.0 +7.5 +k +tot.withinss + + diff --git a/learn/statistics/k-means/figs/unnamed-chunk-3-1.svg b/learn/statistics/k-means/figs/unnamed-chunk-3-1.svg new file mode 100644 index 00000000..0e62f5c8 --- /dev/null +++ b/learn/statistics/k-means/figs/unnamed-chunk-3-1.svg @@ -0,0 +1,375 @@ + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +-2.5 +0.0 +2.5 + + + + + + +-5 +0 +5 +x1 +x2 + +cluster + + + + + + +1 +2 +3 + + diff --git a/learn/statistics/k-means/index.qmd b/learn/statistics/k-means/index.qmd new file mode 100644 index 00000000..8584a219 --- /dev/null +++ b/learn/statistics/k-means/index.qmd @@ -0,0 +1,195 @@ +--- +title: "K-means clustering with tidy data principles" +categories: + - statistical analysis + - clustering + - tidying results +type: learn-subsection +weight: 2 +description: | + Summarize clustering characteristics and estimate the best number of clusters for a data set. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +pkgs <- c("tidymodels") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +This article only requires the tidymodels package. + +K-means clustering serves as a useful example of applying tidy data principles to statistical analysis, and especially the distinction between the three tidying functions: + +- `tidy()` +- `augment()` +- `glance()` + +Let's start by generating some random two-dimensional data with three clusters. 
Data in each cluster will come from a multivariate gaussian distribution, with different means for each cluster: + +```{r} +library(tidymodels) + +set.seed(27) + +centers <- tibble( + cluster = factor(1:3), + num_points = c(100, 150, 50), # number points in each cluster + x1 = c(5, 0, -3), # x1 coordinate of cluster center + x2 = c(-1, 1, -2) # x2 coordinate of cluster center +) + +labelled_points <- + centers %>% + mutate( + x1 = map2(num_points, x1, rnorm), + x2 = map2(num_points, x2, rnorm) + ) %>% + select(-num_points) %>% + unnest(cols = c(x1, x2)) + +ggplot(labelled_points, aes(x1, x2, color = cluster)) + + geom_point(alpha = 0.3) +``` + +This is an ideal case for k-means clustering. + +## How does K-means work? + +Rather than using equations, this short animation using the [artwork](https://github.com/allisonhorst/stats-illustrations) of Allison Horst explains the clustering process: + +```{r} +#| label: "illustrations" +#| echo: false +#| results: asis +#| fig-align: center +knitr::include_graphics("kmeans.gif") +``` + +## Clustering in R + +We'll use the built-in `kmeans()` function, which accepts a data frame with all numeric columns as it's primary argument. + +```{r} +points <- + labelled_points %>% + select(-cluster) + +kclust <- kmeans(points, centers = 3) +kclust +summary(kclust) +``` + +The output is a list of vectors, where each component has a different length. There's one of length `r nrow(points)`, the same as our original data set. There are two elements of length 3 (`withinss` and `tot.withinss`) and `centers` is a matrix with 3 rows. And then there are the elements of length 1: `totss`, `tot.withinss`, `betweenss`, and `iter`. (The value `ifault` indicates possible algorithm problems.) + +These differing lengths have important meaning when we want to tidy our data set; they signify that each type of component communicates a *different kind* of information. 
+ +- `cluster` (`r nrow(points)` values) contains information about each *point* +- `centers`, `withinss`, and `size` (3 values) contain information about each *cluster* +- `totss`, `tot.withinss`, `betweenss`, and `iter` (1 value) contain information about the *full clustering* + +Which of these do we want to extract? There is no right answer; each of them may be interesting to an analyst. Because they communicate entirely different information (not to mention there's no straightforward way to combine them), they are extracted by separate functions. `augment` adds the point classifications to the original data set: + +```{r} +augment(kclust, points) +``` + +The `tidy()` function summarizes on a per-cluster level: + +```{r} +tidy(kclust) +``` + +And as it always does, the `glance()` function extracts a single-row summary: + +```{r} +glance(kclust) +``` + +## Exploratory clustering + +While these summaries are useful, they would not have been too difficult to extract out from the data set yourself. The real power comes from combining these analyses with other tools like [dplyr](https://dplyr.tidyverse.org/). + +Let's say we want to explore the effect of different choices of `k`, from 1 to 9, on this clustering. First cluster the data 9 times, each using a different value of `k`, then create columns containing the tidied, glanced and augmented data: + +```{r} +kclusts <- + tibble(k = 1:9) %>% + mutate( + kclust = map(k, ~kmeans(points, .x)), + tidied = map(kclust, tidy), + glanced = map(kclust, glance), + augmented = map(kclust, augment, points) + ) + +kclusts +``` + +We can turn these into three separate data sets each representing a different type of data: using `tidy()`, using `augment()`, and using `glance()`. Each of these goes into a separate data set as they represent different types of data. 
+ +```{r} +clusters <- + kclusts %>% + unnest(cols = c(tidied)) + +assignments <- + kclusts %>% + unnest(cols = c(augmented)) + +clusterings <- + kclusts %>% + unnest(cols = c(glanced)) +``` + +Now we can plot the original points using the data from `augment()`, with each point colored according to the predicted cluster. + +```{r} +#| fig-width: 7 +#| fig-height: 7 +p1 <- + ggplot(assignments, aes(x = x1, y = x2)) + + geom_point(aes(color = .cluster), alpha = 0.8) + + facet_wrap(~ k) +p1 +``` + +Already we get a good sense of the proper number of clusters (3), and how the k-means algorithm functions when `k` is too high or too low. We can then add the centers of the cluster using the data from `tidy()`: + +```{r} +p2 <- p1 + geom_point(data = clusters, size = 10, shape = "x") +p2 +``` + +The data from `glance()` fills a different but equally important purpose; it lets us view trends of some summary statistics across values of `k`. Of particular interest is the total within sum of squares, saved in the `tot.withinss` column. + +```{r} +ggplot(clusterings, aes(k, tot.withinss)) + + geom_line() + + geom_point() +``` + +This represents the variance within the clusters. It decreases as `k` increases, but notice a bend (or "elbow") around `k = 3`. This bend indicates that additional clusters beyond the third have little value. (See [here](https://web.stanford.edu/~hastie/Papers/gap.pdf) for a more mathematically rigorous interpretation and implementation of this method). Thus, all three methods of tidying data provided by broom are useful for summarizing clustering output. 
+ +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/learn/statistics/k-means/kmeans.gif b/learn/statistics/k-means/kmeans.gif new file mode 100644 index 00000000..ece19fa7 Binary files /dev/null and b/learn/statistics/k-means/kmeans.gif differ diff --git a/content/learn/statistics/thumbnail.png b/learn/statistics/thumbnail.png similarity index 100% rename from content/learn/statistics/thumbnail.png rename to learn/statistics/thumbnail.png diff --git a/learn/statistics/tidy-analysis/figs/unnamed-chunk-4-1.svg b/learn/statistics/tidy-analysis/figs/unnamed-chunk-4-1.svg new file mode 100644 index 00000000..9a9718ee --- /dev/null +++ b/learn/statistics/tidy-analysis/figs/unnamed-chunk-4-1.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +50 +100 +150 +200 + + + + + + + + +400 +800 +1200 +1600 +age +circumference + +Tree + + + + + + + + + + +3 +1 +5 +2 +4 + + diff --git a/learn/statistics/tidy-analysis/index.qmd b/learn/statistics/tidy-analysis/index.qmd new file mode 100644 index 00000000..4de9aa79 --- /dev/null +++ b/learn/statistics/tidy-analysis/index.qmd @@ -0,0 +1,210 @@ +--- +title: "Correlation and regression fundamentals with tidy data principles" +categories: + - statistical analysis + - correlation + - tidying results +type: learn-subsection +weight: 1 +description: | + Analyze the results of correlation tests and simple regression models for many data sets at once. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +pkgs <- c("tidymodels") +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +This article only requires the tidymodels package. 
+ +While the tidymodels package [broom](https://broom.tidyverse.org/) is useful for summarizing the result of a single analysis in a consistent format, it is really designed for high-throughput applications, where you must combine results from multiple analyses. These could be subgroups of data, analyses using different models, bootstrap replicates, permutations, and so on. In particular, it plays well with the `nest()/unnest()` functions from [tidyr](https://tidyr.tidyverse.org/) and the `map()` function in [purrr](https://purrr.tidyverse.org/). + +## Correlation analysis + +Let's demonstrate this with a simple data set, the built-in `Orange`. We start by coercing `Orange` to a `tibble`. This gives a nicer print method that will be especially useful later on when we start working with list-columns. + +```{r} +library(tidymodels) + +data(Orange) + +Orange <- as_tibble(Orange) +Orange +``` + +This contains 35 observations of three variables: `Tree`, `age`, and `circumference`. `Tree` is a factor with five levels describing five trees. As might be expected, age and circumference are correlated: + +```{r} +cor(Orange$age, Orange$circumference) + +library(ggplot2) + +ggplot(Orange, aes(age, circumference, color = Tree)) + + geom_line() +``` + +Suppose you want to test for correlations individually *within* each tree. You can do this with dplyr's `group_by`: + +```{r} +Orange %>% + group_by(Tree) %>% + summarize(correlation = cor(age, circumference)) +``` + +(Note that the correlations are much higher than the aggregated one, and also we can now see the correlation is similar across trees). + +Suppose that instead of simply estimating a correlation, we want to perform a hypothesis test with `cor.test()`: + +```{r} +ct <- cor.test(Orange$age, Orange$circumference) +ct +``` + +This test output contains multiple values we may be interested in. Some are vectors of length 1, such as the p-value and the estimate, and some are longer, such as the confidence interval. 
We can get this into a nicely organized tibble using the `tidy()` function: + +```{r} +tidy(ct) +``` + +Often, we want to perform multiple tests or fit multiple models, each on a different part of the data. In this case, we recommend a `nest-map-unnest` workflow. For example, suppose we want to perform correlation tests for each different tree. We start by `nest`ing our data based on the group of interest: + +```{r} +nested <- + Orange %>% + nest(data = c(age, circumference)) +``` + +Then we perform a correlation test for each nested tibble using `purrr::map()`: + +```{r} +nested %>% + mutate(test = map(data, ~ cor.test(.x$age, .x$circumference))) +``` + +This results in a list-column of S3 objects. We want to tidy each of the objects, which we can also do with `map()`. + +```{r} +nested %>% + mutate( + test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col + tidied = map(test, tidy) + ) +``` + +Finally, we want to unnest the tidied data frames so we can see the results in a flat tibble. All together, this looks like: + +```{r} +Orange %>% + nest(data = c(age, circumference)) %>% + mutate( + test = map(data, ~ cor.test(.x$age, .x$circumference)), # S3 list-col + tidied = map(test, tidy) + ) %>% + unnest(cols = tidied) %>% + select(-data, -test) +``` + +## Regression models + +This type of workflow becomes even more useful when applied to regressions. 
Untidy output for a regression looks like: + +```{r} +lm_fit <- lm(age ~ circumference, data = Orange) +summary(lm_fit) +``` + +When we tidy these results, we get multiple rows of output for each model: + +```{r} +tidy(lm_fit) +``` + +Now we can handle multiple regressions at once using exactly the same workflow as before: + +```{r} +Orange %>% + nest(data = c(-Tree)) %>% + mutate( + fit = map(data, ~ lm(age ~ circumference, data = .x)), + tidied = map(fit, tidy) + ) %>% + unnest(tidied) %>% + select(-data, -fit) +``` + +You can just as easily use multiple predictors in the regressions, as shown here on the `mtcars` dataset. We nest the data into automatic vs. manual cars (the `am` column), then perform the regression within each nested tibble. + +```{r} +data(mtcars) +mtcars <- as_tibble(mtcars) # to play nicely with list-cols +mtcars + +mtcars %>% + nest(data = c(-am)) %>% + mutate( + fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), # S3 list-col + tidied = map(fit, tidy) + ) %>% + unnest(tidied) %>% + select(-data, -fit) +``` + +What if you want not just the `tidy()` output, but the `augment()` and `glance()` outputs as well, while still performing each regression only once? Since we're using list-columns, we can just fit the model once and use multiple list-columns to store the tidied, glanced and augmented outputs. + +```{r} +regressions <- + mtcars %>% + nest(data = c(-am)) %>% + mutate( + fit = map(data, ~ lm(wt ~ mpg + qsec + gear, data = .x)), + tidied = map(fit, tidy), + glanced = map(fit, glance), + augmented = map(fit, augment) + ) + +regressions %>% + select(tidied) %>% + unnest(tidied) + +regressions %>% + select(glanced) %>% + unnest(glanced) + +regressions %>% + select(augmented) %>% + unnest(augmented) +``` + +By combining the estimates and p-values across all groups into the same tidy data frame (instead of a list of output model objects), a new class of analyses and visualizations becomes straightforward. 
This includes: + +- sorting by p-value or estimate to find the most significant terms across all tests, +- p-value histograms, and +- volcano plots comparing p-values to effect size estimates. + +In each of these cases, we can easily filter, facet, or distinguish based on the `term` column. In short, this makes the tools of tidy data analysis available for the *results* of data analysis and models, not just the inputs. + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/learn/statistics/xtabs/figs/plot-indep-1.svg b/learn/statistics/xtabs/figs/plot-indep-1.svg new file mode 100644 index 00000000..20db3115 --- /dev/null +++ b/learn/statistics/xtabs/figs/plot-indep-1.svg @@ -0,0 +1,95 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +E2E2 +E2E3 +E2E4 +E3E3 +E3E4 +E4E4 + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +Proportion +Genotype: Apolipoprotein E Genetics + +Class + + + + +Impaired +Control + + diff --git a/learn/statistics/xtabs/figs/visualize-indep-1.svg b/learn/statistics/xtabs/figs/visualize-indep-1.svg new file mode 100644 index 00000000..5dcc7b01 --- /dev/null +++ b/learn/statistics/xtabs/figs/visualize-indep-1.svg @@ -0,0 +1,5092 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +500 +1000 + + + + + + + + + +0 +5 +10 +15 +20 +25 +stat +count +Simulation-Based Null Distribution + + diff --git a/learn/statistics/xtabs/figs/visualize-indep-both-1.svg b/learn/statistics/xtabs/figs/visualize-indep-both-1.svg new file mode 100644 index 00000000..61beecec --- /dev/null +++ b/learn/statistics/xtabs/figs/visualize-indep-both-1.svg @@ -0,0 +1,5096 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.05 +0.10 +0.15 + + + + + + + + + + +0 +5 +10 +15 +20 +25 +Chi-Square stat +density 
+Simulation-Based and Theoretical Chi-Square Null Distributions + + diff --git a/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg b/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg new file mode 100644 index 00000000..7d7d7482 --- /dev/null +++ b/learn/statistics/xtabs/figs/visualize-indep-gof-1.svg @@ -0,0 +1,5081 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +500 +1000 + + + + + + +0 +10 +20 +stat +count +Simulation-Based Null Distribution + + diff --git a/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg b/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg new file mode 100644 index 00000000..ee7b9e74 --- /dev/null +++ b/learn/statistics/xtabs/figs/visualize-indep-theor-1.svg @@ -0,0 +1,411 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.05 +0.10 +0.15 + + + + + + + + + +0 +5 +10 +15 +20 +Chi-Square stat +density +Theoretical Chi-Square Null Distribution + + diff --git a/learn/statistics/xtabs/index.qmd b/learn/statistics/xtabs/index.qmd new file mode 100644 index 00000000..ef7fbf4a --- /dev/null +++ b/learn/statistics/xtabs/index.qmd @@ -0,0 +1,278 @@ +--- +title: "Statistical analysis of contingency tables" +categories: + - statistical analysis + - analysis of tables + - 
hypothesis testing +type: learn-subsection +weight: 5 +description: | + Use tests of independence and goodness of fit to analyze tables of counts. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(sessioninfo) +pkgs <- c("tidymodels") +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + +## Introduction + +This article only requires that you have the tidymodels package installed. + +In this vignette, we'll walk through conducting a $\chi^2$ (chi-squared) test of independence and a chi-squared goodness of fit test using infer. We'll start out with a chi-squared test of independence, which can be used to test the association between two categorical variables. Then, we'll move on to a chi-squared goodness of fit test, which tests how well the distribution of one categorical variable can be approximated by some theoretical distribution. + +Throughout this vignette, we'll make use of the `ad_data` data set (available in the modeldata package, which is part of tidymodels). This data set is related to cognitive impairment in 333 patients from [Craig-Schapiro _et al_ (2011)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079734/). See `?ad_data` for more information on the variables included and their source. One of the main research questions in these data were how a person's genetics related to the Apolipoprotein E gene affect their cognitive skills. The data shows: + +```{r} +#| label: "glimpse-ad_data-actual" +#| warning: false +#| message: false +library(tidymodels) # Includes the infer package + +data(ad_data, package = "modeldata") +ad_data %>% + select(Genotype, Class) +``` + +The three main genetic variants are called E2, E3, and E4. 
The values in `Genotype` represent the genetic makeup of patients based on what they inherited from their parents (i.e, a value of "E2E4" means E2 from one parent and E4 from the other). + +## Test of independence + +To carry out a chi-squared test of independence, we'll examine the association between their cognitive ability (impaired and healthy) and the genetic makeup. This is what the relationship looks like in the sample data: + +```{r} +#| label: "plot-indep" +#| echo: false +ad_data %>% + ggplot() + + aes(y = Genotype, fill = Class) + + geom_bar(position = "fill") + + scale_fill_brewer(type = "qual") + + labs(y = "Genotype: Apolipoprotein E Genetics", + x = "Proportion") +``` + +If there were no relationship, we would expect to see the purple bars reaching to the same length, regardless of cognitive ability. Are the differences we see here, though, just due to random noise? + +First, to calculate the observed statistic, we can use `specify()` and `calculate()`. + +```{r} +#| label: "calc-obs-stat-indep" +#| warning: false +#| message: false +# calculate the observed statistic +observed_indep_statistic <- ad_data %>% + specify(Genotype ~ Class) %>% + calculate(stat = "Chisq") +``` + +The observed $\chi^2$ statistic is `r observed_indep_statistic`. Now, we want to compare this statistic to a null distribution, generated under the assumption that these variables are not actually related, to get a sense of how likely it would be for us to see this observed statistic if there were actually no association between cognitive ability and genetics. + +We can `generate()` the null distribution in one of two ways: using randomization or theory-based methods. The randomization approach permutes the response and explanatory variables, so that each person's genetics is matched up with a random cognitive rating from the sample in order to break up any association between the two. 
+ +```{r} +#| label: "generate-null-indep" +#| warning: false +#| message: false +# generate the null distribution using randomization +null_distribution_simulated <- ad_data %>% + specify(Genotype ~ Class) %>% + hypothesize(null = "independence") %>% + generate(reps = 5000, type = "permute") %>% + calculate(stat = "Chisq") +``` + +Note that, in the line `specify(Genotype ~ Class)` above, we could use the equivalent syntax `specify(response = Genotype, explanatory = Class)`. The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization. + +```{r} +#| label: "generate-null-indep-t" +#| warning: false +#| message: false +# generate the null distribution by theoretical approximation +null_distribution_theoretical <- ad_data %>% + specify(Genotype ~ Class) %>% + hypothesize(null = "independence") %>% + # note that we skip the generation step here! + calculate(stat = "Chisq") +``` + +To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: + +```{r} +#| label: "visualize-indep" +#| warning: false +#| message: false +# visualize the null distribution and test statistic! +null_distribution_simulated %>% + visualize() + + shade_p_value(observed_indep_statistic, + direction = "greater") +``` + +We could also visualize the observed statistic against the theoretical null distribution. Note that we skip the `generate()` and `calculate()` steps when using the theoretical approach, and that we now need to provide `method = "theoretical"` to `visualize()`. + +```{r} +#| label: "visualize-indep-theor" +#| warning: false +#| message: false +# visualize the theoretical null distribution and test statistic! 
+ad_data %>% + specify(Genotype ~ Class) %>% + hypothesize(null = "independence") %>% + visualize(method = "theoretical") + + shade_p_value(observed_indep_statistic, + direction = "greater") +``` + +To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = "both"`. + +```{r} +#| label: "visualize-indep-both" +#| warning: false +#| message: false +# visualize both null distributions and the test statistic! +null_distribution_simulated %>% + visualize(method = "both") + + shade_p_value(observed_indep_statistic, + direction = "greater") +``` + +Either way, it looks like our observed test statistic would be fairly unlikely if there were actually no association between cognition and genotype. More exactly, we can calculate the p-value: + +```{r} +#| label: "p-value-indep" +#| warning: false +#| message: false +# calculate the p value from the observed statistic and null distribution +p_value_independence <- null_distribution_simulated %>% + get_p_value(obs_stat = observed_indep_statistic, + direction = "greater") + +p_value_independence +``` + +Thus, if there were really no relationship between cognition and genotype, the probability that we would see a statistic as or more extreme than `r observed_indep_statistic` is approximately `r p_value_independence`. + +Note that, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this: + +```{r} +#| label: "chisq-indep-wrapper" +#| message: false +#| warning: false +chisq_test(ad_data, Genotype ~ Class) +``` + + +## Goodness of fit + +Now, moving on to a chi-squared goodness of fit test, we'll take a look at just the genotype data. Many papers have investigated the relationship of Apolipoprotein E to diseases. 
For example, [Song _et al_ (2004)](https://annals.org/aim/article-abstract/717641/meta-analysis-apolipoprotein-e-genotypes-risk-coronary-heart-disease) conducted a meta-analysis of numerous studies that looked at this gene and heart disease. In their paper, they describe the frequency of the different genotypes across many samples. For the cognition study, it might be interesting to see if our sample of genotypes was consistent with this literature (treating the rates, for this analysis, as known). + +The rates of the meta-analysis and our observed data are: + +```{r} +#| label: "rates" +# Song, Y., Stampfer, M. J., & Liu, S. (2004). Meta-Analysis: Apolipoprotein E +# Genotypes and Risk for Coronary Heart Disease. Annals of Internal Medicine, +# 141(2), 137. +meta_rates <- c("E2E2" = 0.71, "E2E3" = 11.4, "E2E4" = 2.32, + "E3E3" = 61.0, "E3E4" = 22.6, "E4E4" = 2.22) +meta_rates <- meta_rates/sum(meta_rates) # these add up to slightly > 100% + +obs_rates <- table(ad_data$Genotype)/nrow(ad_data) +round(cbind(obs_rates, meta_rates) * 100, 2) +``` + +Suppose our null hypothesis is that `Genotype` follows the same frequency distribution as the meta-analysis. Lets now test whether this difference in distributions is statistically significant. + +First, to carry out this hypothesis test, we would calculate our observed statistic. + +```{r} +#| label: "observed-gof-statistic" +#| warning: false +#| message: false +# calculating the null distribution +observed_gof_statistic <- ad_data %>% + specify(response = Genotype) %>% + hypothesize(null = "point", p = meta_rates) %>% + calculate(stat = "Chisq") +``` + +The observed statistic is `r observed_gof_statistic`. 
Now, generating a null distribution, by just dropping in a call to `generate()`: + + +```{r} +#| label: "null-distribution-gof" +#| warning: false +#| message: false +# generating a null distribution +null_distribution_gof <- ad_data %>% + specify(response = Genotype) %>% + hypothesize(null = "point", p = meta_rates) %>% + generate(reps = 5000, type = "simulate") %>% + calculate(stat = "Chisq") +``` + +Again, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: + +```{r} +#| label: "visualize-indep-gof" +#| warning: false +#| message: false +# visualize the null distribution and test statistic! +null_distribution_gof %>% + visualize() + + shade_p_value(observed_gof_statistic, + direction = "greater") +``` + +This statistic seems like it would be unlikely if our rates were the same as the rates from the meta-analysis! How unlikely, though? Calculating the p-value: + +```{r} +#| label: "get-p-value-gof" +#| warning: false +#| message: false +# calculate the p-value +p_value_gof <- null_distribution_gof %>% + get_p_value(observed_gof_statistic, + direction = "greater") + +p_value_gof +``` + +Thus, if each genotype occurred at the same rate as the Song paper, the probability that we would see a distribution like the one we did is approximately `r p_value_gof`. + +Again, equivalently to the steps shown above, the package supplies a wrapper function, `chisq_test`, to carry out chi-squared goodness of fit tests on tidy data. 
The syntax goes like this: + +```{r} +#| label: "chisq-gof-wrapper" +#| message: false +#| warning: false +chisq_test(ad_data, response = Genotype, p = meta_rates) +``` + + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/content/learn/work/_index.md b/learn/work/_index.md similarity index 100% rename from content/learn/work/_index.md rename to learn/work/_index.md diff --git a/learn/work/bayes-opt/figs/bo-param-plot-1.svg b/learn/work/bayes-opt/figs/bo-param-plot-1.svg new file mode 100644 index 00000000..18abd7ea --- /dev/null +++ b/learn/work/bayes-opt/figs/bo-param-plot-1.svg @@ -0,0 +1,359 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Components + + + + + + + + + + +Cost (log-2) + + + + + + + + + + +Radial Basis Function sigma (log-10) + + + + + + + +0 +10 +20 +30 +40 + + + + + +0 +10 +20 +30 +40 + + + + + +0 +10 +20 +30 +40 +-7.5 +-5.0 +-2.5 +0.0 + + + + +-10 +-5 +0 +5 + + + + +0 +5 +10 +15 +20 + + + + + +Iterations + + diff --git a/learn/work/bayes-opt/figs/bo-plot-1.svg b/learn/work/bayes-opt/figs/bo-plot-1.svg new file mode 100644 index 00000000..127cb3e3 --- /dev/null +++ b/learn/work/bayes-opt/figs/bo-plot-1.svg @@ -0,0 +1,265 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.25 +0.50 +0.75 + + + + + + + + +0 +10 +20 +30 +40 +Iteration +roc_auc + + diff --git a/learn/work/bayes-opt/index.qmd b/learn/work/bayes-opt/index.qmd new file mode 100644 index 00000000..4506bbd3 --- /dev/null +++ b/learn/work/bayes-opt/index.qmd @@ -0,0 +1,218 @@ +--- +title: "Iterative Bayesian optimization of a classification model" +categories: + - model tuning + - Bayesian optimization + - SVMs +type: learn-subsection +weight: 3 +description: | + Identify the best hyperparameters for a model using Bayesian optimization of iterative search. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(tune) +library(kernlab) +library(rlang) +library(doMC) +library(themis) +registerDoMC(cores = parallel::detectCores()) + +pkgs <- c("modeldata", "kernlab", "tidymodels", "themis") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +Many of the examples for model tuning focus on [grid search](/learn/work/tune-svm/). For that method, all the candidate tuning parameter combinations are defined prior to evaluation. Alternatively, _iterative search_ can be used to analyze the existing tuning parameter results and then _predict_ which tuning parameters to try next. + +There are a variety of methods for iterative search and the focus in this article is on _Bayesian optimization_. 
For more information on this method, these resources might be helpful: + +* [_Practical bayesian optimization of machine learning algorithms_](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=Practical+Bayesian+Optimization+of+Machine+Learning+Algorithms&btnG=) (2012). J Snoek, H Larochelle, and RP Adams. Advances in neural information. + +* [_A Tutorial on Bayesian Optimization for Machine Learning_](https://www.cs.toronto.edu/~rgrosse/courses/csc411_f18/tutorials/tut8_adams_slides.pdf) (2018). R Adams. + + * [_Gaussian Processes for Machine Learning_](http://www.gaussianprocess.org/gpml/) (2006). C E Rasmussen and C Williams. + +* [Other articles!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q="Bayesian+Optimization"&btnG=) + + +## Cell segmenting revisited + +To demonstrate this approach to tuning models, let's return to the cell segmentation data from the [Getting Started](/start/resampling/) article on resampling: + +```{r} +#| label: "import-data" +library(tidymodels) +library(modeldata) + +# Load data +data(cells) + +set.seed(2369) +tr_te_split <- initial_split(cells %>% select(-case), prop = 3/4) +cell_train <- training(tr_te_split) +cell_test <- testing(tr_te_split) + +set.seed(1697) +folds <- vfold_cv(cell_train, v = 10) +``` + +## The tuning scheme + +Since the predictors are highly correlated, we can used a recipe to convert the original predictors to principal component scores. There is also slight class imbalance in these data; about `r floor(mean(cells$class == "PS") * 100)`% of the data are poorly segmented. To mitigate this, the data will be down-sampled at the end of the pre-processing so that the number of poorly and well segmented cells occur with equal frequency. We can use a recipe for all this pre-processing, but the number of principal components will need to be _tuned_ so that we have enough (but not too many) representations of the data. 
+ +```{r} +#| label: "recipe" +library(themis) + +cell_pre_proc <- + recipe(class ~ ., data = cell_train) %>% + step_YeoJohnson(all_predictors()) %>% + step_normalize(all_predictors()) %>% + step_pca(all_predictors(), num_comp = tune()) %>% + step_downsample(class) +``` + +In this analysis, we will use a support vector machine to model the data. Let's use a radial basis function (RBF) kernel and tune its main parameter ($\sigma$). Additionally, the main SVM parameter, the cost value, also needs optimization. + +```{r} +#| label: "model" +svm_mod <- + svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% + set_engine("kernlab") +``` + +These two objects (the recipe and model) will be combined into a single object via the `workflow()` function from the [workflows](https://workflows.tidymodels.org/) package; this object will be used in the optimization process. + +```{r} +#| label: "workflow" +svm_wflow <- + workflow() %>% + add_model(svm_mod) %>% + add_recipe(cell_pre_proc) +``` + +From this object, we can derive information about what parameters are slated to be tuned. A parameter set is derived by: + +```{r} +#| label: "pset" +svm_set <- extract_parameter_set_dials(svm_wflow) +svm_set +``` + +The default range for the number of PCA components is rather small for this data set. A member of the parameter set can be modified using the `update()` function. Let's constrain the search to one to twenty components by updating the `num_comp` parameter. Additionally, the lower bound of this parameter is set to zero which specifies that the original predictor set should also be evaluated (i.e., with no PCA step at all): + +```{r} +#| label: "update" +svm_set <- + svm_set %>% + update(num_comp = num_comp(c(0L, 20L))) +``` + +## Sequential tuning + +Bayesian optimization is a sequential method that uses a model to predict new candidate parameters for assessment. When scoring potential parameter value, the mean and variance of performance are predicted. 
The strategy used to define how these two statistical quantities are used is defined by an _acquisition function_. + +For example, one approach for scoring new candidates is to use a confidence bound. Suppose accuracy is being optimized. For a metric that we want to maximize, a lower confidence bound can be used. The multiplier on the standard error (denoted as $\kappa$) is a value that can be used to make trade-offs between **exploration** and **exploitation**. + + * **Exploration** means that the search will consider candidates in untested space. + + * **Exploitation** focuses in areas where the previous best results occurred. + +The variance predicted by the Bayesian model is mostly spatial variation; the value will be large for candidate values that are not close to values that have already been evaluated. If the standard error multiplier is high, the search process will be more likely to avoid areas without candidate values in the vicinity. + +We'll use another acquisition function, _expected improvement_, that determines which candidates are likely to be helpful relative to the current best results. This is the default acquisition function. More information on these functions can be found in the [package vignette for acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html). + +```{r} +#| label: "search" +#| cache: false +set.seed(12) +search_res <- + svm_wflow %>% + tune_bayes( + resamples = folds, + # To use non-default parameter ranges + param_info = svm_set, + # Generate five at semi-random to start + initial = 5, + iter = 50, + # How to measure performance? 
+ metrics = metric_set(roc_auc), + control = control_bayes(no_improve = 30, verbose = TRUE) + ) +``` + +The resulting tibble is a stacked set of rows of the rsample object with an additional column for the iteration number: + +```{r} +#| label: "show-iters" +search_res +``` + +As with grid search, we can summarize the results over resamples: + +```{r} +#| label: "summarize-iters" +estimates <- + collect_metrics(search_res) %>% + arrange(.iter) + +estimates +``` + + +The best performance of the initial set of candidate values was `AUC = `r max(estimates$mean[estimates$.iter == 0])` `. The best results were achieved at iteration `r estimates$.iter[which.max(estimates$mean)]` with a corresponding AUC value of `r max(estimates$mean)`. The five best results are: + +```{r} +#| label: "best" +show_best(search_res, metric = "roc_auc") +``` + +A plot of the search iterations can be created via: + +```{r} +#| label: "bo-plot" +autoplot(search_res, type = "performance") +``` + +There are many parameter combinations have roughly equivalent results. + +How did the parameters change over iterations? 
+ + +```{r} +#| label: "bo-param-plot" +#| fig-width: 9 +autoplot(search_res, type = "parameters") + + labs(x = "Iterations", y = NULL) +``` + + + + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` + diff --git a/learn/work/case-weights/figs/unnamed-chunk-5-1.svg b/learn/work/case-weights/figs/unnamed-chunk-5-1.svg new file mode 100644 index 00000000..487465c9 --- /dev/null +++ b/learn/work/case-weights/figs/unnamed-chunk-5-1.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + +0 +2000 +4000 +days +y + + diff --git a/learn/work/case-weights/figs/unnamed-chunk-6-1.svg b/learn/work/case-weights/figs/unnamed-chunk-6-1.svg new file mode 100644 index 00000000..6b9ad495 --- /dev/null +++ b/learn/work/case-weights/figs/unnamed-chunk-6-1.svg @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + +0 +2000 +4000 +days +value + +base + + + + + + +0.99 +0.999 +0.9999 + + diff --git a/learn/work/case-weights/index.qmd b/learn/work/case-weights/index.qmd new file mode 100644 index 00000000..360ecf8c --- /dev/null +++ b/learn/work/case-weights/index.qmd @@ -0,0 +1,178 @@ +--- +title: "Creating case weights based on time" +categories: + - model fitting + - case weights + - time series +type: learn-subsection +weight: 5 +description: | + Create models that use coefficients, extract them from fitted models, and visualize them. 
+toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +pkgs <- c("tidymodels") +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +This article demonstrates how to create and use importance weights in a predictive model. Using importance weights is a way to have our model care more about some observations than others. + +## Example Data + +To demonstrate we will use the Chicago data from the modeldata package. + +```{r} +library(tidymodels) +data(Chicago) + +Chicago <- Chicago %>% + select(ridership, date, one_of(stations)) +``` + +From `?Chicago` + +> These data are from Kuhn and Johnson (2020) and contain an abbreviated training set for modeling the number of people (in thousands) who enter the Clark and Lake L station. + +> The date column corresponds to the current date. The columns with station names (Austin through California) are a sample of the columns used in the original analysis (for filesize reasons). These are 14 day lag variables (i.e. date - 14 days). There are columns related to weather and sports team schedules. + +For simplicity, we have limited our view to the date and station variables. + +## Creating weights + +This data set contains daily information from `r min(Chicago$date)` to `r max(Chicago$date)`. We will pretend that it is January 1st, 2016 and we want to predict the ridership for the remainder of 2016 using the date and station variables as predictors. Without any weighting, all the previous observations would have the same influence on the model. This may not be ideal since some observations appear a long time ago and not be as representative of the future as more recent observations. 
+ +We could just use recent observations to fit the model, ensuring that the training data stays as close to the testing data as possible. While a tempting idea, it would throw out a lot of informative data. Instead let us assign a weight to each observation, related to how long ago the observation was taken. This way we are not completely throwing away any observation; we are only giving less weight to data farther in the past. + +We need to decide on a way to calculate the case weights. The main thing constraint is that the weight cannot be negative, and it would be nice if today was weighted as 1. So we need a function that is 1 when `x = 0` and decreasing otherwise. There are many kinds of functions like that, and we will be using this exponential decay function + +$$ weight = base ^ x $$ + +where `base` is some constant and `x` is the number of days. To make sure that we select a reasonable `base`, we need to do some manual testing, starting with looking at how old the oldest observation is. + +```{r} +difftime("2016-01-01", min(Chicago$date)) +``` + +Using this information we can visualize the weight curve, to see if we like the value of `base`. + +```{r} +tibble_days <- tibble(days = 0:5457) + +tibble_days %>% + ggplot(aes(days)) + + geom_function(fun = ~ 0.99 ^ .x) +``` + +setting `base` to 0.99 appears to be down weighted too much. Any observation more than a year old would have no influence. + +Let us try a few more values to find + +```{r} +map_dfr( + c(0.99, 0.999, 0.9999), + ~ tibble_days %>% mutate(base = factor(.x), value = .x ^ days) +) %>% + ggplot(aes(days, value, group = base, color = base)) + + geom_line() +``` + +From this, we could pick something around 0.999 since it gives a better balance. Let's create a small function to help us encode this weight. + +```{r} +weights_from_dates <- function(x, ref) { + if_else( + condition = x >= ref, + true = 1, # <- Notice that I'm setting any future weight to 1. 
+ false = 0.999 ^ as.numeric(difftime(ref, x, units = "days")) + ) +} +``` + +We then modify `Chicago` to add a weight column, explicitly making it an importance weight with `importance_weight()`. + +```{r} +Chicago <- Chicago %>% + mutate(weight = weights_from_dates(date, "2016-01-01"), + weight = importance_weights(weight)) +``` + +This approach to creating importance weights from dates is not limited to cases where we have daily observations. You are free to create similar weights if you have gaps or repeated observations within the same day. Likewise, you don't need to use days as the unit. Seconds, weeks, or years could be used as well. + +## Modeling + +We start by splitting up our data into a training and testing set based on the day `"2016-01-01"`. We added weights to the data set before splitting it so each set has weights. + +```{r} +Chicago_train <- Chicago %>% filter(date < "2016-01-01") +Chicago_test <- Chicago %>% filter(date >= "2016-01-01") +``` + +Next, we are going to create a recipe. The weights won't have any influence on the preprocessing since none of these operations are supervised and we are using importance weights. + +```{r} +base_recipe <- + recipe(ridership ~ ., data = Chicago_train) %>% + # Create date features + step_date(date) %>% + step_holiday(date, keep_original_cols = FALSE) %>% + # Remove any columns with a single unique value + step_zv(all_predictors()) %>% + # Normalize all the numerical features + step_normalize(all_numeric_predictors()) %>% + # Perform PCA to reduce the correlation bet the stations + step_pca(all_numeric_predictors(), threshold = 0.95) +``` + +Next we need to build the rest of the workflow. We use a linear regression specification + +```{r} +lm_spec <- + linear_reg() %>% + set_engine("lm") +``` + +and we add these together in the workflow. To activate the case weights, we use the `add_case_weights()` function to specify the name of the case weights being used. 
+ +```{r} +lm_wflow <- + workflow() %>% + add_case_weights(weight) %>% + add_recipe(base_recipe) %>% + add_model(lm_spec) + +lm_wflow +``` + +With all that done we can fit the workflow with the usual syntax: + +```{r} +lm_fit <- fit(lm_wflow, data = Chicago_train) +lm_fit +``` + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/content/learn/work/data.svg b/learn/work/data.svg similarity index 100% rename from content/learn/work/data.svg rename to learn/work/data.svg diff --git a/learn/work/nested-resampling/figs/choose-1.svg b/learn/work/nested-resampling/figs/choose-1.svg new file mode 100644 index 00000000..8ad0cee3 --- /dev/null +++ b/learn/work/nested-resampling/figs/choose-1.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +10 +20 +30 + + + + + + + + + + + + + + + +0.25 +0.5 +1 +2 +4 +8 +16 +32 +64 +128 +256 +SVM Cost +count + + diff --git a/learn/work/nested-resampling/figs/not-nested-1.svg b/learn/work/nested-resampling/figs/not-nested-1.svg new file mode 100644 index 00000000..bafe2ea4 --- /dev/null +++ b/learn/work/nested-resampling/figs/not-nested-1.svg @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +2.6 +2.8 +3.0 +3.2 +3.4 + + + + + + + + +1 +8 +64 +SVM Cost +RMSE + + diff --git a/learn/work/nested-resampling/figs/rmse-plot-1.svg b/learn/work/nested-resampling/figs/rmse-plot-1.svg new file mode 100644 index 00000000..e7c81dc5 --- /dev/null +++ b/learn/work/nested-resampling/figs/rmse-plot-1.svg @@ -0,0 +1,162 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +3.0 +3.5 + + + + + +1 +8 +64 +SVM Cost +Inner RMSE + + 
diff --git a/learn/work/nested-resampling/img/resampling.svg b/learn/work/nested-resampling/img/resampling.svg new file mode 100644 index 00000000..33dc40f5 --- /dev/null +++ b/learn/work/nested-resampling/img/resampling.svg @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Produced by OmniGraffle 7.13.1 + 2020-03-15 00:14:09 +0000 + + + Canvas 1 + + Layer 1 + + + + + All Data + + + + + + + Training + + + + + + + Testing + + + + + + + + + + + + + Assessment + + + + + + + Analysis + + + + + + + Resample 1 + + + + + + + + + + + + + + + + Assessment + + + + + + + Analysis + + + + + + + Resample 2 + + + + + + + + + + + + + + + + Assessment + + + + + + + Analysis + + + + + + + Resample + B + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/learn/work/nested-resampling/index.qmd b/learn/work/nested-resampling/index.qmd new file mode 100644 index 00000000..19d2b8c2 --- /dev/null +++ b/learn/work/nested-resampling/index.qmd @@ -0,0 +1,301 @@ +--- +title: "Nested resampling" +categories: + - nested resampling + - SVMs +type: learn-subsection +weight: 2 +description: | + Estimate the best hyperparameters for a model using nested resampling. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(scales) +library(mlbench) +library(kernlab) +library(furrr) + +pkgs <- c("tidymodels", "scales", "mlbench", "kernlab", "furrr") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +In this article, we discuss an alternative method for evaluating and tuning models, called [nested resampling](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C7&q=%22nested+resampling%22+inner+outer&btnG=). 
While it is more computationally taxing and challenging to implement than other resampling methods, it has the potential to produce better estimates of model performance. + +## Resampling models + +A typical scheme for splitting the data when developing a predictive model is to create an initial split of the data into a training and test set. If resampling is used, it is executed on the training set. A series of binary splits is created. In rsample, we use the term *analysis set* for the data that are used to fit the model and the term *assessment set* for the set used to compute performance: + +```{r} +#| label: "resampling-fig" +#| echo: false +#| fig-align: center +#| out-width: "70%" +knitr::include_graphics("img/resampling.svg") +``` + +A common method for tuning models is [grid search](/learn/work/tune-svm/) where a candidate set of tuning parameters is created. The full set of models for every combination of the tuning parameter grid and the resamples is fitted. Each time, the assessment data are used to measure performance and the average value is determined for each tuning parameter. + +The potential problem is that once we pick the tuning parameter associated with the best performance, this performance value is usually quoted as the performance of the model. There is serious potential for *optimization bias* since we use the same data to tune the model and to assess performance. This would result in an optimistic estimate of performance. + +Nested resampling uses an additional layer of resampling that separates the tuning activities from the process used to estimate the efficacy of the model. An *outer* resampling scheme is used and, for every split in the outer resample, another full set of resampling splits are created on the original analysis set. For example, if 10-fold cross-validation is used on the outside and 5-fold cross-validation on the inside, a total of 500 models will be fit. 
The parameter tuning will be conducted 10 times and the best parameters are determined from the average of the 5 assessment sets. This process occurs 10 times. + +Once the tuning results are complete, a model is fit to each of the outer resampling splits using the best parameter associated with that resample. The average of the outer method's assessment sets are a unbiased estimate of the model. + +We will simulate some regression data to illustrate the methods. The mlbench package has a function `mlbench::mlbench.friedman1()` that can simulate a complex regression data structure from the [original MARS publication](https://scholar.google.com/scholar?hl=en&q=%22Multivariate+adaptive+regression+splines%22&btnG=&as_sdt=1%2C7&as_sdtp=). A training set size of 100 data points are generated as well as a large set that will be used to characterize how well the resampling procedure performed. + +```{r} +#| label: "sim-data" +library(mlbench) +sim_data <- function(n) { + tmp <- mlbench.friedman1(n, sd = 1) + tmp <- cbind(tmp$x, tmp$y) + tmp <- as.data.frame(tmp) + names(tmp)[ncol(tmp)] <- "y" + tmp +} + +set.seed(9815) +train_dat <- sim_data(100) +large_dat <- sim_data(10^5) +``` + +## Nested resampling + +To get started, the types of resampling methods need to be specified. This isn't a large data set, so 5 repeats of 10-fold cross validation will be used as the *outer* resampling method for generating the estimate of overall performance. To tune the model, it would be good to have precise estimates for each of the values of the tuning parameter so let's use 25 iterations of the bootstrap. This means that there will eventually be `5 * 10 * 25 = 1250` models that are fit to the data *per tuning parameter*. These models will be discarded once the performance of the model has been quantified. 
+ +To create the tibble with the resampling specifications: + +```{r} +#| label: "tibble-gen" +library(tidymodels) +results <- nested_cv(train_dat, + outside = vfold_cv(repeats = 5), + inside = bootstraps(times = 25)) +results +``` + +The splitting information for each resample is contained in the `split` objects. Focusing on the second fold of the first repeat: + +```{r} +#| label: "split-example" +results$splits[[2]] +``` + +`<90/10/100>` indicates the number of observations in the analysis set, assessment set, and the original data. + +Each element of `inner_resamples` has its own tibble with the bootstrapping splits. + +```{r} +#| label: "inner-splits" +results$inner_resamples[[5]] +``` + +These are self-contained, meaning that the bootstrap sample is aware that it is a sample of a specific 90% of the data: + +```{r} +#| label: "inner-boot-split" +results$inner_resamples[[5]]$splits[[1]] +``` + +To start, we need to define how the model will be created and measured. Let's use a radial basis support vector machine model via the function `kernlab::ksvm`. This model is generally considered to have *two* tuning parameters: the SVM cost value and the kernel parameter `sigma`. For illustration purposes here, only the cost value will be tuned and the function `kernlab::sigest` will be used to estimate `sigma` during each model fit. This is automatically done by `ksvm`. + +After the model is fit to the analysis set, the root-mean squared error (RMSE) is computed on the assessment set. **One important note:** for this model, it is critical to center and scale the predictors before computing dot products. We don't do this operation here because `mlbench.friedman1` simulates all of the predictors to be standardized uniform random variables. 
+ +Our function to fit the model and compute the RMSE is: + +```{r} +#| label: "rmse-func" +library(kernlab) + +# `object` will be an `rsplit` object from our `results` tibble +# `cost` is the tuning parameter +svm_rmse <- function(object, cost = 1) { + y_col <- ncol(object$data) + mod <- + svm_rbf(mode = "regression", cost = cost) %>% + set_engine("kernlab") %>% + fit(y ~ ., data = analysis(object)) + + holdout_pred <- + predict(mod, assessment(object) %>% dplyr::select(-y)) %>% + bind_cols(assessment(object) %>% dplyr::select(y)) + rmse(holdout_pred, truth = y, estimate = .pred)$.estimate +} + +# In some case, we want to parameterize the function over the tuning parameter: +rmse_wrapper <- function(cost, object) svm_rmse(object, cost) +``` + +For the nested resampling, a model needs to be fit for each tuning parameter and each bootstrap split. To do this, create a wrapper: + +```{r} +#| label: "inner-tune-func" +# `object` will be an `rsplit` object for the bootstrap samples +tune_over_cost <- function(object) { + tibble(cost = 2 ^ seq(-2, 8, by = 1)) %>% + mutate(RMSE = map_dbl(cost, rmse_wrapper, object = object)) +} +``` + +Since this will be called across the set of outer cross-validation splits, another wrapper is required: + +```{r} +#| label: "inner-func" +# `object` is an `rsplit` object in `results$inner_resamples` +summarize_tune_results <- function(object) { + # Return row-bound tibble that has the 25 bootstrap results + map_df(object$splits, tune_over_cost) %>% + # For each value of the tuning parameter, compute the + # average RMSE which is the inner bootstrap estimate. 
+ group_by(cost) %>% + summarize(mean_RMSE = mean(RMSE, na.rm = TRUE), + n = length(RMSE), + .groups = "drop") +} +``` + +Now that those functions are defined, we can execute all the inner resampling loops: + +```{r} +#| label: "inner-runs" +#| eval: false +tuning_results <- map(results$inner_resamples, summarize_tune_results) +``` + +Alternatively, since these computations can be run in parallel, we can use the furrr package. Instead of using `map()`, the function `future_map()` parallelizes the iterations using the [future package](https://cran.r-project.org/web/packages/future/vignettes/future-1-overview.html). The `multisession` plan uses the local cores to process the inner resampling loop. The end results are the same as the sequential computations. + +```{r} +#| label: "inner-runs-parallel" +#| warning: false +library(furrr) +plan(multisession) + +tuning_results <- future_map(results$inner_resamples, summarize_tune_results) +``` + +The object `tuning_results` is a list of data frames for each of the 50 outer resamples. + +Let's make a plot of the averaged results to see what the relationship is between the RMSE and the tuning parameters for each of the inner bootstrapping operations: + +```{r} +#| label: "rmse-plot" +#| fig-height: 4 +#| message: false +library(scales) + +pooled_inner <- tuning_results %>% bind_rows + +best_cost <- function(dat) dat[which.min(dat$mean_RMSE),] + +p <- + ggplot(pooled_inner, aes(x = cost, y = mean_RMSE)) + + scale_x_continuous(trans = 'log2') + + xlab("SVM Cost") + ylab("Inner RMSE") + +for (i in 1:length(tuning_results)) + p <- p + + geom_line(data = tuning_results[[i]], alpha = .2) + + geom_point(data = best_cost(tuning_results[[i]]), pch = 16, alpha = 3/4) + +p <- p + geom_smooth(data = pooled_inner, se = FALSE) +p +``` + +Each gray line is a separate bootstrap resampling curve created from a different 90% of the data. The blue line is a LOESS smooth of all the results pooled together. 
+ +To determine the best parameter estimate for each of the outer resampling iterations: + +```{r} +#| label: "choose" +#| fig-height: 4 +cost_vals <- + tuning_results %>% + map_df(best_cost) %>% + select(cost) + +results <- + bind_cols(results, cost_vals) %>% + mutate(cost = factor(cost, levels = paste(2 ^ seq(-2, 8, by = 1)))) + +ggplot(results, aes(x = cost)) + + geom_bar() + + xlab("SVM Cost") + + scale_x_discrete(drop = FALSE) +``` + +Most of the resamples produced an optimal cost value of 2.0, but the distribution is right-skewed due to the flat trend in the resampling profile once the cost value becomes 10 or larger. + +Now that we have these estimates, we can compute the outer resampling results for each of the `r nrow(results)` splits using the corresponding tuning parameter value: + +```{r} +#| label: "run-out-r" +results <- + results %>% + mutate(RMSE = map2_dbl(splits, cost, svm_rmse)) + +summary(results$RMSE) +``` + +The estimated RMSE for the model tuning process is `r round(mean(results$RMSE), 2)`. + +What is the RMSE estimate for the non-nested procedure when only the outer resampling method is used? For each cost value in the tuning grid, `r nrow(results)` SVM models are fit and their RMSE values are averaged. The table of cost values and mean RMSE estimates is used to determine the best cost value. The associated RMSE is the biased estimate. + +```{r} +#| label: "not-nested" +#| fig-height: 4 +not_nested <- + map(results$splits, tune_over_cost) %>% + bind_rows + +outer_summary <- not_nested %>% + group_by(cost) %>% + summarize(outer_RMSE = mean(RMSE), n = length(RMSE)) + +outer_summary + +ggplot(outer_summary, aes(x = cost, y = outer_RMSE)) + + geom_point() + + geom_line() + + scale_x_continuous(trans = 'log2') + + xlab("SVM Cost") + ylab("RMSE") +``` + +The non-nested procedure estimates the RMSE to be `r round(min(outer_summary$outer_RMSE), 2)`. Both estimates are fairly close. 
+ +The approximately true RMSE for an SVM model with a cost value of 2.0 can be approximated with the large sample that was simulated at the beginning. + +```{r} +#| label: "large-sample-estimate" +finalModel <- ksvm(y ~ ., data = train_dat, C = 2) +large_pred <- predict(finalModel, large_dat[, -ncol(large_dat)]) +sqrt(mean((large_dat$y - large_pred) ^ 2, na.rm = TRUE)) +``` + +The nested procedure produces a closer estimate to the approximate truth but the non-nested estimate is very similar. + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/content/learn/work/thumbnail.png b/learn/work/thumbnail.png similarity index 100% rename from content/learn/work/thumbnail.png rename to learn/work/thumbnail.png diff --git a/learn/work/tune-svm/figs/augment-preds-1.svg b/learn/work/tune-svm/figs/augment-preds-1.svg new file mode 100644 index 00000000..72f7ceb0 --- /dev/null +++ b/learn/work/tune-svm/figs/augment-preds-1.svg @@ -0,0 +1,489 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +bad + + + + + + + + + + +good + + + + + + + +-1.0 +-0.5 +0.0 +0.5 +1.0 + + + + + +-1.0 +-0.5 +0.0 +0.5 +1.0 +0.00 +0.25 
+0.50 +0.75 +1.00 + + + + + +V3 +.pred_good + + diff --git a/learn/work/tune-svm/index.qmd b/learn/work/tune-svm/index.qmd new file mode 100644 index 00000000..5e6cab08 --- /dev/null +++ b/learn/work/tune-svm/index.qmd @@ -0,0 +1,237 @@ +--- +title: "Model tuning via grid search" +categories: + - model tuning + - SVMs +type: learn-subsection +weight: 1 +description: | + Choose hyperparameters for a model by training on a grid of many possible parameter values. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(mlbench) +library(kernlab) +library(doMC) +registerDoMC(cores = parallel::detectCores()) + +pkgs <- c("tidymodels", "mlbench", "kernlab") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +This article demonstrates how to tune a model using grid search. Many models have **hyperparameters** that can't be learned directly from a single data set when training the model. Instead, we can train many models in a grid of possible hyperparameter values and see which ones turn out best. + +## Example data + +To demonstrate model tuning, we'll use the Ionosphere data in the mlbench package: + +```{r} +#| label: "load-data" +library(tidymodels) +library(mlbench) +data(Ionosphere) +``` + +From `?Ionosphere`: + +> This radar data was collected by a system in Goose Bay, Labrador. This system consists of a phased array of 16 high-frequency antennas with a total transmitted power on the order of 6.4 kilowatts. See the paper for more details. The targets were free electrons in the ionosphere. "good" radar returns are those showing evidence of some type of structure in the ionosphere. "bad" returns are those that do not; their signals pass through the ionosphere. 
+ +> Received signals were processed using an autocorrelation function whose arguments are the time of a pulse and the pulse number. There were 17 pulse numbers for the Goose Bay system. Instances in this databse are described by 2 attributes per pulse number, corresponding to the complex values returned by the function resulting from the complex electromagnetic signal. See cited below for more details. + +There are 43 predictors and a factor outcome. Two of the predictors are factors (`V1` and `V2`) and the rest are numeric variables that have been scaled to a range of -1 to 1. Note that the two factor predictors have sparse distributions: + +```{r} +#| label: "factor-pred" +table(Ionosphere$V1) +table(Ionosphere$V2) +``` + +There's no point of putting `V2` into any model since is is a zero-variance predictor. `V1` is not but it _could_ be if the resampling process ends up sampling all of the same value. Is this an issue? It might be since the standard R formula infrastructure fails when there is only a single observed value: + +```{r} +#| label: "glm-fail" +#| error: false +#| eval: false +glm(Class ~ ., data = Ionosphere, family = binomial) + +# Surprisingly, this doesn't help: + +glm(Class ~ . - V2, data = Ionosphere, family = binomial) +``` + +Let's remove these two problematic variables: + +```{r} +#| label: "ion-rm" +Ionosphere <- Ionosphere %>% select(-V1, -V2) +``` + +## Inputs for the search + +To demonstrate, we'll fit a radial basis function support vector machine to these data and tune the SVM cost parameter and the $\sigma$ parameter in the kernel function: + +```{r} +#| label: "svm-mod" +svm_mod <- + svm_rbf(cost = tune(), rbf_sigma = tune()) %>% + set_mode("classification") %>% + set_engine("kernlab") +``` + +In this article, tuning will be demonstrated in two ways, using: + +- a standard R formula, and +- a recipe. 
+ +Let's create a simple recipe here: + +```{r} +#| label: "rec" +iono_rec <- + recipe(Class ~ ., data = Ionosphere) %>% + # remove any zero variance predictors + step_zv(all_predictors()) %>% + # remove any linear combinations + step_lincomb(all_numeric()) +``` + +The only other required item for tuning is a resampling strategy as defined by an rsample object. Let's demonstrate using basic bootstrapping: + +```{r} +#| label: "rs" +set.seed(4943) +iono_rs <- bootstraps(Ionosphere, times = 30) +``` + +## Optional inputs + +An _optional_ step for model tuning is to specify which metrics should be computed using the out-of-sample predictions. For classification, the default is to calculate the log-likelihood statistic and overall accuracy. Instead of the defaults, the area under the ROC curve will be used. To do this, a yardstick package function can be used to create a metric set: + +```{r} +#| label: "roc" +roc_vals <- metric_set(roc_auc) +``` + +If no grid or parameters are provided, a set of 10 hyperparameters are created using a space-filling design (via a Latin hypercube). A grid can be given in a data frame where the parameters are in columns and parameter combinations are in rows. Here, the default will be used. + +Also, a control object can be passed that specifies different aspects of the search. Here, the verbose option is turned off and the option to save the out-of-sample predictions is turned on. 
+ +```{r} +#| label: "ctrl" +ctrl <- control_grid(verbose = FALSE, save_pred = TRUE) +``` + +## Executing with a formula + +First, we can use the formula interface: + +```{r} +#| label: "grid" +#| message: false +set.seed(35) +formula_res <- + svm_mod %>% + tune_grid( + Class ~ ., + resamples = iono_rs, + metrics = roc_vals, + control = ctrl + ) +formula_res +``` + +The `.metrics` column contains tibbles of the performance metrics for each tuning parameter combination: + +```{r} +#| label: "raw-metrics" +formula_res %>% + select(.metrics) %>% + slice(1) %>% + pull(1) +``` + +To get the final resampling estimates, the `collect_metrics()` function can be used on the grid object: + +```{r} +#| label: "metric-estimates" +estimates <- collect_metrics(formula_res) +estimates +``` + +The top combinations are: + +```{r} +#| label: "sorted-metrics" +show_best(formula_res, metric = "roc_auc") +``` + +## Executing with a recipe + +Next, we can use the same syntax but pass a *recipe* in as the pre-processor argument: + +```{r} +#| label: "recipe" +set.seed(325) +recipe_res <- + svm_mod %>% + tune_grid( + iono_rec, + resamples = iono_rs, + metrics = roc_vals, + control = ctrl + ) +recipe_res +``` + +The best setting here is: + +```{r} +#| label: "best-rec" +show_best(recipe_res, metric = "roc_auc") +``` + +## Out-of-sample predictions + +If we used `save_pred = TRUE` to keep the out-of-sample predictions for each resample during tuning, we can obtain those predictions, along with the tuning parameters and resample identifier, using `collect_predictions()`: + +```{r} +#| label: "rec-preds" +collect_predictions(recipe_res) +``` + +We can obtain the hold-out sets for all the resamples augmented with the predictions using `augment()`, which provides opportunities for flexible visualization of model results: + +```{r} +#| label: "augment-preds" +augment(recipe_res) %>% + ggplot(aes(V3, .pred_good, color = Class)) + + geom_point(show.legend = FALSE) + + facet_wrap(~Class) +``` + +## 
Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/content/learn/work/tune-svm/ionosphere.csv b/learn/work/tune-svm/ionosphere.csv similarity index 100% rename from content/learn/work/tune-svm/ionosphere.csv rename to learn/work/tune-svm/ionosphere.csv diff --git a/learn/work/tune-text/figs/grid-plot-1.svg b/learn/work/tune-text/figs/grid-plot-1.svg new file mode 100644 index 00000000..69b4f2c1 --- /dev/null +++ b/learn/work/tune-text/figs/grid-plot-1.svg @@ -0,0 +1,531 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Hash Features: 256 + + + + + + + + + + +# Hash Features: 1024 + + + + + + + + + + +# Hash Features: 4096 + + + + + + +0.001 +0.010 +0.100 +1.000 + + + + +0.001 +0.010 +0.100 +1.000 + + + + +0.001 +0.010 +0.100 +1.000 +0.5 +0.6 +0.7 +0.8 + + + + +Amount of Regularization +roc_auc + +P +r +o +p +o +r +t +i +o +n + +o +f + +L +a +s +s +o + +P +e +n +a +l +t +y + + + + + + + + + + + + + + + +0.01 +0.25 +0.50 +0.75 +1.00 + + diff --git a/learn/work/tune-text/figs/iter-plot-1.svg b/learn/work/tune-text/figs/iter-plot-1.svg new file mode 100644 index 00000000..ade51895 --- /dev/null +++ b/learn/work/tune-text/figs/iter-plot-1.svg 
@@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.5 +0.6 +0.7 +0.8 + + + + + + + +0 +10 +20 +Iteration +roc_auc + + diff --git a/learn/work/tune-text/figs/var-plot-1.svg b/learn/work/tune-text/figs/var-plot-1.svg new file mode 100644 index 00000000..929f5cfa --- /dev/null +++ b/learn/work/tune-text/figs/var-plot-1.svg @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +1000 +2000 +3000 + + + + + + + +0.1 +1.0 +10.0 +penalty +Number of retained predictors +mixture = 0.01 and 4096 features + + diff --git a/learn/work/tune-text/index.qmd b/learn/work/tune-text/index.qmd new file mode 100644 index 00000000..8fe9c22f --- /dev/null +++ b/learn/work/tune-text/index.qmd @@ -0,0 +1,417 @@ +--- +title: "Tuning text models" +categories: + - model tuning + - text analysis + - logistic regression + - Bayesian optimization + - extracting results + +type: learn-subsection +weight: 4 +description: | + Prepare text data for predictive modeling and tune with both grid and iterative search. +toc: true +toc-depth: 2 +include-after-body: ../../../resources.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +library(tidymodels) +library(stopwords) +library(doMC) +registerDoMC(cores = parallel::detectCores()) + +pkgs <- c("tidymodels", "textrecipes", "textfeatures", "stopwords") + +theme_set(theme_bw()) +``` + +## Introduction + +`r article_req_pkgs(pkgs)` + +This article demonstrates an advanced example for training and tuning models for text data. 
Text data must be processed and transformed to a numeric representation to be ready for computation in modeling; in tidymodels, we use a recipe for this preprocessing. This article also shows how to extract information from each model fit during tuning to use later on. + + +## Text as data + +The text data we'll use in this article are from Amazon: + +> This dataset consists of reviews of fine foods from amazon. The data span a period of more than 10 years, including all ~500,000 reviews up to October 2012. Reviews include product and user information, ratings, and a plaintext review. + +This article uses a small subset of the total reviews [available at the original source](https://snap.stanford.edu/data/web-FineFoods.html). We sampled a single review from 5,000 random products and allocated 80% of these data to the training set, with the remaining 1,000 reviews held out for the test set. + +There is a column for the product, a column for the text of the review, and a factor column for the outcome variable. The outcome is whether the reviewer gave the product a five-star rating or not. + +```{r} +#| label: "data" +library(tidymodels) + +data("small_fine_foods") +training_data +``` + +Our modeling goal is to create modeling features from the text of the reviews to predict whether the review was five-star or not. + +## Inputs for the search + +Text, perhaps more so than tabular data we often deal with, must be heavily processed to be used as predictor data for modeling. There are multiple ways to process and prepare text for modeling; let's add several steps together to create different kinds of features: + +* Create an initial set of count-based features, such as the number of words, spaces, lower- or uppercase characters, URLs, and so on; we can use the [textfeatures](https://github.com/mkearney/textfeatures) package for this. + +* [Tokenize](https://smltar.com/tokenization.html) the text (i.e. break the text into smaller components such as words). 
+ +* Remove stop words such as "the", "an", "of", etc. + +* [Stem](https://smltar.com/stemming.html) tokens to a common root where possible. + +* Convert tokens to dummy variables via a [signed, binary hash function](https://bookdown.org/max/FES/encoding-predictors-with-many-categories.html). + +* Optionally transform non-token features (the count-based features like number of lowercase characters) to a more symmetric state using a [Yeo-Johnson transformation](https://bookdown.org/max/FES/numeric-one-to-one.html). + +* Remove predictors with a single distinct value. + +* Center and scale all predictors. + + +::: {.callout-note} + We will end up with two kinds of features: + +- dummy/indicator variables for the count-based features like number of digits or punctuation characters +- hash features for the tokens like "salsa" or "delicious". +::: + +Some of these preprocessing steps (such as stemming) may or may not be good ideas but a full discussion of their effects is beyond the scope of this article. In this preprocessing approach, the main tuning parameter is the number of hashing features to use. + +Before we start building our preprocessing recipe, we need some helper objects. For example, for the Yeo-Johnson transformation, we need to know the set of count-based text features: + +```{r} +#| label: "feat-list" +library(textfeatures) + +basics <- names(textfeatures:::count_functions) +head(basics) +``` + +Also, the implementation of feature hashes does not produce the binary values we need. 
This small function will help convert the scores to values of -1, 0, or 1: + +```{r} +#| label: "hash-func" +binary_hash <- function(x) { + x <- ifelse(x < 0, -1, x) + x <- ifelse(x > 0, 1, x) + x +} +``` + +Now, let's put this all together in one recipe: + +```{r} +#| label: "text-rec" +library(textrecipes) + +pre_proc <- + recipe(score ~ product + review, data = training_data) %>% + # Do not use the product ID as a predictor + update_role(product, new_role = "id") %>% + # Make a copy of the raw text + step_mutate(review_raw = review) %>% + # Compute the initial features. This removes the `review_raw` column + step_textfeature(review_raw) %>% + # Make the feature names shorter + step_rename_at( + starts_with("textfeature_"), + fn = ~ gsub("textfeature_review_raw_", "", .) + ) %>% + step_tokenize(review) %>% + step_stopwords(review) %>% + step_stem(review) %>% + # Here is where the tuning parameter is declared + step_texthash(review, signed = TRUE, num_terms = tune()) %>% + # Simplify these names + step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) %>% + # Convert the features from counts to values of -1, 0, or 1 + step_mutate_at(starts_with("hash"), fn = binary_hash) %>% + # Transform the initial feature set + step_YeoJohnson(one_of(!!basics)) %>% + step_zv(all_predictors()) %>% + step_normalize(all_predictors()) +``` + +::: {.callout-warning} + Note that, when objects from the global environment are used, they are injected into the step objects via `!!`. For some parallel processing technologies, these objects may not be found by the worker processes. +::: + +The preprocessing recipe is long and complex (often typical for working with text data) but the model we'll use is more straightforward. 
Let's stick with a regularized logistic regression model: + +```{r} +#| label: "lr" +lr_mod <- + logistic_reg(penalty = tune(), mixture = tune()) %>% + set_engine("glmnet") +``` + +There are three tuning parameters for this data analysis: + +- `num_terms`, the number of feature hash variables to create +- `penalty`, the amount of regularization for the model +- `mixture`, the proportion of L1 regularization + +## Resampling + +There are enough data here so that 10-fold resampling would hold out 400 reviews at a time to estimate performance. Performance estimates using this many observations have sufficiently low noise to measure and tune models. + +```{r} +#| label: "folds" +set.seed(8935) +folds <- vfold_cv(training_data) +folds +``` + +## Grid search + +Let's begin our tuning with [grid search](https://www.tidymodels.org/learn/work/tune-svm/) and a regular grid. For glmnet models, evaluating penalty values is fairly cheap because of the use of the ["submodel-trick"](https://tune.tidymodels.org/articles/extras/optimizations.html#sub-model-speed-ups-1). The grid will use 20 penalty values, 5 mixture values, and 3 values for the number of hash features. + +```{r} +#| label: "grid" +five_star_grid <- + crossing( + penalty = 10^seq(-3, 0, length = 20), + mixture = c(0.01, 0.25, 0.50, 0.75, 1), + num_terms = 2^c(8, 10, 12) + ) +five_star_grid +``` + +Note that, for each resample, the (computationally expensive) text preprocessing recipe is only prepped 6 times. This increases the efficiency of the analysis by avoiding redundant work. + +Let's save information on the number of predictors by penalty value for each glmnet model. This can help us understand how many features were used across the penalty values. 
Use an extraction function to do this: + +```{r} +#| label: "extract" +glmnet_vars <- function(x) { + # `x` will be a workflow object + mod <- extract_model(x) + # `df` is the number of model terms for each penalty value + tibble(penalty = mod$lambda, num_vars = mod$df) +} + +ctrl <- control_grid(extract = glmnet_vars, verbose = TRUE) +``` + +Finally, let's run the grid search: + +```{r} +#| label: "grid-search" +#| message: false +roc_scores <- metric_set(roc_auc) + +set.seed(1559) +five_star_glmnet <- + tune_grid( + lr_mod, + pre_proc, + resamples = folds, + grid = five_star_grid, + metrics = roc_scores, + control = ctrl + ) + +five_star_glmnet +``` + +This took a while to complete! What do the results look like? Let's get the resampling estimates of the area under the ROC curve for each tuning parameter: + +```{r} +#| label: "grid-roc" +grid_roc <- + collect_metrics(five_star_glmnet) %>% + arrange(desc(mean)) +grid_roc +``` + +The best results have a fairly high penalty value and focus on the ridge penalty (i.e. no feature selection via the lasso's L1 penalty). The best solutions also use the largest number of hashing features. + +What is the relationship between performance and the tuning parameters? + +```{r} +#| label: "grid-plot" +#| fig-width: 10 +autoplot(five_star_glmnet, metric = "roc_auc") +``` + +- We can definitely see that performance improves with the number of features included. In this article, we've used a small sample of the overall data set available. When more data are used, an even larger feature set is optimal. + +- The profiles with larger mixture values (greater than 0.01) have steep drop-offs in performance. What's that about? Those are cases where the lasso penalty is removing too many (and perhaps all) features from the model. 
+- The panel with at least 4096 features shows that there are several parameter combinations that have about the same performance; there isn't much difference between the best performance for the different mixture values. A case could be made that we should choose a _larger_ mixture value and a _smaller_ penalty to select a simpler model that contains fewer predictors. + +- If more experimentation were conducted, a larger set of features (more than 4096) should also be considered. + +We'll come back to the extracted glmnet components at the end of this article. + +## Directed search + +What if we had started with Bayesian optimization? Would a good set of conditions have been found more efficiently? + +Let's pretend that we haven't seen the grid search results. We'll initialize the Gaussian process model with five tuning parameter combinations chosen with a space-filling design. + +It might be good to use a custom `dials` object for the number of hash terms. The default object, `num_terms()`, uses a linear range and tries to set the upper bound of the parameter using the data. Instead, let's create a parameter set, change the scale to be `log2`, and define the same range as was used in grid search. + +```{r} +#| label: "hash-range" +hash_range <- num_terms(c(8, 12), trans = log2_trans()) +hash_range +``` + +To use this, we have to merge the recipe and `parsnip` model object into a workflow: + +```{r} +#| label: "wflow" +five_star_wflow <- + workflow() %>% + add_recipe(pre_proc) %>% + add_model(lr_mod) +``` + +Then we can extract and manipulate the corresponding parameter set: + +```{r} +#| label: "search-set" +five_star_set <- + five_star_wflow %>% + parameters() %>% + update( + num_terms = hash_range, + penalty = penalty(c(-3, 0)), + mixture = mixture(c(0.05, 1.00)) + ) +``` + +This is passed to the search function via the `param_info` argument. 
+ +The initial rounds of search can be biased more towards exploration of the parameter space (as opposed to staying near the current best results). If expected improvement is used as the acquisition function, the trade-off value can be slowly moved from exploration to exploitation over iterations (see the tune vignette on [acquisition functions](https://tune.tidymodels.org/articles/acquisition_functions.html) for more details). The tune package has a built-in function called `expo_decay()` that can help accomplish this: + +```{r} +#| label: "decay" +trade_off_decay <- function(iter) { + expo_decay(iter, start_val = .01, limit_val = 0, slope = 1/4) +} +``` + +Using these values, let's run the search: + +```{r} +#| label: "search" +set.seed(12) +five_star_search <- + tune_bayes( + five_star_wflow, + resamples = folds, + param_info = five_star_set, + initial = 5, + iter = 30, + metrics = roc_scores, + objective = exp_improve(trade_off_decay), + control = control_bayes(verbose_iter = TRUE) + ) + +five_star_search +``` + +These results show some improvement over the initial set. One issue is that so many settings are sub-optimal (as shown in the plot above for grid search) so there are poor results periodically. There are regions where the penalty parameter becomes too large and all of the predictors are removed from the model. These regions are also dependent on the number of terms. There is a fairly narrow ridge (sorry, pun intended!) where good performance can be achieved. Using more iterations would probably result in the search finding better results. +Let's look at a plot of model performance versus the search iterations: + +```{r} +#| label: "iter-plot" +autoplot(five_star_search, type = "performance") +``` + +::: {.callout-note} +What would we do if we knew about the grid search results and wanted to try directed, iterative search? We would restrict the range for the number of hash features to be larger (especially with more data). 
We might also restrict the penalty and mixture parameters to have a lower upper bound. +::: + +## Extracted results + +Let's return to the grid search results and examine the results of our `extract` function. For each _fitted model_, a tibble was saved that contains the relationship between the number of predictors and the penalty value. Let's look at these results for the best model: + +```{r} +#| label: "best-res" +params <- select_best(five_star_glmnet, metric = "roc_auc") +params +``` + +Recall that we saved the glmnet results in a tibble. The column `five_star_glmnet$.extracts` is a list of tibbles. As an example, the first element of the list is: + +```{r} +#| label: "first-elem" +five_star_glmnet$.extracts[[1]] +``` + +More nested tibbles! Let's `unnest()` the `five_star_glmnet$.extracts` column: + +```{r} +#| label: "unnest" +library(tidyr) +extracted <- + five_star_glmnet %>% + dplyr::select(id, .extracts) %>% + unnest(cols = .extracts) +extracted +``` + +One thing to realize here is that `tune_grid()` [may not fit all of the models](https://tune.tidymodels.org/articles/extras/optimizations.html) that are evaluated. In this case, for each value of `mixture` and `num_terms`, the model is fit over _all_ penalty values (this is a feature of this particular model and is not generally true for other engines). 
To select the best parameter set, we can exclude the `penalty` column in `extracted`: + + +```{r} +#| label: "select-best" +extracted <- + extracted %>% + dplyr::select(-penalty) %>% + inner_join(params, by = c("num_terms", "mixture")) %>% + # Now remove it from the final results + dplyr::select(-penalty) +extracted +``` + +Now we can get at the results that we want using another `unnest()`: + +```{r} +#| label: "final-unnest" +extracted <- + extracted %>% + unnest(col = .extracts) # <- these contain a `penalty` column +extracted +``` + +Let's look at a plot of these results (per resample): + +```{r} +#| label: "var-plot" +ggplot(extracted, aes(x = penalty, y = num_vars)) + + geom_line(aes(group = id, col = id), alpha = .5) + + ylab("Number of retained predictors") + + scale_x_log10() + + ggtitle(paste("mixture = ", params$mixture, "and", params$num_terms, "features")) + + theme(legend.position = "none") +``` + +These results might help guide the choice of the `penalty` range if more optimization was conducted. + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/make_function_lists.R b/make_function_lists.R new file mode 100644 index 00000000..6f5f808c --- /dev/null +++ b/make_function_lists.R @@ -0,0 +1,257 @@ +# ------------------------------------------------------------------------------ +# Make data sets for function reference searches. Run this offline to refresh +# data objects. 
+ +library(tidymodels) +library(glue) +library(utils) +library(revdepcheck) +library(fs) +library(pkgdown) +library(urlchecker) + +# ------------------------------------------------------------------------------ + +tidymodels_prefer() +theme_set(theme_bw()) +options(pillar.advice = FALSE, pillar.min_title_chars = Inf) + +# ------------------------------------------------------------------------------ +# Use the pkgdown package to parse the source files and put them into a usable format + +# TODO find a better way to figure out how to find the true "check_" recipe operations +# from just the source files + +get_pkg_info <- function(pkg, pth = tempdir(), keep_internal = FALSE, pattern = NULL) { + src_file <- + download.packages(pkg, + destdir = pth, + repos = "https://cran.rstudio.com/", + quiet = TRUE) + if (nrow(src_file) != length(pkg)) { + return(NULL) + rlang::warn(glue::glue("package {pkg} was not downloaded")) + } + pkg_path <- fs::path(pth, pkg) + on.exit(fs::dir_delete(pkg_path)) + + untar_res <- purrr::map_int(src_file[, 2], untar, exdir = pth) + fs::file_delete(src_file[, 2]) + if (any(untar_res != 0)) { + rlang::abort(glue::glue("package {pkg} did not unpack correctly")) + } + pkg_info <- pkgdown::as_pkgdown(pkg_path) + res <- pkg_info$topics + if (!keep_internal) { + res <- dplyr::filter(res, !internal) + } + res <- + res %>% + dplyr::select(file_out, functions = alias, title) %>% + tidyr::unnest(functions) %>% + mutate(package = pkg, all_urls = list(pkg_info$desc$get_urls())) %>% + relocate(package, all_urls) + if (!is.null(pattern)) { + res <- dplyr::filter(res, grepl(pattern, functions)) + } + res +} + +# See if any of the urls appear to correspond to the _standard_ pkgdown structure. +# Is so, link to the specific pkgdown html package, otherwise link to the first +# url or, if there are none listed, the canonical CRAN page link. 
+# We use an internal function in urlchecker to essentially ping the potential url + +sort_out_urls <- function(x) { + test_urls <- + x %>% + group_by(package) %>% + slice(1) %>% + ungroup() %>% + unnest(all_urls) %>% + mutate( + URL = map_chr(all_urls, ~ glue("{.x[[1]]}/reference/index.html")), + URL = gsub("//", "/", URL, fixed = TRUE) + ) %>% + select(URL, Parent = functions, package, all_urls) + url_check_fails <- + urlchecker:::tools$check_url_db(test_urls) %>% + dplyr::select(URL) + pkgdown_urls <- + test_urls %>% + anti_join(url_check_fails, by = "URL") %>% + select(package, pkgdown_url = all_urls) %>% + group_by(package) %>% + slice(1) %>% + ungroup() + x %>% + left_join(pkgdown_urls, by = "package") %>% + mutate( + first_url = map_chr(all_urls, ~ .x[1]), + first_url = ifelse(is.na(first_url), + glue("https://cran.r-project.org/package={package}"), + first_url), + base_url = ifelse(is.na(pkgdown_url), + first_url, + pkgdown_url), + url = ifelse(!is.na(pkgdown_url), + glue("{pkgdown_url}/reference/{file_out}"), + base_url), + topic = glue("{functions}") + ) %>% + dplyr::select(title, functions, topic, package) %>% + mutate(package = as.factor(package)) %>% + filter(!grepl("deprecated", tolower(title))) %>% + arrange(tolower(gsub("[[:punct:]]", "", title))) +} + +# ------------------------------------------------------------------------------ + +broom_pkgs <- revdepcheck::cran_revdeps("broom", dependencies = c("Depends", "Imports")) +generics_pkgs <- revdepcheck::cran_revdeps("generics", dependencies = "Imports") + +broom_pkgs <- sort(unique(c(broom_pkgs, generics_pkgs))) +excl <- c("hydrorecipes", "healthcareai") +broom_pkgs <- broom_pkgs[!(broom_pkgs %in% excl)] + +broom_functions <- + map_dfr( + broom_pkgs, + get_pkg_info, + pattern = "(^tidy\\.)|(^glance\\.)|(^augment\\.)", + .progress = TRUE + ) %>% + sort_out_urls() %>% + select(-functions) + +save( + broom_functions, + file = "find/broom/broom_functions.RData", + compress = TRUE) + +# 
------------------------------------------------------------------------------ + +recipe_pkgs <- revdepcheck::cran_revdeps("recipes", dependencies = c("Depends", "Imports")) + +recipe_pkgs <- sort(unique(c(recipe_pkgs))) +excl <- c("hydrorecipes", "healthcareai") +recipe_pkgs <- recipe_pkgs[!(recipe_pkgs %in% excl)] + + +recipe_functions <- + map_dfr( + recipe_pkgs, + get_pkg_info, + pattern = "^step_", + .progress = TRUE + ) %>% + sort_out_urls() %>% + select(-functions) + +save( + recipe_functions, + file = "find/recipes/recipe_functions.RData", + compress = TRUE) + +# ------------------------------------------------------------------------------ + +all_tm <- + c("agua", "applicable", "baguette", "brulee", "broom", "butcher", + "censored", "corrr", "dials", "discrim", "embed", "finetune", + "hardhat", "infer", "modeldata", "modeldb", + "modelenv", "multilevelmod", "parsnip", "plsmod", "poissonreg", + "probably", "recipes", "rsample", "rules", "shinymodels", "spatialsample", + "stacks", "textrecipes", "themis", "tidyclust", "tidymodels", + "tidyposterior", "tidypredict", "tune", "usemodels", "workflows", + "workflowsets", "yardstick") + + +tidymodels_functions <- + map_dfr( + all_tm, + get_pkg_info, + .progress = TRUE + ) %>% + sort_out_urls() %>% + filter(grepl("^\\.", functions)) %>% + select(-functions) + +save( + tidymodels_functions, + file = "find/all/tidymodels_functions.RData", + compress = TRUE) + +# ------------------------------------------------------------------------------ + +parsnip_pkgs <- revdepcheck::cran_revdeps("parsnip", dependencies = c("Depends", "Imports")) +parsnip_pkgs <- c(parsnip_pkgs, "parsnip") +# These ignore the tidymodels design principles and/or don't work with the broader ecosystem +# or we don't don't have any models in them +excl <- c("additive", "bayesian", "SSLR", "workflowsets", "workflows", "tune", + "tidymodels", "shinymodels", "stacks") +parsnip_pkgs <- parsnip_pkgs[!(parsnip_pkgs %in% excl)] + +# Load them then get the 
model data base +loaded <- map_lgl(parsnip_pkgs, ~ suppressPackageStartupMessages(require(.x, character.only = TRUE))) +table(loaded) + +# h2o overwrites soooo many functions; this may take a few minutes +conflicted::conflict_prefer_all("base", loser = "h2o", quiet = TRUE) + +model_list <- + map_dfr(get_from_env("models"), ~ get_from_env(.x) %>% mutate(model = .x)) %>% + mutate( + mode = factor(mode, levels = c("classification", "regression", "censored regression")) + ) %>% + group_nest(model, engine) %>% + mutate( + modes = map_chr(data, ~ paste0(sort(.x$mode), collapse = ", ")), + functions = glue("details_{model}_{engine}") + ) %>% + select(-data) + +parsnip_model_info <- + map_dfr( + parsnip_pkgs, + get_pkg_info, + keep_internal = TRUE, + .progress = TRUE + ) %>% + sort_out_urls() + +# Split model/engine combinations by whether they have "details" pages. Link to +# the details pages whenever possible. + +has_details <- + parsnip_model_info %>% + filter(grepl("^details_", functions)) %>% + inner_join(model_list, by = "functions") %>% + mutate(topic = gsub("details_", "", topic)) + +no_details <- + model_list %>% + anti_join(has_details %>% select(model, engine), by = c("model", "engine")) %>% + mutate(functions = model) %>% + inner_join(parsnip_model_info, by = "functions") + +parsnip_models <- + no_details %>% + select(title, model, engine, topic, modes, package) %>% + bind_rows( + has_details %>% + select(title, model, engine, topic, modes, package) + ) %>% + mutate( + model = paste0("", model, ""), + engine = paste0("", engine, ""), + title = gsub("General Interface for ", "", title) + ) %>% + arrange(model, engine) %>% + select(title, model, engine, topic, modes, package) + +save( + parsnip_models, + file = "find/parsnip/parsnip_models.RData", + compress = TRUE) + + diff --git a/netlify.toml b/netlify.toml deleted file mode 100644 index 0902e581..00000000 --- a/netlify.toml +++ /dev/null @@ -1,21 +0,0 @@ -[build] - publish = "public" - command = "hugo" - 
-[context.production.environment] - HUGO_VERSION = "0.96.0" - HUGO_ENV = "production" - HUGO_ENABLEGITINFO = "true" - -[context.branch-deploy.environment] - HUGO_VERSION = "0.96.0" - -[context.deploy-preview.environment] - HUGO_VERSION = "0.96.0" - -[context.deploy-preview] - command = "hugo -b $DEPLOY_PRIME_URL --buildFuture" - -[context.branch-deploy] - command = "hugo -b $DEPLOY_PRIME_URL --buildFuture" - diff --git a/packages/index.qmd b/packages/index.qmd new file mode 100644 index 00000000..e6782029 --- /dev/null +++ b/packages/index.qmd @@ -0,0 +1,136 @@ +--- +title: Tidymodels packages +toc: true +toc-depth: 1 +include-after-body: ../resources.html +--- + +## Installation and use + +* Install many of the packages in the tidymodels ecosystem by running `install.packages("tidymodels")`. + +* Run `library(tidymodels)` to load the core packages and make them available in your current R session. + +
    + + +
    +
    + +
    +

    tidymodels

    +

    tidymodels is a meta-package that installs and load the core packages listed below that you need for modeling and machine learning. +

    +
    +
    +
    + +
    +

    rsample

    +

    rsample provides infrastructure for efficient data splitting and resampling.

    +
    +
    +
    + +
    +

    parsnip

    +

    parsnip is a tidy, unified interface to models that can be used to try a range of models without getting bogged down in the syntactical minutiae of the underlying packages.

    +
    +
    +
    + +
    +

    recipes

    +

    recipes is a tidy interface to data pre-processing tools for feature engineering.

    +
    +
    +
    + +
    +

    workflows

    +

    workflows bundle your pre-processing, modeling, and post-processing together.

    +
    +
    +
    + +
    +

    tune

    +

    tune helps you optimize the hyperparameters of your model and pre-processing steps.

    +
    +
    +
    + +
    +

    yardstick

    +

    yardstick measures the effectiveness of models using performance metrics.

    +
    +
    +
    + +
    +

    broom

    +

    broom converts the information in common statistical R objects into user-friendly, predictable formats. +

    +
    +
    +
    + +
    +

    dials

    +

    dials creates and manages tuning parameters and parameter grids. +

    +
    +
    +
    +
    + +Learn more about the tidymodels metapackage itself at . + +## Specialized packages + +The tidymodels framework also includes many other packages designed for specialized data analysis and modeling tasks. They are not loaded automatically with `library(tidymodels)`, so you'll need to load each one with its own call to `library()`. These packages include: + +### [Perform statistical analysis](/learn/statistics/) + +* [infer](https://infer.tidymodels.org/) is a high-level API for tidyverse-friendly statistical inference. + +* The [corrr](https://corrr.tidymodels.org/) package has tidy interfaces for working with correlation matrices. + +### [Create robust models](/learn/models/) + +* The [spatialsample](http://spatialsample.tidymodels.org/) package provides resampling functions and classes like rsample, but specialized for spatial data. + +* parsnip also has additional packages that contain more model definitions. [discrim](https://discrim.tidymodels.org/) contains definitions for discriminant analysis models, [poissonreg](https://poissonreg.tidymodels.org/) provides definitions for Poisson regression models, [plsmod](https://plsmod.tidymodels.org/) enables linear projection models, and [rules](https://rules.tidymodels.org/) does the same for rule-based classification and regression models. [baguette](https://baguette.tidymodels.org/) creates ensemble models via bagging, and [multilevelmod](https://multilevelmod.tidymodels.org/) provides support for multilevel models (otherwise known as mixed models or hierarchical models). + +* There are several add-on packages for creating recipes. [embed](https://embed.tidymodels.org/) contains steps to create embeddings or projections of predictors. [textrecipes](https://textrecipes.tidymodels.org/) has extra steps for text processing, and [themis](https://themis.tidymodels.org/) can help alleviate class imbalance using sampling methods. 
+ +* [tidypredict](https://tidypredict.tidymodels.org/) and [modeldb](https://modeldb.tidymodels.org/) can convert prediction equations to different languages (e.g. SQL) and fit some models in-database. + +### [Tune, compare, and work with your models](/learn/work/) + +* To try out multiple different workflows (i.e. bundles of pre-processor and model) at once, [workflowsets](https://workflowsets.tidymodels.org/) lets you create sets of workflow objects for tuning and resampling. + +* To integrate predictions from many models, the [stacks](https://stacks.tidymodels.org/) package provides tools for stacked ensemble modeling. + +* The [finetune](https://finetune.tidymodels.org/) package extends the tune package with more approaches such as racing and simulated annealing. + +* The [usemodels](https://usemodels.tidymodels.org/) package creates templates and automatically generates code to fit and tune models. + +* [probably](https://probably.tidymodels.org/) has tools for post-processing class probability estimates. + +* The [tidyposterior](https://tidyposterior.tidymodels.org/) package enables users to make formal statistical comparisons between models using resampling and Bayesian methods. + +* Some R objects become inconveniently large when saved to disk. The [butcher](https://butcher.tidymodels.org/) package can reduce the size of those objects by removing the sub-components. + +* To know whether the data that you are predicting are _extrapolations_ from the training set, [applicable](https://applicable.tidymodels.org/) can produce metrics that measure extrapolation. + +* [shinymodels](https://shinymodels.tidymodels.org/) lets you explore tuning or resampling results via a Shiny app. + +### [Develop custom modeling tools](/learn/develop/) + +* [hardhat](https://hardhat.tidymodels.org/) is a _developer-focused_ package that helps beginners create high-quality R packages for modeling. 
diff --git a/resources.html b/resources.html new file mode 100644 index 00000000..54a16fea --- /dev/null +++ b/resources.html @@ -0,0 +1,27 @@ +
    +
    Resources
    +
    +
    +   Find +
    +
    Explore searchable tables of all tidymodels packages and functions.
    +
    +
    +
    +   Books +
    +
    Study up on statistics and modeling with our comprehensive books.
    +
    +
    +
    +   News +
    +
    Hear the latest about tidymodels packages at the tidyverse blog.
    +
    +
    + + diff --git a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_150x150_fill_q75_box_smart1.jpg b/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_150x150_fill_q75_box_smart1.jpg deleted file mode 100644 index 57fbb027..00000000 Binary files a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_150x150_fill_q75_box_smart1.jpg and /dev/null differ diff --git a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_150x150_fit_q75_box.jpg b/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_150x150_fit_q75_box.jpg deleted file mode 100644 index 96a886b1..00000000 Binary files a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_150x150_fit_q75_box.jpg and /dev/null differ diff --git a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_300x300_fill_q75_box_smart1.jpg b/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_300x300_fill_q75_box_smart1.jpg deleted file mode 100644 index e3aa9a8a..00000000 Binary files a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_300x300_fill_q75_box_smart1.jpg and /dev/null differ diff --git a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_300x300_fill_q75_box_top.jpg b/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_300x300_fill_q75_box_top.jpg deleted file mode 100644 index 465c26dd..00000000 Binary files a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_300x300_fill_q75_box_top.jpg and /dev/null differ diff --git a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_500x500_fill_q75_box_center.jpg b/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_500x500_fill_q75_box_center.jpg deleted file mode 100644 index 407d7bb8..00000000 
Binary files a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_500x500_fill_q75_box_center.jpg and /dev/null differ diff --git a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_500x500_fill_q75_box_top.jpg b/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_500x500_fill_q75_box_top.jpg deleted file mode 100644 index c700c11a..00000000 Binary files a/resources/_gen/images/books/fes/cover_hu3326d100d263260914699e43b6fa4551_11841_500x500_fill_q75_box_top.jpg and /dev/null differ diff --git a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_150x150_fill_box_smart1_2.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_150x150_fill_box_smart1_2.png deleted file mode 100644 index 45f7e8fe..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_150x150_fill_box_smart1_2.png and /dev/null differ diff --git a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_150x150_fit_box_2.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_150x150_fit_box_2.png deleted file mode 100644 index 06b3e182..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_150x150_fit_box_2.png and /dev/null differ diff --git a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_300x300_fill_box_smart1_2.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_300x300_fill_box_smart1_2.png deleted file mode 100644 index ce53f836..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_300x300_fill_box_smart1_2.png and /dev/null differ diff --git 
a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_300x300_fill_box_top_2.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_300x300_fill_box_top_2.png deleted file mode 100644 index 440f5ac3..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_300x300_fill_box_top_2.png and /dev/null differ diff --git a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_center_2.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_center_2.png deleted file mode 100644 index 71f8e43e..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_center_2.png and /dev/null differ diff --git a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_top_2.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_top_2.png deleted file mode 100644 index 4487b09e..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_top_2.png and /dev/null differ diff --git a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_top_3.png b/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_top_3.png deleted file mode 100644 index 4487b09e..00000000 Binary files a/resources/_gen/images/books/moderndive/cover_hu18db51943d4fe6a133c464dc26b2dcd7_108553_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/smltar/cover_hu78ba12b063b8f23aca2700d6525fd775_47957_500x500_fill_q75_box_top.jpg 
b/resources/_gen/images/books/smltar/cover_hu78ba12b063b8f23aca2700d6525fd775_47957_500x500_fill_q75_box_top.jpg deleted file mode 100644 index ea151cc8..00000000 Binary files a/resources/_gen/images/books/smltar/cover_hu78ba12b063b8f23aca2700d6525fd775_47957_500x500_fill_q75_box_top.jpg and /dev/null differ diff --git a/resources/_gen/images/books/smltar/cover_hub976e5c6de164a699cbf934c19f6130b_8580_500x500_fill_box_top_2.png b/resources/_gen/images/books/smltar/cover_hub976e5c6de164a699cbf934c19f6130b_8580_500x500_fill_box_top_2.png deleted file mode 100644 index 73177f00..00000000 Binary files a/resources/_gen/images/books/smltar/cover_hub976e5c6de164a699cbf934c19f6130b_8580_500x500_fill_box_top_2.png and /dev/null differ diff --git a/resources/_gen/images/books/smltar/cover_hub976e5c6de164a699cbf934c19f6130b_8580_500x500_fill_box_top_3.png b/resources/_gen/images/books/smltar/cover_hub976e5c6de164a699cbf934c19f6130b_8580_500x500_fill_box_top_3.png deleted file mode 100644 index 73177f00..00000000 Binary files a/resources/_gen/images/books/smltar/cover_hub976e5c6de164a699cbf934c19f6130b_8580_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_150x150_fill_box_smart1_2.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_150x150_fill_box_smart1_2.png deleted file mode 100644 index 26439708..00000000 Binary files a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_150x150_fill_box_smart1_2.png and /dev/null differ diff --git a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_150x150_fit_box_2.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_150x150_fit_box_2.png deleted file mode 100644 index 10ed7e7a..00000000 Binary files 
a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_150x150_fit_box_2.png and /dev/null differ diff --git a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_300x300_fill_box_smart1_2.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_300x300_fill_box_smart1_2.png deleted file mode 100644 index 93b90d74..00000000 Binary files a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_300x300_fill_box_smart1_2.png and /dev/null differ diff --git a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_300x300_fill_box_top_2.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_300x300_fill_box_top_2.png deleted file mode 100644 index 3c3df13b..00000000 Binary files a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_300x300_fill_box_top_2.png and /dev/null differ diff --git a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_center_2.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_center_2.png deleted file mode 100644 index 43d80bd9..00000000 Binary files a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_center_2.png and /dev/null differ diff --git a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_top_2.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_top_2.png deleted file mode 100644 index 67860678..00000000 Binary files a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_top_2.png and /dev/null differ diff --git 
a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_top_3.png b/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_top_3.png deleted file mode 100644 index 3b63e75c..00000000 Binary files a/resources/_gen/images/books/tidytext/cover_huddc6c3235d904191d21f4e5f7e097c21_88341_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu0f2672b9420a6b581c38962b08745838_178729_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu0f2672b9420a6b581c38962b08745838_178729_500x500_fill_box_top_3.png deleted file mode 100644 index ff4f249e..00000000 Binary files a/resources/_gen/images/books/tmwr/cover_hu0f2672b9420a6b581c38962b08745838_178729_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu1be595f2b9a42191080f50a90ae1e59c_116699_500x500_fill_q75_box_top.jpg b/resources/_gen/images/books/tmwr/cover_hu1be595f2b9a42191080f50a90ae1e59c_116699_500x500_fill_q75_box_top.jpg deleted file mode 100644 index b200e488..00000000 Binary files a/resources/_gen/images/books/tmwr/cover_hu1be595f2b9a42191080f50a90ae1e59c_116699_500x500_fill_q75_box_top.jpg and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu1f9c069fa4003c768d62d7d9f59935c1_2351_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu1f9c069fa4003c768d62d7d9f59935c1_2351_500x500_fill_box_top_3.png deleted file mode 100644 index decf698c..00000000 Binary files a/resources/_gen/images/books/tmwr/cover_hu1f9c069fa4003c768d62d7d9f59935c1_2351_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu23e99dedaa40f2273e8f7ee8a663c02f_246939_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu23e99dedaa40f2273e8f7ee8a663c02f_246939_500x500_fill_box_top_3.png deleted file mode 100644 index bd97387a..00000000 Binary files 
a/resources/_gen/images/books/tmwr/cover_hu23e99dedaa40f2273e8f7ee8a663c02f_246939_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu3850665c946d86f10f9de3689b9095ad_32768_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu3850665c946d86f10f9de3689b9095ad_32768_500x500_fill_box_top_3.png deleted file mode 100644 index e69de29b..00000000 diff --git a/resources/_gen/images/books/tmwr/cover_hu3850665c946d86f10f9de3689b9095ad_606844_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu3850665c946d86f10f9de3689b9095ad_606844_500x500_fill_box_top_3.png deleted file mode 100644 index b5ffc8a4..00000000 Binary files a/resources/_gen/images/books/tmwr/cover_hu3850665c946d86f10f9de3689b9095ad_606844_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu3b2a1b3dbb7e8e261d9cf8b1596f63f0_2343156_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu3b2a1b3dbb7e8e261d9cf8b1596f63f0_2343156_500x500_fill_box_top_3.png deleted file mode 100644 index e944dad8..00000000 Binary files a/resources/_gen/images/books/tmwr/cover_hu3b2a1b3dbb7e8e261d9cf8b1596f63f0_2343156_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hu4887ab403897df9972c1c51f39593b50_122242_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hu4887ab403897df9972c1c51f39593b50_122242_500x500_fill_box_top_3.png deleted file mode 100644 index 5688e5bf..00000000 Binary files a/resources/_gen/images/books/tmwr/cover_hu4887ab403897df9972c1c51f39593b50_122242_500x500_fill_box_top_3.png and /dev/null differ diff --git a/resources/_gen/images/books/tmwr/cover_hub21bd02bbe2eabcd4b78cc19ae857e8e_40822_500x500_fill_box_top_3.png b/resources/_gen/images/books/tmwr/cover_hub21bd02bbe2eabcd4b78cc19ae857e8e_40822_500x500_fill_box_top_3.png deleted file mode 100644 index 917e3d9c..00000000 Binary files 
a/resources/_gen/images/books/tmwr/cover_hub21bd02bbe2eabcd4b78cc19ae857e8e_40822_500x500_fill_box_top_3.png and /dev/null differ diff --git a/start/case-study/figs/logistic-results-1.svg b/start/case-study/figs/logistic-results-1.svg new file mode 100644 index 00000000..80966d9e --- /dev/null +++ b/start/case-study/figs/logistic-results-1.svg @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.5 +0.6 +0.7 +0.8 + + + + + + + + +0.0001 +0.0010 +0.0100 +0.1000 +penalty +Area under the ROC Curve + + diff --git a/start/case-study/figs/logistic-roc-curve-1.svg b/start/case-study/figs/logistic-roc-curve-1.svg new file mode 100644 index 00000000..8a52ebeb --- /dev/null +++ b/start/case-study/figs/logistic-roc-curve-1.svg @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +1 - specificity +sensitivity + + diff --git a/start/case-study/figs/lr-plot-lines-1.svg b/start/case-study/figs/lr-plot-lines-1.svg new file mode 100644 index 00000000..dcc3c5e8 --- /dev/null +++ b/start/case-study/figs/lr-plot-lines-1.svg @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.5 +0.6 +0.7 +0.8 + + + + + + + + +0.0001 +0.0010 +0.0100 +0.1000 +penalty +Area under the ROC Curve + + diff --git a/start/case-study/figs/rf-importance-1.svg b/start/case-study/figs/rf-importance-1.svg new file mode 100644 index 00000000..758cccf0 --- /dev/null +++ b/start/case-study/figs/rf-importance-1.svg @@ -0,0 +1,137 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +is_repeated_guest +required_car_parking_spaces +customer_type 
+distribution_channel +meal +stays_in_weekend_nights +arrival_date_year +hotel +market_segment +booking_changes +total_of_special_requests +arrival_date_dow +stays_in_week_nights +adults +country +arrival_date_month +assigned_room_type +lead_time +reserved_room_type +average_daily_rate + + + + + + + + + + + + + + + + + + + + + + + + + +0 +200 +400 +600 +800 +Importance + + diff --git a/start/case-study/figs/rf-lr-roc-curve-1.svg b/start/case-study/figs/rf-lr-roc-curve-1.svg new file mode 100644 index 00000000..bc54f903 --- /dev/null +++ b/start/case-study/figs/rf-lr-roc-curve-1.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +1 - specificity +sensitivity + +model + + + + +Logistic Regression +Random Forest + + diff --git a/start/case-study/figs/rf-results-1.svg b/start/case-study/figs/rf-results-1.svg new file mode 100644 index 00000000..c0ee0b9b --- /dev/null +++ b/start/case-study/figs/rf-results-1.svg @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Randomly Selected Predictors + + + + + + + + + + +Minimal Node Size + + + + + +0 +10 +20 + + + + +10 +20 +30 +40 +0.87 +0.88 +0.89 +0.90 +0.91 +0.92 + + + + + + +roc_auc + + diff --git a/start/case-study/figs/test-set-roc-curve-1.svg b/start/case-study/figs/test-set-roc-curve-1.svg new file mode 100644 index 00000000..4bc73f63 --- /dev/null +++ b/start/case-study/figs/test-set-roc-curve-1.svg @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +1 - specificity +sensitivity + + diff 
--git a/content/start/case-study/hotels.csv b/start/case-study/hotels.csv similarity index 100% rename from content/start/case-study/hotels.csv rename to start/case-study/hotels.csv diff --git a/start/case-study/img/hotel.jpg b/start/case-study/img/hotel.jpg new file mode 100644 index 00000000..9facc454 Binary files /dev/null and b/start/case-study/img/hotel.jpg differ diff --git a/start/case-study/img/validation-split.svg b/start/case-study/img/validation-split.svg new file mode 100644 index 00000000..b22dfe03 --- /dev/null +++ b/start/case-study/img/validation-split.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + Produced by OmniGraffle 7.11.3 + 2020-03-23 01:54:23 +0000 + + + Canvas 1 + + Layer 1 + + + + + All Data + + + + + + + Not + Testing + + + + + + + Testing + + + + + + + + + + + + + Validation + + + + + + + Training + + + + + + + Partition + + + + + + + + + + + + + + diff --git a/start/case-study/index.qmd b/start/case-study/index.qmd new file mode 100644 index 00000000..a9af16c1 --- /dev/null +++ b/start/case-study/index.qmd @@ -0,0 +1,572 @@ +--- +title: "A predictive modeling case study" +weight: 5 +categories: + - model fitting + - tuning + - parsnip + - recipes + - rsample + - workflows + - tune +description: | + Develop, from beginning to end, a predictive model using best practices. +toc-location: body +toc-depth: 2 +toc-title: "" +css: ../styles.css +include-after-body: ../repo-actions-delete.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(readr) +library(vip) +library(tidymodels) +pkgs <- c("tidymodels", "readr", "glmnet", "ranger", "vip") +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction {#intro} + +Each of the four previous [*Get Started*](/start/) articles has focused on a single task related to modeling. 
Along the way, we also introduced core packages in the tidymodels ecosystem and some of the key functions you'll need to start working with models. In this final case study, we will use all of the previous articles as a foundation to build a predictive model from beginning to end with data on hotel stays. + +```{r} +#| echo: false +#| out-width: "90%" +knitr::include_graphics("img/hotel.jpg") +``` + +`r article_req_pkgs(pkgs)` + +```{r} +#| eval: false +library(tidymodels) + +# Helper packages +library(readr) # for importing data +library(vip) # for variable importance plots +``` + +{{< test-drive url="https://rstudio.cloud/project/2674862" >}} + +## The Hotel Bookings Data {#data} + +Let's use hotel bookings data from [Antonio, Almeida, and Nunes (2019)](https://doi.org/10.1016/j.dib.2018.11.126) to predict which hotel stays included children and/or babies, based on the other characteristics of the stays such as which hotel the guests stay at, how much they pay, etc. This was also a [`#TidyTuesday`](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11) dataset with a [data dictionary](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-02-11#data-dictionary) you may want to look over to learn more about the variables. We'll use a slightly [edited version of the dataset](https://gist.github.com/topepo/05a74916c343e57a71c51d6bc32a21ce) for this case study. 
+ +To start, let's read our hotel data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a url where our CSV data is located (""): + +```{r} +#| label: "hotel-import" +#| message: false +library(tidymodels) +library(readr) + +hotels <- + read_csv('https://tidymodels.org/start/case-study/hotels.csv') %>% + mutate(across(where(is.character), as.factor)) + +dim(hotels) +``` + +In the original paper, the [authors](https://doi.org/10.1016/j.dib.2018.11.126) caution that the distribution of many variables (such as number of adults/children, room type, meals bought, country of origin of the guests, and so forth) is different for hotel stays that were canceled versus not canceled. This makes sense because much of that information is gathered (or gathered again more accurately) when guests check in for their stay, so canceled bookings are likely to have more missing data than non-canceled bookings, and/or to have different characteristics when data is not missing. Given this, it is unlikely that we can reliably detect meaningful differences between guests who cancel their bookings and those who do not with this dataset. To build our models here, we have already filtered the data to include only the bookings that did not cancel, so we'll be analyzing *hotel stays* only. + +```{r} +#| label: "glimpse-hotels" +glimpse(hotels) +``` + +We will build a model to predict which actual hotel stays included children and/or babies, and which did not. Our outcome variable `children` is a factor variable with two levels: + +```{r} +#| label: "count-children" +hotels %>% + count(children) %>% + mutate(prop = n/sum(n)) +``` + +We can see that children were only in `r round(mean(hotels$children == "children") * 100, 1)`% of the reservations. This type of class imbalance can often wreak havoc on an analysis. 
While there are several methods for combating this issue using [recipes](/find/recipes/) (search for steps to `upsample` or `downsample`) or other more specialized packages like [themis](https://themis.tidymodels.org/), the analyses shown below analyze the data as-is. + +## Data Splitting & Resampling {#data-split} + +For a data splitting strategy, let's reserve 25% of the stays to the test set. As in our [*Evaluate your model with resampling*](/start/resampling/#data-split) article, we know our outcome variable `children` is pretty imbalanced so we'll use a stratified random sample: + +```{r} +#| label: "tr-te-split" +set.seed(123) +splits <- initial_split(hotels, strata = children) + +hotel_other <- training(splits) +hotel_test <- testing(splits) + +# training set proportions by children +hotel_other %>% + count(children) %>% + mutate(prop = n/sum(n)) + +# test set proportions by children +hotel_test %>% + count(children) %>% + mutate(prop = n/sum(n)) +``` + +In our articles so far, we've relied on 10-fold cross-validation as the primary resampling method using [`rsample::vfold_cv()`](https://rsample.tidymodels.org/reference/vfold_cv.html). This has created 10 different resamples of the training set (which we further split into *analysis* and *assessment* sets), producing 10 different performance metrics that we then aggregated. + +For this case study, rather than using multiple iterations of resampling, let's create a single resample called a *validation set*. In tidymodels, a validation set is treated as a single iteration of resampling. This will be a split from the `r format(nrow(hotel_other), big.mark = ",")` stays that were not used for testing, which we called `hotel_other`. This split creates two new datasets: + +- the set held out for the purpose of measuring performance, called the *validation set*, and + +- the remaining data used to fit the model, called the *training set*. 
+ +```{r} +#| label: "validation-fig" +#| echo: false +#| fig-align: center +#| out-width: "50%" +knitr::include_graphics("img/validation-split.svg") +``` + +We'll use the `validation_split()` function to allocate 20% of the `hotel_other` stays to the *validation set* and `r format(nrow(hotel_other) * .8, big.mark = ",")` stays to the *training set*. This means that our model performance metrics will be computed on a single set of `r format(nrow(hotel_other) * .2, big.mark = ",")` hotel stays. This is fairly large, so the amount of data should provide enough precision to be a reliable indicator for how well each model predicts the outcome with a single iteration of resampling. + +```{r} +#| label: "validation-set" +set.seed(234) +val_set <- validation_split(hotel_other, + strata = children, + prop = 0.80) +val_set +``` + +This function, like `initial_split()`, has the same `strata` argument, which uses stratified sampling to create the resample. This means that we'll have roughly the same proportions of hotel stays with and without children in our new validation and training sets, as compared to the original `hotel_other` proportions. + +## A first model: penalized logistic regression {#first-model} + +Since our outcome variable `children` is categorical, logistic regression would be a good first model to start. Let's use a model that can perform feature selection during training. The [glmnet](https://cran.r-project.org/web/packages/glmnet/index.html) R package fits a generalized linear model via penalized maximum likelihood. This method of estimating the logistic regression slope parameters uses a *penalty* on the process so that less relevant predictors are driven towards a value of zero. One of the glmnet penalization methods, called the [lasso method](https://en.wikipedia.org/wiki/Lasso_(statistics)), can actually set the predictor slopes to zero if a large enough penalty is used. 
+ +### Build the model + +To specify a penalized logistic regression model that uses a feature selection penalty, let's use the parsnip package with the [glmnet engine](/find/parsnip/): + +```{r} +#| label: "logistic-model" +lr_mod <- + logistic_reg(penalty = tune(), mixture = 1) %>% + set_engine("glmnet") +``` + +We'll set the `penalty` argument to `tune()` as a placeholder for now. This is a model hyperparameter that we will [tune](/start/tuning/) to find the best value for making predictions with our data. Setting `mixture` to a value of one means that the glmnet model will potentially remove irrelevant predictors and choose a simpler model. + +### Create the recipe + +Let's create a [recipe](/start/recipes/) to define the preprocessing steps we need to prepare our hotel stays data for this model. It might make sense to create a set of date-based predictors that reflect important components related to the arrival date. We have already introduced a [number of useful recipe steps](/start/recipes/#features) for creating features from dates: + +- `step_date()` creates predictors for the year, month, and day of the week. + +- `step_holiday()` generates a set of indicator variables for specific holidays. Although we don't know where these two hotels are located, we do know that the countries for origin for most stays are based in Europe. + +- `step_rm()` removes variables; here we'll use it to remove the original date variable since we no longer want it in the model. + +Additionally, all categorical predictors (e.g., `distribution_channel`, `hotel`, ...) should be converted to dummy variables, and all numeric predictors need to be centered and scaled. + +- `step_dummy()` converts characters or factors (i.e., nominal variables) into one or more numeric binary model terms for the levels of the original data. + +- `step_zv()` removes indicator variables that only contain a single unique value (e.g. all zeros). 
This is important because, for penalized models, the predictors should be centered and scaled. + +- `step_normalize()` centers and scales numeric variables. + +Putting all these steps together into a recipe for a penalized logistic regression model, we have: + +```{r} +#| label: "logistic-features" +holidays <- c("AllSouls", "AshWednesday", "ChristmasEve", "Easter", + "ChristmasDay", "GoodFriday", "NewYearsDay", "PalmSunday") + +lr_recipe <- + recipe(children ~ ., data = hotel_other) %>% + step_date(arrival_date) %>% + step_holiday(arrival_date, holidays = holidays) %>% + step_rm(arrival_date) %>% + step_dummy(all_nominal_predictors()) %>% + step_zv(all_predictors()) %>% + step_normalize(all_predictors()) +``` + +### Create the workflow + +As we introduced in [*Preprocess your data with recipes*](/start/recipes/#fit-workflow), let's bundle the model and recipe into a single `workflow()` object to make management of the R objects easier: + +```{r} +#| label: "logistic-workflow" +lr_workflow <- + workflow() %>% + add_model(lr_mod) %>% + add_recipe(lr_recipe) +``` + +### Create the grid for tuning + +Before we fit this model, we need to set up a grid of `penalty` values to tune. In our [*Tune model parameters*](/start/tuning/) article, we used [`dials::grid_regular()`](start/tuning/#tune-grid) to create an expanded grid based on a combination of two hyperparameters. Since we have only one hyperparameter to tune here, we can set the grid up manually using a one-column tibble with 30 candidate values: + +```{r} +#| label: "logistic-grid" +lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30)) + +lr_reg_grid %>% top_n(-5) # lowest penalty values +lr_reg_grid %>% top_n(5) # highest penalty values +``` + +### Train and tune the model + +Let's use `tune::tune_grid()` to train these 30 penalized logistic regression models. 
We'll also save the validation set predictions (via the call to `control_grid()`) so that diagnostic information can be available after the model fit. The area under the ROC curve will be used to quantify how well the model performs across a continuum of event thresholds (recall that the event rate---the proportion of stays including children--- is very low for these data). + +```{r} +#| label: "logistic-fit" +#| cache: false +#| message: false +lr_res <- + lr_workflow %>% + tune_grid(val_set, + grid = lr_reg_grid, + control = control_grid(save_pred = TRUE), + metrics = metric_set(roc_auc)) +``` + +It might be easier to visualize the validation set metrics by plotting the area under the ROC curve against the range of penalty values: + +```{r} +#| label: "logistic-results" +#| fig-height: 4.25 +#| fig-width: 6 +lr_plot <- + lr_res %>% + collect_metrics() %>% + ggplot(aes(x = penalty, y = mean)) + + geom_point() + + geom_line() + + ylab("Area under the ROC Curve") + + scale_x_log10(labels = scales::label_number()) + +lr_plot +``` + +This plots shows us that model performance is generally better at the smaller penalty values. This suggests that the majority of the predictors are important to the model. We also see a steep drop in the area under the ROC curve towards the highest penalty values. This happens because a large enough penalty will remove *all* predictors from the model, and not surprisingly predictive accuracy plummets with no predictors in the model (recall that an ROC AUC value of 0.50 means that the model does no better than chance at predicting the correct class). 
+ +Our model performance seems to plateau at the smaller penalty values, so going by the `roc_auc` metric alone could lead us to multiple options for the "best" value for this hyperparameter: + +```{r} +top_models <- + lr_res %>% + show_best("roc_auc", n = 15) %>% + arrange(penalty) +top_models +``` + +```{r} +#| label: "top-cand-mods" +#| echo: false +# Adding a candidate model ID variable +top_models <- + top_models %>% + mutate(cand_model = row_number()) + +top_pen <- + lr_res %>% + select_best("roc_auc") %>% + pull(penalty) + +top_cand <- + top_models %>% + filter(penalty == top_pen) %>% + pull(cand_model) + +select_cand <- 12 + +select_pen <- + top_models %>% + filter(cand_model == select_cand) %>% + pull(penalty) + +select_roc <- + top_models %>% + filter(cand_model == select_cand) %>% + pull(mean) +``` + +Every candidate model in this tibble likely includes more predictor variables than the model in the row below it. If we used `select_best()`, it would return candidate model `r top_cand` with a penalty value of `r format(top_pen, digits = 3)`, shown with the dotted line below. + +```{r} +#| label: "lr-plot-lines" +#| echo: false +#| fig-height: 4.25 +#| fig-width: 6 +lr_plot + + geom_vline(color = "#CA225E", lty = 3, xintercept = top_pen) + + geom_vline(color = "#CA225E", xintercept = select_pen) +``` + +However, we may want to choose a penalty value further along the x-axis, closer to where we start to see the decline in model performance. For example, candidate model `r select_cand` with a penalty value of `r format(select_pen, digits = 3)` has effectively the same performance as the numerically best model, but might eliminate more predictors. This penalty value is marked by the solid line above. In general, fewer irrelevant predictors is better. If performance is about the same, we'd prefer to choose a higher penalty value. 
+ +Let's select this value and visualize the validation set ROC curve: + +```{r} +#| label: "logistic-best" +lr_best <- + lr_res %>% + collect_metrics() %>% + arrange(penalty) %>% + slice(12) +lr_best +``` + +```{r} +#| label: "logistic-roc-curve" +lr_auc <- + lr_res %>% + collect_predictions(parameters = lr_best) %>% + roc_curve(children, .pred_children) %>% + mutate(model = "Logistic Regression") + +autoplot(lr_auc) +``` + +The level of performance generated by this logistic regression model is good, but not groundbreaking. Perhaps the linear nature of the prediction equation is too limiting for this data set. As a next step, we might consider a highly non-linear model generated using a tree-based ensemble method. + +## A second model: tree-based ensemble {#second-model} + +An effective and low-maintenance modeling technique is a *random forest*. This model was also used in our [*Evaluate your model with resampling*](/start/resampling/) article. Compared to logistic regression, a random forest model is more flexible. A random forest is an *ensemble model* typically made up of thousands of decision trees, where each individual tree sees a slightly different version of the training data and learns a sequence of splitting rules to predict new data. Each tree is non-linear, and aggregating across trees makes random forests also non-linear but more robust and stable compared to individual trees. Tree-based models like random forests require very little preprocessing and can effectively handle many types of predictors (sparse, skewed, continuous, categorical, etc.). + +### Build the model and improve training time + +Although the default hyperparameters for random forests tend to give reasonable results, we'll plan to tune two hyperparameters that we think could improve performance. Unfortunately, random forest models can be computationally expensive to train and to tune. 
The computations required for model tuning can usually be easily parallelized to improve training time. The tune package can do [parallel processing](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) for you, and allows users to use multiple cores or separate machines to fit models. + +But, here we are using a single validation set, so parallelization isn't an option using the tune package. For this specific case study, a good alternative is provided by the engine itself. The ranger package offers a built-in way to compute individual random forest models in parallel. To do this, we need to know the number of cores we have to work with. We can use the parallel package to query the number of cores on your own computer to understand how much parallelization you can do: + +```{r} +#| label: "num-cores" +cores <- parallel::detectCores() +cores +``` + +We have `r cores` cores to work with. We can pass this information to the ranger engine when we set up our parsnip `rand_forest()` model. To enable parallel processing, we can pass engine-specific arguments like `num.threads` to ranger when we set the engine: + +```{r} +#| label: "rf-model" +rf_mod <- + rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% + set_engine("ranger", num.threads = cores) %>% + set_mode("classification") +``` + +This works well in this modeling context, but it bears repeating: if you use any other resampling method, let tune do the parallel processing for you --- we typically do not recommend relying on the modeling engine (like we did here) to do this. + +In this model, we used `tune()` as a placeholder for the `mtry` and `min_n` argument values, because these are our two hyperparameters that we will [tune](/start/tuning/). + +### Create the recipe and workflow + +Unlike penalized logistic regression models, random forest models do not require [dummy](https://bookdown.org/max/FES/categorical-trees.html) or normalized predictor variables. 
Nevertheless, we want to do some feature engineering again with our `arrival_date` variable. As before, the date predictor is engineered so that the random forest model does not need to work hard to tease these potential patterns from the data. + +```{r} +#| label: "rf-features" +rf_recipe <- + recipe(children ~ ., data = hotel_other) %>% + step_date(arrival_date) %>% + step_holiday(arrival_date) %>% + step_rm(arrival_date) +``` + +Adding this recipe to our parsnip model gives us a new workflow for predicting whether a hotel stay included children and/or babies as guests with a random forest: + +```{r} +#| label: "rf-workflow" +rf_workflow <- + workflow() %>% + add_model(rf_mod) %>% + add_recipe(rf_recipe) +``` + +### Train and tune the model + +When we set up our parsnip model, we chose two hyperparameters for tuning: + +```{r} +#| message: false +rf_mod + +# show what will be tuned +extract_parameter_set_dials(rf_mod) +``` + +The `mtry` hyperparameter sets the number of predictor variables that each node in the decision tree "sees" and can learn about, so it can range from 1 to the total number of features present; when `mtry` = all possible features, the model is the same as bagging decision trees. The `min_n` hyperparameter sets the minimum `n` to split at any node. + +We will use a space-filling design to tune, with 25 candidate models: + +```{r} +#| label: "rf-fit" +#| cache: false +set.seed(345) +rf_res <- + rf_workflow %>% + tune_grid(val_set, + grid = 25, + control = control_grid(save_pred = TRUE), + metrics = metric_set(roc_auc)) +``` + +The message printed above *"Creating pre-processing data to finalize unknown parameter: mtry"* is related to the size of the data set. Since `mtry` depends on the number of predictors in the data set, `tune_grid()` determines the upper bound for `mtry` once it receives the data. 
+ +Here are our top 5 random forest models, out of the 25 candidates: + +```{r} +rf_res %>% + show_best(metric = "roc_auc") +``` + +Right away, we see that these values for area under the ROC look more promising than our top model using penalized logistic regression, which yielded an ROC AUC of `r format(select_roc, digits = 3)`. + +Plotting the results of the tuning process highlights that both `mtry` (number of predictors at each node) and `min_n` (minimum number of data points required to keep splitting) should be fairly small to optimize performance. However, the range of the y-axis indicates that the model is very robust to the choice of these parameter values --- all but one of the ROC AUC values are greater than 0.90. + +```{r} +#| label: "rf-results" +#| fig-height: 4 +autoplot(rf_res) +``` + +Let's select the best model according to the ROC AUC metric. Our final tuning parameter values are: + +```{r} +#| label: "rf-best" +rf_best <- + rf_res %>% + select_best(metric = "roc_auc") +rf_best +``` + +To calculate the data needed to plot the ROC curve, we use `collect_predictions()`. This is only possible after tuning with `control_grid(save_pred = TRUE)`. In the output, you can see the two columns that hold our class probabilities for predicting hotel stays including and not including children. 
+ +```{r} +rf_res %>% + collect_predictions() +``` + +To filter the predictions for only our best random forest model, we can use the `parameters` argument and pass it our tibble with the best hyperparameter values from tuning, which we called `rf_best`: + +```{r} +#| label: "rf-roc" +rf_auc <- + rf_res %>% + collect_predictions(parameters = rf_best) %>% + roc_curve(children, .pred_children) %>% + mutate(model = "Random Forest") +``` + +Now, we can compare the validation set ROC curves for our top penalized logistic regression model and random forest model: + +```{r} +#| label: "rf-lr-roc-curve" +bind_rows(rf_auc, lr_auc) %>% + ggplot(aes(x = 1 - specificity, y = sensitivity, col = model)) + + geom_path(lwd = 1.5, alpha = 0.8) + + geom_abline(lty = 3) + + coord_equal() + + scale_color_viridis_d(option = "plasma", end = .6) +``` + +The random forest is uniformly better across event probability thresholds. + +## The last fit {#last-fit} + +Our goal was to predict which hotel stays included children and/or babies. The random forest model clearly performed better than the penalized logistic regression model, and would be our best bet for predicting hotel stays with and without children. After selecting our best model and hyperparameter values, our last step is to fit the final model on all the rows of data not originally held out for testing (both the training and the validation sets combined), and then evaluate the model performance one last time with the held-out test set. + +We'll start by building our parsnip model object again from scratch. We take our best hyperparameter values from our random forest model. When we set the engine, we add a new argument: `importance = "impurity"`. This will provide *variable importance* scores for this last model, which gives some insight into which predictors drive model performance. 
+ +```{r} +#| label: "last-rf" +#| cache: false +# the last model +last_rf_mod <- + rand_forest(mtry = 8, min_n = 7, trees = 1000) %>% + set_engine("ranger", num.threads = cores, importance = "impurity") %>% + set_mode("classification") + +# the last workflow +last_rf_workflow <- + rf_workflow %>% + update_model(last_rf_mod) + +# the last fit +set.seed(345) +last_rf_fit <- + last_rf_workflow %>% + last_fit(splits) + +last_rf_fit +``` + +This fitted workflow contains *everything*, including our final metrics based on the test set. So, how did this model do on the test set? Was the validation set a good estimate of future performance? + +```{r} +last_rf_fit %>% + collect_metrics() +``` + +This ROC AUC value is pretty close to what we saw when we tuned the random forest model with the validation set, which is good news. That means that our estimate of how well our model would perform with new data was not too far off from how well our model actually performed with the unseen test data. + +We can access those variable importance scores via the `.workflow` column. We can [extract out the fit](https://tune.tidymodels.org/reference/extract-tune.html) from the workflow object, and then use the vip package to visualize the variable importance scores for the top 20 features: + +```{r} +#| label: "rf-importance" +last_rf_fit %>% + extract_fit_parsnip() %>% + vip(num_features = 20) +``` + +The most important predictors in whether a hotel stay had children or not were the daily cost for the room, the type of room reserved, the time between the creation of the reservation and the arrival date, and the type of room that was ultimately assigned. + +Let's generate our last ROC curve to visualize. 
Since the event we are predicting is the first level in the `children` factor ("children"), we provide `roc_curve()` with the [relevant class probability](https://yardstick.tidymodels.org/reference/roc_curve.html#relevant-level) `.pred_children`: + +```{r} +#| label: "test-set-roc-curve" +last_rf_fit %>% + collect_predictions() %>% + roc_curve(children, .pred_children) %>% + autoplot() +``` + +Based on these results, the validation set and test set performance statistics are very close, so we would have pretty high confidence that our random forest model with the selected hyperparameters would perform well when predicting new data. + +## Where to next? {#next} + +If you've made it to the end of this series of [*Get Started*](/start/) articles, we hope you feel ready to learn more! You now know the core tidymodels packages and how they fit together. After you are comfortable with the basics we introduced in this series, you can [learn how to go farther](/learn/) with tidymodels in your modeling and machine learning projects. + +Here are some more ideas for where to go next: + +- Study up on statistics and modeling with our comprehensive [books](/books/). + +- Dig deeper into the [package documentation sites](/packages/) to find functions that meet your modeling needs. Use the [searchable tables](/find/) to explore what is possible. + +- Keep up with the latest about tidymodels packages at the [tidyverse blog](https://www.tidyverse.org/tags/tidymodels/). + +- Find ways to ask for [help](/help/) and [contribute to tidymodels](/contribute) to help others. + +\### + +
Happy modeling!
    + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/start/index.qmd b/start/index.qmd new file mode 100644 index 00000000..b849bb1f --- /dev/null +++ b/start/index.qmd @@ -0,0 +1,18 @@ +--- +title: Welcome! +description: "What do you need to know to start using tidymodels? Learn what you need in 5 articles." +toc: false +css: styles.css +--- + +Here, learn what you need to get started with tidymodels in five articles, starting with how to create a model and ending with a beginning-to-end modeling case study. After you are comfortable with these basics, you can [learn how to go farther with tidymodels](/learn/). + +## If you are new to R or the tidyverse + +To get the most out of tidymodels, we recommend that you start by learning some basics about R and the [tidyverse](https://www.tidyverse.org/) first, then return here when you feel ready. Here are some resources to start learning: + +- [Finding Your Way To R](https://education.rstudio.com/learn/), from the RStudio Education team. + +- [Learn the tidyverse](https://www.tidyverse.org/learn/), from the tidyverse team. + +- [Statistical Inference via Data Science: A ModernDive into R and the Tidyverse](/books/moderndive/). 
diff --git a/start/models/figs/dwplot-1.svg b/start/models/figs/dwplot-1.svg new file mode 100644 index 00000000..71009646 --- /dev/null +++ b/start/models/figs/dwplot-1.svg @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +initial_volume:food_regimeHigh +initial_volume:food_regimeLow +food_regimeHigh +food_regimeLow +initial_volume + + + + + + + + + + + +0.00 +0.01 +0.02 +0.03 +0.04 +0.05 + + diff --git a/start/models/figs/lm-all-pred-1.svg b/start/models/figs/lm-all-pred-1.svg new file mode 100644 index 00000000..27e05fe7 --- /dev/null +++ b/start/models/figs/lm-all-pred-1.svg @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.05 +0.06 +0.07 +0.08 +0.09 +0.10 + + + + + + + + + +Initial +Low +High +food_regime +urchin size + + diff --git a/start/models/figs/stan-pred-1.svg b/start/models/figs/stan-pred-1.svg new file mode 100644 index 00000000..4e7a314f --- /dev/null +++ b/start/models/figs/stan-pred-1.svg @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.05 +0.06 +0.07 +0.08 +0.09 +0.10 + + + + + + + + + +Initial +Low +High +food_regime +urchin size +Bayesian model with t(1) prior distribution + + diff --git a/start/models/figs/urchin-plot-1.svg b/start/models/figs/urchin-plot-1.svg new file mode 100644 index 00000000..15ab2858 --- /dev/null +++ b/start/models/figs/urchin-plot-1.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.04 +0.08 +0.12 +0.16 + + + + + + + + +10 +20 +30 +40 +initial_volume +width + +food_regime + + + + + + + + + +Initial +Low +High + + diff --git a/start/models/index.qmd b/start/models/index.qmd new file mode 100644 index 
00000000..4c663a02 --- /dev/null +++ b/start/models/index.qmd @@ -0,0 +1,323 @@ +--- +title: "Build a model" +weight: 1 +categories: + - model fitting + - parsnip + - broom +description: | + Get started by learning how to specify and train a model using tidymodels. +toc-location: body +toc-depth: 2 +toc-title: "" +css: ../styles.css +include-after-body: ../repo-actions-delete.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(readr) +library(rstanarm) +library(tidymodels) +library(broom.mixed) +library(dotwhisker) + +pkgs <- c("tidymodels", "readr", "rstanarm", "broom.mixed", "dotwhisker") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + + + +## Introduction {#intro} + +How do you create a statistical model using tidymodels? In this article, we will walk you through the steps. We start with data for modeling, learn how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/), and understand why these functions are designed this way. + +`r article_req_pkgs(pkgs)` + +```{r} +#| eval: false +library(tidymodels) # for the parsnip package, along with the rest of tidymodels + +# Helper packages +library(readr) # for importing data +library(broom.mixed) # for converting bayesian models to tidy tibbles +library(dotwhisker) # for visualizing regression results +``` + + +{{< test-drive url="https://rstudio.cloud/project/2674862" >}} + + +## The Sea Urchins Data {#data} + +Let's use the data from [Constable (1993)](https://link.springer.com/article/10.1007/BF00349318) to explore how three different feeding regimes affect the size of sea urchins over time. The initial size of the sea urchins at the beginning of the experiment probably affects how big they grow as they are fed. 
 + +To start, let's read our urchins data into R, which we'll do by providing [`readr::read_csv()`](https://readr.tidyverse.org/reference/read_delim.html) with a URL where our CSV data is located ("https://tidymodels.org/start/models/urchins.csv"): + +```{r} +#| label: "data" +urchins <- + # Data were assembled for a tutorial + # at https://www.flutterbys.com.au/stats/tut/tut7.5a.html + read_csv("https://tidymodels.org/start/models/urchins.csv") %>% + # Change the names to be a little more verbose + setNames(c("food_regime", "initial_volume", "width")) %>% + # Factors are very helpful for modeling, so we convert one column + mutate(food_regime = factor(food_regime, levels = c("Initial", "Low", "High"))) +``` + +Let's take a quick look at the data: + +```{r} +urchins +``` + +The urchins data is a [tibble](https://tibble.tidyverse.org/index.html). If you are new to tibbles, the best place to start is the [tibbles chapter](https://r4ds.had.co.nz/tibbles.html) in *R for Data Science*. For each of the `r nrow(urchins)` urchins, we know their: + ++ experimental feeding regime group (`food_regime`: either `Initial`, `Low`, or `High`), ++ size in milliliters at the start of the experiment (`initial_volume`), and ++ suture width at the end of the experiment (`width`). + +As a first step in modeling, it's always a good idea to plot the data: + +```{r} +#| label: "urchin-plot" +ggplot(urchins, + aes(x = initial_volume, + y = width, + group = food_regime, + col = food_regime)) + + geom_point() + + geom_smooth(method = lm, se = FALSE) + + scale_color_viridis_d(option = "plasma", end = .7) +``` + +We can see that urchins that were larger in volume at the start of the experiment tended to have wider sutures at the end, but the slopes of the lines look different so this effect may depend on the feeding regime condition. 
+ +## Build and fit a model {#build-model} + +A standard two-way analysis of variance ([ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc43.htm)) model makes sense for this dataset because we have both a continuous predictor and a categorical predictor. Since the slopes appear to be different for at least two of the feeding regimes, let's build a model that allows for two-way interactions. Specifying an R formula with our variables in this way: + +```{r} +#| label: "two-way-int" +#| eval: false +width ~ initial_volume * food_regime +``` + +allows our regression model depending on initial volume to have separate slopes and intercepts for each food regime. + +For this kind of model, ordinary least squares is a good initial approach. With tidymodels, we start by specifying the _functional form_ of the model that we want using the [parsnip package](https://parsnip.tidymodels.org/). Since there is a numeric outcome and the model should be linear with slopes and intercepts, the model type is ["linear regression"](https://parsnip.tidymodels.org/reference/linear_reg.html). We can declare this with: + + +```{r} +#| label: "lm-tm" +linear_reg() +``` + +That is pretty underwhelming since, on its own, it doesn't really do much. However, now that the type of model has been specified, we can think about a method for _fitting_ or training the model, the model **engine**. The engine value is often a mash-up of the software that can be used to fit or train the model as well as the estimation method. The default for `linear_reg()` is `"lm"` for ordinary least squares, as you can see above. We could set a non-default option instead: + +```{r} +#| label: "lm-spec" +linear_reg() %>% + set_engine("keras") +``` + +The [documentation page for `linear_reg()`](https://parsnip.tidymodels.org/reference/linear_reg.html) lists all the possible engines. We'll save our model object using the default engine as `lm_mod`. 
+ +```{r} +lm_mod <- linear_reg() +``` + +From here, the model can be estimated or trained using the [`fit()`](https://parsnip.tidymodels.org/reference/fit.html) function: + +```{r} +#| label: "lm-fit" +lm_fit <- + lm_mod %>% + fit(width ~ initial_volume * food_regime, data = urchins) +lm_fit +``` + +Perhaps our analysis requires a description of the model parameter estimates and their statistical properties. Although the `summary()` function for `lm` objects can provide that, it gives the results back in an unwieldy format. Many models have a `tidy()` method that provides the summary results in a more predictable and useful format (e.g. a data frame with standard column names): + +```{r} +#| label: "lm-table" +tidy(lm_fit) +``` + +This kind of output can be used to generate a dot-and-whisker plot of our regression results using the dotwhisker package: + +```{r} +#| label: "dwplot" +tidy(lm_fit) %>% + dwplot(dot_args = list(size = 2, color = "black"), + whisker_args = list(color = "black"), + vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2)) +``` + + +## Use a model to predict {#predict-model} + +This fitted object `lm_fit` has the `lm` model output built-in, which you can access with `lm_fit$fit`, but there are some benefits to using the fitted parsnip model object when it comes to predicting. + +Suppose that, for a publication, it would be particularly interesting to make a plot of the mean body size for urchins that started the experiment with an initial volume of 20ml. To create such a graph, we start with some new example data that we will make predictions for, to show in our graph: + +```{r} +#| label: "new-points" +new_points <- expand.grid(initial_volume = 20, + food_regime = c("Initial", "Low", "High")) +new_points +``` + +To get our predicted results, we can use the `predict()` function to find the mean values at 20ml. + +It is also important to communicate the variability, so we also need to find the predicted confidence intervals. 
 If we had used `lm()` to fit the model directly, a few minutes of reading the [documentation page](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/predict.lm.html) for `predict.lm()` would explain how to do this. However, if we decide to use a different model to estimate urchin size (_spoiler:_ we will!), it is likely that a completely different syntax would be required. + +Instead, with tidymodels, the types of predicted values are standardized so that we can use the same syntax to get these values. + +First, let's generate the mean body width values: + +```{r} +#| label: "lm-pred-mean" +mean_pred <- predict(lm_fit, new_data = new_points) +mean_pred +``` + +When making predictions, the tidymodels convention is to always produce a tibble of results with standardized column names. This makes it easy to combine the original data and the predictions in a usable format: + +```{r} +#| label: "lm-all-pred" +conf_int_pred <- predict(lm_fit, + new_data = new_points, + type = "conf_int") +conf_int_pred + +# Now combine: +plot_data <- + new_points %>% + bind_cols(mean_pred) %>% + bind_cols(conf_int_pred) + +# and plot: +ggplot(plot_data, aes(x = food_regime)) + + geom_point(aes(y = .pred)) + + geom_errorbar(aes(ymin = .pred_lower, + ymax = .pred_upper), + width = .2) + + labs(y = "urchin size") +``` + +## Model with a different engine {#new-engine} + +Everyone on your team is happy with that plot _except_ that one person who just read their first book on [Bayesian analysis](https://bayesian.org/what-is-bayesian-analysis/). They are interested in knowing if the results would be different if the model were estimated using a Bayesian approach. In such an analysis, a [_prior distribution_](https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7) needs to be declared for each model parameter that represents the possible values of the parameters (before being exposed to the observed data). 
After some discussion, the group agrees that the priors should be bell-shaped but, since no one has any idea what the range of values should be, to take a conservative approach and make the priors _wide_ using a Cauchy distribution (which is the same as a t-distribution with a single degree of freedom). + +The [documentation](https://mc-stan.org/rstanarm/articles/priors.html) on the rstanarm package shows us that the `stan_glm()` function can be used to estimate this model, and that the function arguments that need to be specified are called `prior` and `prior_intercept`. It turns out that `linear_reg()` has a [`stan` engine](https://parsnip.tidymodels.org/reference/linear_reg.html#details). Since these prior distribution arguments are specific to the Stan software, they are passed as arguments to [`parsnip::set_engine()`](https://parsnip.tidymodels.org/reference/set_engine.html). After that, the same exact `fit()` call is used: + +```{r} +#| label: "go-stan" +#| message: false +# set the prior distribution +prior_dist <- rstanarm::student_t(df = 1) + +set.seed(123) + +# make the parsnip model +bayes_mod <- + linear_reg() %>% + set_engine("stan", + prior_intercept = prior_dist, + prior = prior_dist) + +# train the model +bayes_fit <- + bayes_mod %>% + fit(width ~ initial_volume * food_regime, data = urchins) + +print(bayes_fit, digits = 5) +``` + +This kind of Bayesian analysis (like many models) involves randomly generated numbers in its fitting procedure. We can use `set.seed()` to ensure that the same (pseudo-)random numbers are generated each time we run this code. The number `123` isn't special or related to our data; it is just a "seed" used to choose random numbers. + +To update the parameter table, the `tidy()` method is once again used: + +```{r} +#| label: "tidy-stan" +tidy(bayes_fit, conf.int = TRUE) +``` + +A goal of the tidymodels packages is that the **interfaces to common tasks are standardized** (as seen in the `tidy()` results above). 
The same is true for getting predictions; we can use the same code even though the underlying packages use very different syntax: + +```{r} +#| label: "stan-pred" +bayes_plot_data <- + new_points %>% + bind_cols(predict(bayes_fit, new_data = new_points)) %>% + bind_cols(predict(bayes_fit, new_data = new_points, type = "conf_int")) + +ggplot(bayes_plot_data, aes(x = food_regime)) + + geom_point(aes(y = .pred)) + + geom_errorbar(aes(ymin = .pred_lower, ymax = .pred_upper), width = .2) + + labs(y = "urchin size") + + ggtitle("Bayesian model with t(1) prior distribution") +``` + +This isn't very different from the non-Bayesian results (except in interpretation). + +::: {.callout-note} +The [parsnip](https://parsnip.tidymodels.org/) package can work with many model types, engines, and arguments. Check out [tidymodels.org/find/parsnip](/find/parsnip/) to see what is available. +::: + +## Why does it work that way? {#why} + +The extra step of defining the model using a function like `linear_reg()` might seem superfluous since a call to `lm()` is much more succinct. However, the problem with standard modeling functions is that they don't separate what you want to do from the execution. For example, the process of executing a formula has to happen repeatedly across model calls even when the formula does not change; we can't recycle those computations. + +Also, using the tidymodels framework, we can do some interesting things by incrementally creating a model (instead of using single function call). [Model tuning](/start/tuning/) with tidymodels uses the specification of the model to declare what parts of the model should be tuned. That would be very difficult to do if `linear_reg()` immediately fit the model. + +If you are familiar with the tidyverse, you may have noticed that our modeling code uses the magrittr pipe (`%>%`). With dplyr and other tidyverse packages, the pipe works well because all of the functions take the _data_ as the first argument. 
For example: + +```{r} +#| label: "tidy-data" +urchins %>% + group_by(food_regime) %>% + summarize(med_vol = median(initial_volume)) +``` + +whereas the modeling code uses the pipe to pass around the _model object_: + +```{r} +#| label: "tidy-model" +#| eval: false +bayes_mod %>% + fit(width ~ initial_volume * food_regime, data = urchins) +``` + +This may seem jarring if you have used dplyr a lot, but it is extremely similar to how ggplot2 operates: + +```{r} +#| eval: false +ggplot(urchins, + aes(initial_volume, width)) + # returns a ggplot object + geom_jitter() + # same + geom_smooth(method = lm, se = FALSE) + # same + labs(x = "Volume", y = "Width") # etc +``` + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/content/start/models/urchins.csv b/start/models/urchins.csv similarity index 100% rename from content/start/models/urchins.csv rename to start/models/urchins.csv diff --git a/start/recipes/figs/roc-plot-1.svg b/start/recipes/figs/roc-plot-1.svg new file mode 100644 index 00000000..e1f84bc1 --- /dev/null +++ b/start/recipes/figs/roc-plot-1.svg @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +1 - specificity +sensitivity + + diff --git a/start/recipes/index.qmd b/start/recipes/index.qmd new file mode 100644 index 00000000..3f7bedc3 --- /dev/null +++ b/start/recipes/index.qmd @@ -0,0 +1,442 @@ +--- +title: "Preprocess your data with recipes" +weight: 2 +categories: + - pre-processing + - recipes + - parsnip + - workflows + - yardstick + - broom +description: | + Prepare data for modeling with modular preprocessing steps. 
+toc-location: body +toc-depth: 2 +toc-title: "" +css: ../styles.css +include-after-body: ../repo-actions-delete.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(tidymodels) +library(nycflights13) +library(kableExtra) +library(skimr) +pkgs <- c("tidymodels", "nycflights13", "skimr") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction {#intro} + +In our [*Build a Model*](/start/models/) article, we learned how to specify and train models with different engines using the [parsnip package](https://parsnip.tidymodels.org/). In this article, we'll explore another tidymodels package, [recipes](https://recipes.tidymodels.org/), which is designed to help you preprocess your data *before* training your model. Recipes are built as a series of preprocessing steps, such as: + +- converting qualitative predictors to indicator variables (also known as dummy variables), + +- transforming data to be on a different scale (e.g., taking the logarithm of a variable), + +- transforming whole groups of predictors together, + +- extracting key features from raw variables (e.g., getting the day of the week out of a date variable), + +and so on. If you are familiar with R's formula interface, a lot of this might sound familiar and like what a formula already does. Recipes can be used to do many of the same things, but they have a much wider range of possibilities. This article shows how to use recipes for modeling. 
+ +`r article_req_pkgs(pkgs)` + +```{r} +#| eval: false +library(tidymodels) # for the recipes package, along with the rest of tidymodels + +# Helper packages +library(nycflights13) # for flight data +library(skimr) # for variable summaries +``` + +{{< test-drive url="https://rstudio.cloud/project/2674862" >}} + +## The New York City flight data {#data} + +```{r} +#| label: "flight-start" +#| echo: false +set.seed(123) + +flight_data <- + flights %>% + mutate( + # Convert the arrival delay to a factor + arr_delay = ifelse(arr_delay >= 30, "late", "on_time"), + arr_delay = factor(arr_delay), + # We will use the date (not date-time) in the recipe below + date = lubridate::as_date(time_hour) + ) %>% + # Include the weather data + inner_join(weather, by = c("origin", "time_hour")) %>% + # Only retain the specific columns we will use + select(dep_time, flight, origin, dest, air_time, distance, + carrier, date, arr_delay, time_hour) %>% + # Exclude missing data + na.omit() %>% + # For creating models, it is better to have qualitative columns + # encoded as factors (instead of character strings) + mutate_if(is.character, as.factor) +``` + +Let's use the [nycflights13 data](https://github.com/hadley/nycflights13) to predict whether a plane arrives more than 30 minutes late. This data set contains information on `r scales::comma(nrow(flight_data))` flights departing near New York City in 2013. Let's start by loading the data and making a few changes to the variables: + +```{r} +#| ref.label: "flight-start" + +``` + +We can see that about `r percent(mean(flight_data$arr_delay == "late"))` of the flights in this data set arrived more than 30 minutes late. + +```{r} +#| label: "count-delays" +flight_data %>% + count(arr_delay) %>% + mutate(prop = n/sum(n)) +``` + +Before we start building up our recipe, let's take a quick look at a few specific variables that will be important for both preprocessing and modeling. 
+ +First, notice that the variable we created called `arr_delay` is a factor variable; it is important that our outcome variable for training a logistic regression model is a factor. + +```{r} +#| label: "glimpse-flights" +glimpse(flight_data) +``` + +Second, there are two variables that we don't want to use as predictors in our model, but that we would like to retain as identification variables that can be used to troubleshoot poorly predicted data points. These are `flight`, a numeric value, and `time_hour`, a date-time value. + +Third, there are `r length(levels(flight_data$dest))` flight destinations contained in `dest` and `r length(levels(flight_data$carrier))` distinct `carrier`s. + +```{r} +#| label: "skim-flights" +flight_data %>% + skimr::skim(dest, carrier) +``` + +Because we'll be using a simple logistic regression model, the variables `dest` and `carrier` will be converted to [dummy variables](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html). However, some of these values do not occur very frequently and this could complicate our analysis. We'll discuss specific steps later in this article that we can add to our recipe to address this issue before modeling. + +## Data splitting {#data-split} + +To get started, let's split this single dataset into two: a *training* set and a *testing* set. We'll keep most of the rows in the original dataset (subset chosen randomly) in the *training* set. The training data will be used to *fit* the model, and the *testing* set will be used to measure model performance. 
+ +To do this, we can use the [rsample](https://rsample.tidymodels.org/) package to create an object that contains the information on *how* to split the data, and then two more rsample functions to create data frames for the training and testing sets: + +```{r} +#| label: "split" +# Fix the random numbers by setting the seed +# This enables the analysis to be reproducible when random numbers are used +set.seed(222) +# Put 3/4 of the data into the training set +data_split <- initial_split(flight_data, prop = 3/4) + +# Create data frames for the two sets: +train_data <- training(data_split) +test_data <- testing(data_split) +``` + +## Create recipe and roles {#recipe} + +To get started, let's create a recipe for a simple logistic regression model. Before training the model, we can use a recipe to create a few new predictors and conduct some preprocessing required by the model. + +Let's initiate a new recipe: + +```{r} +#| label: "initial-recipe" +flights_rec <- + recipe(arr_delay ~ ., data = train_data) +``` + +The [`recipe()` function](https://recipes.tidymodels.org/reference/recipe.html) as we used it here has two arguments: + +- A **formula**. Any variable on the left-hand side of the tilde (`~`) is considered the model outcome (here, `arr_delay`). On the right-hand side of the tilde are the predictors. Variables may be listed by name, or you can use the dot (`.`) to indicate all other variables as predictors. + +- The **data**. A recipe is associated with the data set used to create the model. This will typically be the *training* set, so `data = train_data` here. Naming a data set doesn't actually change the data itself; it is only used to catalog the names of the variables and their types, like factors, integers, dates, etc. + +Now we can add [roles](https://recipes.tidymodels.org/reference/roles.html) to this recipe. 
We can use the [`update_role()` function](https://recipes.tidymodels.org/reference/roles.html) to let recipes know that `flight` and `time_hour` are variables with a custom role that we called `"ID"` (a role can have any character value). Whereas our formula included all variables in the training set other than `arr_delay` as predictors, this tells the recipe to keep these two variables but not use them as either outcomes or predictors. + +```{r} +#| label: "recipe-roles" +flights_rec <- + recipe(arr_delay ~ ., data = train_data) %>% + update_role(flight, time_hour, new_role = "ID") +``` + +This step of adding roles to a recipe is optional; the purpose of using it here is that those two variables can be retained in the data but not included in the model. This can be convenient when, after the model is fit, we want to investigate some poorly predicted value. These ID columns will be available and can be used to try to understand what went wrong. + +To get the current set of variables and roles, use the `summary()` function: + +```{r} +#| label: "summary" +summary(flights_rec) +``` + +## Create features {#features} + +Now we can start adding steps onto our recipe using the pipe operator. Perhaps it is reasonable for the date of the flight to have an effect on the likelihood of a late arrival. A little bit of **feature engineering** might go a long way to improving our model. How should the date be encoded into the model? The `date` column has an R `date` object so including that column "as is" will mean that the model will convert it to a numeric format equal to the number of days after a reference date: + +```{r} +#| label: "dates" +flight_data %>% + distinct(date) %>% + mutate(numeric_date = as.numeric(date)) +``` + +It's possible that the numeric date variable is a good option for modeling; perhaps the model would benefit from a linear trend between the log-odds of a late arrival and the numeric date variable. 
However, it might be better to add model terms *derived* from the date that have a better potential to be important to the model. For example, we could derive the following meaningful features from the single `date` variable: + +- the day of the week, + +- the month, and + +- whether or not the date corresponds to a holiday. + +Let's do all three of these by adding steps to our recipe: + +```{r} +#| label: "date-recipe" +flights_rec <- + recipe(arr_delay ~ ., data = train_data) %>% + update_role(flight, time_hour, new_role = "ID") %>% + step_date(date, features = c("dow", "month")) %>% + step_holiday(date, + holidays = timeDate::listHolidays("US"), + keep_original_cols = FALSE) +``` + +What do each of these steps do? + +- With [`step_date()`](https://recipes.tidymodels.org/reference/step_date.html), we created two new factor columns with the appropriate day of the week and the month. + +- With [`step_holiday()`](https://recipes.tidymodels.org/reference/step_holiday.html), we created a binary variable indicating whether the current date is a holiday or not. The argument value of `timeDate::listHolidays("US")` uses the [timeDate package](https://cran.r-project.org/web/packages/timeDate/index.html) to list the `r length(timeDate::listHolidays("US"))` standard US holidays. + +- With `keep_original_cols = FALSE`, we remove the original `date` variable since we no longer want it in the model. Many recipe steps that create new variables have this argument. + +Next, we'll turn our attention to the variable types of our predictors. Because we plan to train a logistic regression model, we know that predictors will ultimately need to be numeric, as opposed to nominal data like strings and factor variables. In other words, there may be a difference in how we store our data (in factors inside a data frame), and how the underlying equations require them (a purely numeric matrix). 
+ +For factors like `dest` and `origin`, [standard practice](https://bookdown.org/max/FES/creating-dummy-variables-for-unordered-categories.html) is to convert them into *dummy* or *indicator* variables to make them numeric. These are binary values for each level of the factor. For example, our `origin` variable has values of `"EWR"`, `"JFK"`, and `"LGA"`. The standard dummy variable encoding, shown below, will create *two* numeric columns of the data that are 1 when the originating airport is `"JFK"` or `"LGA"` and zero otherwise, respectively. + +```{r} +#| label: "calc-dummy" +#| include: false +four_origins <- + train_data %>% + select(origin, arr_delay) %>% + slice(1:4) + +origin_dummies <- + recipe(arr_delay ~ origin, data = train_data) %>% + step_dummy(origin, keep_original_cols = TRUE) %>% + prep(training = four_origins) +``` + +```{r} +#| label: "dummy-table" +#| echo: false +# Get a row for each factor level +bake(origin_dummies, new_data = NULL, origin, starts_with("origin")) %>% + distinct() %>% + knitr::kable() %>% + kable_styling(full_width = FALSE) +``` + +But, unlike the standard model formula methods in R, a recipe **does not** automatically create these dummy variables for you; you'll need to tell your recipe to add this step. This is for two reasons. First, many models do not require [numeric predictors](https://bookdown.org/max/FES/categorical-trees.html), so dummy variables may not always be preferred. Second, recipes can also be used for purposes outside of modeling, where non-dummy versions of the variables may work better. For example, you may want to make a table or a plot with a variable as a single factor. 
For those reasons, you need to explicitly tell recipes to create dummy variables using `step_dummy()`: + +```{r} +#| label: "dummy" +flights_rec <- + recipe(arr_delay ~ ., data = train_data) %>% + update_role(flight, time_hour, new_role = "ID") %>% + step_date(date, features = c("dow", "month")) %>% + step_holiday(date, + holidays = timeDate::listHolidays("US"), + keep_original_cols = FALSE) %>% + step_dummy(all_nominal_predictors()) +``` + +Here, we did something different than before: instead of applying a step to an individual variable, we used [selectors](https://recipes.tidymodels.org/reference/selections.html) to apply this recipe step to several variables at once, `all_nominal_predictors()`. The [selector functions](https://recipes.tidymodels.org/reference/selections.html) can be combined to select intersections of variables. + +At this stage in the recipe, this step selects the `origin`, `dest`, and `carrier` variables. It also includes two new variables, `date_dow` and `date_month`, that were created by the earlier `step_date()`. + +More generally, the recipe selectors mean that you don't always have to apply steps to individual variables one at a time. Since a recipe knows the *variable type* and *role* of each column, they can also be selected (or dropped) using this information. + +We need one final step to add to our recipe. Since `carrier` and `dest` have some infrequently occurring factor values, it is possible that dummy variables might be created for values that don't exist in the training set. For example, there is one destination that is only in the test set: + +```{r} +#| label: "zv-cols" +test_data %>% + distinct(dest) %>% + anti_join(train_data) +``` + +When the recipe is applied to the training set, a column is made for `r dplyr::setdiff(test_data$dest, train_data$dest)` because the factor levels come from `flight_data` (not the training set), but this column will contain all zeros. 
This is a "zero-variance predictor" that has no information within the column. While some R functions will not produce an error for such predictors, it usually causes warnings and other issues. `step_zv()` will remove columns from the data when the training set data have a single value, so it is added to the recipe *after* `step_dummy()`: + +```{r} +#| label: "zv" +flights_rec <- + recipe(arr_delay ~ ., data = train_data) %>% + update_role(flight, time_hour, new_role = "ID") %>% + step_date(date, features = c("dow", "month")) %>% + step_holiday(date, + holidays = timeDate::listHolidays("US"), + keep_original_cols = FALSE) %>% + step_dummy(all_nominal_predictors()) %>% + step_zv(all_predictors()) +``` + +Now we've created a *specification* of what should be done with the data. How do we use the recipe we made? + +## Fit a model with a recipe {#fit-workflow} + +Let's use logistic regression to model the flight data. As we saw in [*Build a Model*](/start/models/), we start by [building a model specification](/start/models/#build-model) using the parsnip package: + +```{r} +#| label: "model" +lr_mod <- + logistic_reg() %>% + set_engine("glm") +``` + +We will want to use our recipe across several steps as we train and test our model. We will: + +1. **Process the recipe using the training set**: This involves any estimation or calculations based on the training set. For our recipe, the training set will be used to determine which predictors should be converted to dummy variables and which predictors will have zero-variance in the training set, and should be slated for removal. + +2. **Apply the recipe to the training set**: We create the final predictor set on the training set. + +3. **Apply the recipe to the test set**: We create the final predictor set on the test set. Nothing is recomputed and no information from the test set is used here; the dummy variable and zero-variance results from the training set are applied to the test set. 
+ +To simplify this process, we can use a *model workflow*, which pairs a model and recipe together. This is a straightforward approach because different recipes are often needed for different models, so when a model and recipe are bundled, it becomes easier to train and test *workflows*. We'll use the [workflows package](https://workflows.tidymodels.org/) from tidymodels to bundle our parsnip model (`lr_mod`) with our recipe (`flights_rec`). + +```{r} +#| label: "workflow" +flights_wflow <- + workflow() %>% + add_model(lr_mod) %>% + add_recipe(flights_rec) + +flights_wflow +``` + +Now, there is a single function that can be used to prepare the recipe and train the model from the resulting predictors: + +```{r} +#| label: "fit" +flights_fit <- + flights_wflow %>% + fit(data = train_data) +``` + +This object has the finalized recipe and fitted model objects inside. You may want to extract the model or recipe objects from the workflow. To do this, you can use the helper functions `extract_fit_parsnip()` and `extract_recipe()`. For example, here we pull the fitted model object then use the `broom::tidy()` function to get a tidy tibble of model coefficients: + +```{r} +#| label: "fit-glance" +flights_fit %>% + extract_fit_parsnip() %>% + tidy() +``` + +## Use a trained workflow to predict {#predict-workflow} + +Our goal was to predict whether a plane arrives more than 30 minutes late. We have just: + +1. Built the model (`lr_mod`), + +2. Created a preprocessing recipe (`flights_rec`), + +3. Bundled the model and recipe (`flights_wflow`), and + +4. Trained our workflow using a single call to `fit()`. + +The next step is to use the trained workflow (`flights_fit`) to predict with the unseen test data, which we will do with a single call to `predict()`. The `predict()` method applies the recipe to the new data, then passes them to the fitted model. 
+ +```{r} +#| label: "pred-class" +predict(flights_fit, test_data) +``` + +Because our outcome variable here is a factor, the output from `predict()` returns the predicted class: `late` versus `on_time`. But, let's say we want the predicted class probabilities for each flight instead. To return those, we can specify `type = "prob"` when we use `predict()` or use `augment()` with the model plus test data to save them together: + +```{r} +#| label: "test-pred" +flights_aug <- + augment(flights_fit, test_data) + +# The data look like: +flights_aug %>% + select(arr_delay, time_hour, flight, .pred_class, .pred_on_time) +``` + +Now that we have a tibble with our predicted class probabilities, how will we evaluate the performance of our workflow? We can see from these first few rows that our model predicted these 5 on time flights correctly because the values of `.pred_on_time` are *p* \> .50. But we also know that we have `r scales::comma(nrow(flights_aug))` rows total to predict. We would like to calculate a metric that tells how well our model predicted late arrivals, compared to the true status of our outcome variable, `arr_delay`. + +Let's use the area under the [ROC curve](https://bookdown.org/max/FES/measuring-performance.html#class-metrics) as our metric, computed using `roc_curve()` and `roc_auc()` from the [yardstick package](https://yardstick.tidymodels.org/). + +To generate a ROC curve, we need the predicted class probabilities for `late` and `on_time`, which we just calculated in the code chunk above. We can create the ROC curve with these values, using `roc_curve()` and then piping to the `autoplot()` method: + +```{r} +#| label: "roc-plot" +flights_aug %>% + roc_curve(truth = arr_delay, .pred_late) %>% + autoplot() +``` + +Similarly, `roc_auc()` estimates the area under the curve: + +```{r} +#| label: "roc-auc" +flights_aug %>% + roc_auc(truth = arr_delay, .pred_late) +``` + +Not too bad! 
We leave it to the reader to test out this workflow [*without*](https://workflows.tidymodels.org/reference/add_formula.html) this recipe. You can use `workflows::add_formula(arr_delay ~ .)` instead of `add_recipe()` (remember to remove the identification variables first!), and see whether our recipe improved our model's ability to predict late arrivals. + +```{r} +#| eval: false +#| include: false +set.seed(555) +flights_cens <- flight_data %>% + select(-flight, -time_hour) + +flights_cens_split <- initial_split(flights_cens, prop = 3/4) +flights_cens_train <- training(flights_cens_split) +flights_cens_test <- testing(flights_cens_split) + +flights_wflow_raw <- + workflow() %>% + add_model(lr_mod) %>% + add_formula(arr_delay ~ .) + +flights_fit_raw <- + flights_wflow_raw %>% + fit(data = flights_cens_train) + +flights_preds_raw <- + predict(flights_fit_raw, + flights_cens_test, + type = "prob") %>% + bind_cols(flights_cens_test %>% select(arr_delay)) + +flights_preds_raw %>% + roc_auc(truth = arr_delay, .pred_late) +``` + +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/start/repo-actions-delete.html b/start/repo-actions-delete.html new file mode 100644 index 00000000..a672d1a1 --- /dev/null +++ b/start/repo-actions-delete.html @@ -0,0 +1,3 @@ + diff --git a/start/resampling/img/cells.png b/start/resampling/img/cells.png new file mode 100644 index 00000000..d606425b Binary files /dev/null and b/start/resampling/img/cells.png differ diff --git a/start/resampling/img/resampling.svg b/start/resampling/img/resampling.svg new file mode 100644 index 00000000..34a43892 --- /dev/null +++ b/start/resampling/img/resampling.svg @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Produced by OmniGraffle 7.9.4 + 2019-02-16 02:42:35 +0000 + + + Canvas 1 + + Layer 1 + + + + + All Data + + + + + + + Training + + + + + + + Testing + + + + + + + + + + + + + Assessment + + + + + + + 
Analysis + + + + + + + Resample 1 + + + + + + + + + + + + + + + + Assessment + + + + + + + Analysis + + + + + + + Resample 2 + + + + + + + + + + + + + + + + Assessment + + + + + + + Analysis + + + + + + + Resample + B + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/start/resampling/index.qmd b/start/resampling/index.qmd new file mode 100644 index 00000000..b74825ea --- /dev/null +++ b/start/resampling/index.qmd @@ -0,0 +1,382 @@ +--- +title: "Evaluate your model with resampling" +weight: 3 +categories: + - resampling + - rsample + - parsnip + - tune + - workflows + - yardstick +description: | + Measure model performance by generating different versions of the training data through resampling. +toc-location: body +toc-depth: 2 +toc-title: "" +css: ../styles.css +include-after-body: ../repo-actions-delete.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(tidymodels) +library(ranger) +library(modeldata) +library(kableExtra) + +data(cells, package = "modeldata") + +pkgs <- c("tidymodels", "ranger", "modeldata") + +theme_set(theme_bw() + theme(legend.position = "top")) +``` + +## Introduction {#intro} + +So far, we have [built a model](/start/models/) and [preprocessed data with a recipe](/start/recipes/). We also introduced [workflows](/start/recipes/#fit-workflow) as a way to bundle a [parsnip model](https://parsnip.tidymodels.org/) and [recipe](https://recipes.tidymodels.org/) together. Once we have a model trained, we need a way to measure how well that model predicts new data. This tutorial explains how to characterize model performance based on **resampling** statistics. 
+ +`r article_req_pkgs(pkgs)` + +```{r} +#| eval: false +library(tidymodels) # for the rsample package, along with the rest of tidymodels + +# Helper packages +library(modeldata) # for the cells data +``` + +{{< test-drive url="https://rstudio.cloud/project/2674862" >}} + +## The cell image data {#data} + +Let's use data from [Hill, LaPan, Li, and Haney (2007)](http://www.biomedcentral.com/1471-2105/8/340), available in the [modeldata package](https://cran.r-project.org/web/packages/modeldata/index.html), to predict cell image segmentation quality with resampling. To start, we load this data into R: + +```{r} +#| label: "cell-import" +data(cells, package = "modeldata") +cells +``` + +We have data for `r nrow(cells)` cells, with `r ncol(cells)` variables. The main outcome variable of interest for us here is called `class`, which you can see is a factor. But before we jump into predicting the `class` variable, we need to understand it better. Below is a brief primer on cell image segmentation. + +### Predicting image segmentation quality + +Some biologists conduct experiments on cells. In drug discovery, a particular type of cell can be treated with either a drug or a control and then observed to see what the effect is (if any). A common approach for this kind of measurement is cell imaging. Different parts of the cells can be colored so that the locations of a cell can be determined. + +For example, in the top panel of this image of five cells, the green color is meant to define the boundary of the cell (coloring something called the cytoskeleton) while the blue color defines the nucleus of the cell. + +```{r} +#| label: "cell-fig" +#| echo: false +#| fig-align: center +#| out-width: "70%" +knitr::include_graphics("img/cells.png") +``` + +Using these colors, the cells in an image can be *segmented* so that we know which pixels belong to which cell. If this is done well, the cell can be measured in different ways that are important to the biology. 
Sometimes the shape of the cell matters and different mathematical tools are used to summarize characteristics like the size or "oblongness" of the cell. + +The bottom panel shows some segmentation results. Cells 1 and 5 are fairly well segmented. However, cells 2 to 4 are bunched up together because the segmentation was not very good. The consequence of bad segmentation is data contamination; when the biologist analyzes the shape or size of these cells, the data are inaccurate and could lead to the wrong conclusion. + +A cell-based experiment might involve millions of cells so it is infeasible to visually assess them all. Instead, a subsample can be created and these cells can be manually labeled by experts as either poorly segmented (`PS`) or well-segmented (`WS`). If we can predict these labels accurately, the larger data set can be improved by filtering out the cells most likely to be poorly segmented. + +### Back to the cells data + +The `cells` data has `class` labels for `r nrow(cells)` cells --- each cell is labeled as either poorly segmented (`PS`) or well-segmented (`WS`). Each also has a total of `r ncol(cells) - 2` predictors based on automated image analysis measurements. For example, `avg_inten_ch_1` is the mean intensity of the data contained in the nucleus, `area_ch_1` is the total size of the cell, and so on (some predictors are fairly arcane in nature). + +```{r} +#| label: "cells-show" +cells +``` + +The rates of the classes are somewhat imbalanced; there are more poorly segmented cells than well-segmented cells: + +```{r} +#| label: "rates" +cells %>% + count(class) %>% + mutate(prop = n/sum(n)) +``` + +## Data splitting {#data-split} + +In our previous [*Preprocess your data with recipes*](/start/recipes/#data-split) article, we started by splitting our data. 
It is common when beginning a modeling project to [separate the data set](https://bookdown.org/max/FES/data-splitting.html) into two partitions: + +- The *training set* is used to estimate parameters, compare models and feature engineering techniques, tune models, etc. + +- The *test set* is held in reserve until the end of the project, at which point there should only be one or two models under serious consideration. It is used as an unbiased source for measuring final model performance. + +There are different ways to create these partitions of the data. The most common approach is to use a random sample. Suppose that one quarter of the data were reserved for the test set. Random sampling would randomly select 25% for the test set and use the remainder for the training set. We can use the [rsample](https://rsample.tidymodels.org/) package for this purpose. + +Since random sampling uses random numbers, it is important to set the random number seed. This ensures that the random numbers can be reproduced at a later time (if needed). + +The function `rsample::initial_split()` takes the original data and saves the information on how to make the partitions. In the original analysis, the authors made their own training/test set and that information is contained in the column `case`. To demonstrate how to make a split, we'll remove this column before we make our own split: + +```{r} +#| label: "cell-split" +set.seed(123) +cell_split <- initial_split(cells %>% select(-case), + strata = class) +``` + +Here we used the [`strata` argument](https://rsample.tidymodels.org/reference/initial_split.html), which conducts a stratified split. This ensures that, despite the imbalance we noticed in our `class` variable, our training and test data sets will keep roughly the same proportions of poorly and well-segmented cells as in the original data. After the `initial_split`, the `training()` and `testing()` functions return the actual data sets. 
+ +```{r} +#| label: "cell-train-test" +cell_train <- training(cell_split) +cell_test <- testing(cell_split) + +nrow(cell_train) +nrow(cell_train)/nrow(cells) + +# training set proportions by class +cell_train %>% + count(class) %>% + mutate(prop = n/sum(n)) + +# test set proportions by class +cell_test %>% + count(class) %>% + mutate(prop = n/sum(n)) +``` + +The majority of the modeling work is then conducted on the training set data. + +## Modeling + +[Random forest models](https://en.wikipedia.org/wiki/Random_forest) are [ensembles](https://en.wikipedia.org/wiki/Ensemble_learning) of [decision trees](https://en.wikipedia.org/wiki/Decision_tree). A large number of decision tree models are created for the ensemble based on slightly different versions of the training set. When creating the individual decision trees, the fitting process encourages them to be as diverse as possible. The collection of trees are combined into the random forest model and, when a new sample is predicted, the votes from each tree are used to calculate the final predicted value for the new sample. For categorical outcome variables like `class` in our `cells` data example, the majority vote across all the trees in the random forest determines the predicted class for the new sample. + +One of the benefits of a random forest model is that it is very low maintenance; it requires very little preprocessing of the data and the default parameters tend to give reasonable results. For that reason, we won't create a recipe for the `cells` data. + +At the same time, the number of trees in the ensemble should be large (in the thousands) and this makes the model moderately expensive to compute. + +To fit a random forest model on the training set, let's use the [parsnip](https://parsnip.tidymodels.org/) package with the [ranger](https://cran.r-project.org/package=ranger) engine. 
We first define the model that we want to create: + +```{r} +#| label: "rf-def" +rf_mod <- + rand_forest(trees = 1000) %>% + set_engine("ranger") %>% + set_mode("classification") +``` + +Starting with this parsnip model object, the `fit()` function can be used with a model formula. Since random forest models use random numbers, we again set the seed prior to computing: + +```{r} +#| label: "rf-fit" +set.seed(234) +rf_fit <- + rf_mod %>% + fit(class ~ ., data = cell_train) +rf_fit +``` + +This new `rf_fit` object is our fitted model, trained on our training data set. + +## Estimating performance {#performance} + +During a modeling project, we might create a variety of different models. To choose between them, we need to consider how well these models do, as measured by some performance statistics. In our example in this article, some options we could use are: + +- the area under the Receiver Operating Characteristic (ROC) curve, and + +- overall classification accuracy. + +The ROC curve uses the class probability estimates to give us a sense of performance across the entire set of potential probability cutoffs. Overall accuracy uses the hard class predictions to measure performance. The hard class predictions tell us whether our model predicted `PS` or `WS` for each cell. But, behind those predictions, the model is actually estimating a probability. A simple 50% probability cutoff is used to categorize a cell as poorly segmented. + +The [yardstick package](https://yardstick.tidymodels.org/) has functions for computing both of these measures called `roc_auc()` and `accuracy()`. + +At first glance, it might seem like a good idea to use the training set data to compute these statistics. (This is actually a very bad idea.) Let's see what happens if we try this. To evaluate performance based on the training set, we call the `predict()` method to get both types of predictions (i.e. probabilities and hard class predictions). 
+ +```{r} +#| label: "rf-train-pred" +rf_training_pred <- + predict(rf_fit, cell_train) %>% + bind_cols(predict(rf_fit, cell_train, type = "prob")) %>% + # Add the true outcome data back in + bind_cols(cell_train %>% + select(class)) +``` + +Using the yardstick functions, this model has spectacular results, so spectacular that you might be starting to get suspicious: + +```{r} +#| label: "rf-train-perf" +rf_training_pred %>% # training set predictions + roc_auc(truth = class, .pred_PS) +rf_training_pred %>% # training set predictions + accuracy(truth = class, .pred_class) +``` + +Now that we have this model with exceptional performance, we proceed to the test set. Unfortunately, we discover that, although our results aren't bad, they are certainly worse than what we initially thought based on predicting the training set: + +```{r} +#| label: "rf-test" +rf_testing_pred <- + predict(rf_fit, cell_test) %>% + bind_cols(predict(rf_fit, cell_test, type = "prob")) %>% + bind_cols(cell_test %>% select(class)) +``` + +```{r} +#| label: "rf-test-perf" +rf_testing_pred %>% # test set predictions + roc_auc(truth = class, .pred_PS) +rf_testing_pred %>% # test set predictions + accuracy(truth = class, .pred_class) +``` + +### What happened here? + +There are several reasons why training set statistics like the ones shown in this section can be unrealistically optimistic: + +- Models like random forests, neural networks, and other black-box methods can essentially memorize the training set. Re-predicting that same set should always result in nearly perfect results. + +- The training set does not have the capacity to be a good arbiter of performance. It is not an independent piece of information; predicting the training set can only reflect what the model already knows. + +To understand that second point better, think about an analogy from teaching. Suppose you give a class a test, then give them the answers, then provide the same test. 
The student scores on the *second* test do not accurately reflect what they know about the subject; these scores would probably be higher than their results on the first test. + +## Resampling to the rescue {#resampling} + +Resampling methods, such as cross-validation and the bootstrap, are empirical simulation systems. They create a series of data sets similar to the training/testing split discussed previously; a subset of the data are used for creating the model and a different subset is used to measure performance. Resampling is always used with the *training set*. This schematic from [Kuhn and Johnson (2019)](https://bookdown.org/max/FES/resampling.html) illustrates data usage for resampling methods: + +```{r} +#| label: "resampling-fig" +#| echo: false +#| fig-align: center +#| out-width: "85%" +knitr::include_graphics("img/resampling.svg") +``` + +In the first level of this diagram, you see what happens when you use `rsample::initial_split()`, which splits the original data into training and test sets. Then, the training set is chosen for resampling, and the test set is held out. + +Let's use 10-fold cross-validation (CV) in this example. This method randomly allocates the `r nrow(cell_train)` cells in the training set to 10 groups of roughly equal size, called "folds". For the first iteration of resampling, the first fold of about `r floor(nrow(cell_train)/10)` cells are held out for the purpose of measuring performance. This is similar to a test set but, to avoid confusion, we call these data the *assessment set* in the tidymodels framework. + +The other 90% of the data (about `r floor(nrow(cell_train) * .9)` cells) are used to fit the model. Again, this sounds similar to a training set, so in tidymodels we call this data the *analysis set*. This model, trained on the analysis set, is applied to the assessment set to generate predictions, and performance statistics are computed based on those predictions. 
+ +In this example, 10-fold CV moves iteratively through the folds and leaves a different 10% out each time for model assessment. At the end of this process, there are 10 sets of performance statistics that were created on 10 data sets that were not used in the modeling process. For the cell example, this means 10 accuracies and 10 areas under the ROC curve. While 10 models were created, these are not used further; we do not keep the models themselves trained on these folds because their only purpose is calculating performance metrics. + +```{r} +#| label: "rf-rs" +#| include: false +set.seed(345) +folds <- vfold_cv(cell_train) + +rf_wf <- + workflow() %>% + add_model(rf_mod) %>% + add_formula(class ~ .) + +set.seed(456) +rf_fit_rs <- fit_resamples(rf_wf, folds) + +assessment_size <- + folds %>% + tidy() %>% + group_by(Fold, Data) %>% + count() %>% + ungroup() %>% + filter(Data == "Assessment") %>% + select(`assessment size` = n, id = Fold) + +assessment_stats <- + collect_metrics(rf_fit_rs, summarize = FALSE) %>% + select(id, .estimate, .metric) %>% + pivot_wider( + id_cols = c(id), + names_from = c(.metric), + values_from = c(.estimate) + ) %>% + full_join(assessment_size, by = "id") %>% + dplyr::rename(resample = id) + +rs_stats <- collect_metrics(rf_fit_rs) +``` + +The final resampling estimates for the model are the **averages** of the performance statistics replicates. For example, suppose for our data the results were: + +```{r} +#| label: "rs-table" +#| echo: false +#| results: asis +assessment_stats %>% + kable() %>% + kable_styling(full_width = FALSE) +``` + +From these resampling statistics, the final estimate of performance for this random forest model would be `r round(rs_stats$mean[rs_stats$.metric == "roc_auc"], 3)` for the area under the ROC curve and `r round(rs_stats$mean[rs_stats$.metric == "accuracy"], 3)` for accuracy. 
+ +These resampling statistics are an effective method for measuring model performance *without* predicting the training set directly as a whole. + +## Fit a model with resampling {#fit-resamples} + +To generate these results, the first step is to create a resampling object using rsample. There are [several resampling methods](https://rsample.tidymodels.org/reference/index.html#section-resampling-methods) implemented in rsample; cross-validation folds can be created using `vfold_cv()`: + +```{r} +#| label: "folds" +set.seed(345) +folds <- vfold_cv(cell_train, v = 10) +folds +``` + +The list column for `splits` contains the information on which rows belong in the analysis and assessment sets. There are functions that can be used to extract the individual resampled data called `analysis()` and `assessment()`. + +However, the tune package contains high-level functions that can do the required computations to resample a model for the purpose of measuring performance. You have several options for building an object for resampling: + +- Resample a model specification preprocessed with a formula or [recipe](/start/recipes/), or + +- Resample a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and formula/recipe. + +For this example, let's use a `workflow()` that bundles together the random forest model and a formula, since we are not using a recipe. Whichever of these options you use, the syntax to `fit_resamples()` is very similar to `fit()`: + +```{r} +#| label: "rs" +#| eval: false +rf_wf <- + workflow() %>% + add_model(rf_mod) %>% + add_formula(class ~ .) + +set.seed(456) +rf_fit_rs <- + rf_wf %>% + fit_resamples(folds) +``` + +```{r} +#| label: "rs-show" +rf_fit_rs +``` + +The results are similar to the `folds` results with some extra columns. The column `.metrics` contains the performance statistics created from the 10 assessment sets. 
These can be manually unnested but the tune package contains a number of simple functions that can extract these data: + +```{r} +#| label: "metrics" +collect_metrics(rf_fit_rs) +``` + +Think about these values we now have for accuracy and AUC. These performance metrics are now more realistic (i.e. lower) than our ill-advised first attempt at computing performance metrics in the section above. If we wanted to try different model types for this data set, we could more confidently compare performance metrics computed using resampling to choose between models. Also, remember that at the end of our project, we return to our test set to estimate final model performance. We have looked at this once already before we started using resampling, but let's remind ourselves of the results: + +```{r} +#| ref.label: "rf-test-perf" + +``` + +The performance metrics from the test set are much closer to the performance metrics computed using resampling than our first ("bad idea") attempt. Resampling allows us to simulate how well our model will perform on new data, and the test set acts as the final, unbiased check for our model's performance. 
+ +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/start/styles.css b/start/styles.css new file mode 100644 index 00000000..89b1cc8b --- /dev/null +++ b/start/styles.css @@ -0,0 +1,25 @@ +.sidebar-menu-container { + border-right: solid : #DDDDDD 1pt; /* $theme-grey */ + counter-reset: section; +} + +#quarto-sidebar > * { + padding-top: 3em; + padding-right: 51px; +} + +.sidebar-item { + display: flex; + margin-bottom: 30px; +} + +div.sidebar-item-container { + color: rgba(26, 22, 45, 0.6); +} + +.sidebar-item:not(:first-child)::before { + counter-increment: section; + content: counter(section, decimal) ""; + padding-right: 0.5em; + color: #CA225E; /* $theme-cranberry */ +} diff --git a/start/tuning/figs/best-tree-1.svg b/start/tuning/figs/best-tree-1.svg new file mode 100644 index 00000000..10376cbb --- /dev/null +++ b/start/tuning/figs/best-tree-1.svg @@ -0,0 +1,213 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +roc_auc + + + + + + + + + + +accuracy + + + + + +0.000000 +0.000010 +0.010000 + + + +0.000000 +0.000010 +0.010000 +0.73 +0.75 +0.77 +0.79 +0.81 + + + + + +0.78 +0.80 +0.82 +0.84 + + + + +cost_complexity +mean + +tree_depth + + + + + + + + + + + + + + + +1 +4 +8 +11 +15 + + diff --git a/start/tuning/figs/last-fit-1.svg b/start/tuning/figs/last-fit-1.svg new file mode 100644 index 00000000..03f23f23 --- /dev/null +++ b/start/tuning/figs/last-fit-1.svg @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 + + + + + + + + + + +0.00 +0.25 +0.50 +0.75 +1.00 +1 - specificity +sensitivity + + diff --git a/start/tuning/figs/rpart-plot-1.svg b/start/tuning/figs/rpart-plot-1.svg new 
file mode 100644 index 00000000..5c6107e6 --- /dev/null +++ b/start/tuning/figs/rpart-plot-1.svg @@ -0,0 +1,117 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +total_inten_ch_2 < 42e+3 +shape_p_2_a_ch_1 >= 1.3 +fiber_width_ch_1 < 11 +avg_inten_ch_1 < 145 +total_inten_ch_3 >= 58e+3 +eq_ellipse_oblate_vol_ch_1 >= 1674 +var_inten_ch_3 >= 41 + + + + + + + + + + + + + + + +PS +0.36 +100% +PS +0.05 +42% +PS +0.04 +42% +WS +0.55 +1% +WS +0.58 +58% +PS +0.39 +27% +PS +0.29 +19% +WS +0.66 +7% +PS +0.30 +2% +WS +0.81 +5% +WS +0.74 +31% +PS +0.27 +2% +PS +0.10 +1% +WS +0.60 +1% +WS +0.78 +29% + + +yes +no + + diff --git a/start/tuning/figs/vip-1.svg b/start/tuning/figs/vip-1.svg new file mode 100644 index 00000000..a4f25e17 --- /dev/null +++ b/start/tuning/figs/vip-1.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +shape_bfr_ch_1 +eq_ellipse_lwr_ch_1 +convex_hull_area_ratio_ch_1 +shape_p_2_a_ch_1 +inten_cooc_contrast_ch_3 +total_inten_ch_4 +total_inten_ch_1 +avg_inten_ch_2 +fiber_width_ch_1 +total_inten_ch_2 + + + + + + + + + + + + + + + +0 +50 +100 +150 +200 +Importance + + diff --git a/start/tuning/index.qmd b/start/tuning/index.qmd new file mode 100644 index 00000000..9addf5fe --- /dev/null +++ b/start/tuning/index.qmd @@ -0,0 +1,316 @@ +--- +title: "Tune model parameters" +weight: 4 +categories: + - tuning + - rsample + - parsnip + - tune + - dials + - workflows + - yardstick +description: | + Estimate the best values for hyperparameters that cannot be learned directly during model training. 
+toc-location: body +toc-depth: 2 +toc-title: "" +css: ../styles.css +include-after-body: ../repo-actions-delete.html +--- + +```{r} +#| label: "setup" +#| include: false +#| message: false +#| warning: false +source(here::here("common.R")) +``` + +```{r} +#| label: "load" +#| include: false +#| message: false +#| warning: false +library(tidymodels) +library(rpart) +library(rpart.plot) +library(kableExtra) +library(vip) +theme_set(theme_bw()) +doParallel::registerDoParallel() +pkgs <- c("tidymodels", "rpart", "rpart.plot", "vip") +``` + +## Introduction {#intro} + +Some model parameters cannot be learned directly from a data set during model training; these kinds of parameters are called **hyperparameters**. Some examples of hyperparameters include the number of predictors that are sampled at splits in a tree-based model (we call this `mtry` in tidymodels) or the learning rate in a boosted tree model (we call this `learn_rate`). Instead of learning these kinds of hyperparameters during model training, we can *estimate* the best values for these values by training many models on resampled data sets and exploring how well all these models perform. This process is called **tuning**. + +`r article_req_pkgs(pkgs)` + +```{r} +#| eval: false +library(tidymodels) # for the tune package, along with the rest of tidymodels + +# Helper packages +library(rpart.plot) # for visualizing a decision tree +library(vip) # for variable importance plots +``` + +{{< test-drive url="https://rstudio.cloud/project/2674862" >}} + +## The cell image data, revisited {#data} + +In our previous [*Evaluate your model with resampling*](/start/resampling/) article, we introduced a data set of images of cells that were labeled by experts as well-segmented (`WS`) or poorly segmented (`PS`). We trained a [random forest model](/start/resampling/#modeling) to predict which images are segmented well vs. poorly, so that a biologist could filter out poorly segmented cell images in their analysis. 
We used [resampling](/start/resampling/#resampling) to estimate the performance of our model on this data. + +```{r} +#| label: "cell-import" +data(cells, package = "modeldata") +cells +``` + +## Predicting image segmentation, but better {#why-tune} + +Random forest models are a tree-based ensemble method, and typically perform well with [default hyperparameters](https://bradleyboehmke.github.io/HOML/random-forest.html#out-of-the-box-performance). However, the accuracy of some other tree-based models, such as [boosted tree models](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) or [decision tree models](https://en.wikipedia.org/wiki/Decision_tree), can be sensitive to the values of hyperparameters. In this article, we will train a **decision tree** model. There are several hyperparameters for decision tree models that can be tuned for better performance. Let's explore: + +- the complexity parameter (which we call `cost_complexity` in tidymodels) for the tree, and +- the maximum `tree_depth`. + +Tuning these hyperparameters can improve model performance because decision tree models are prone to [overfitting](https://bookdown.org/max/FES/important-concepts.html#overfitting). This happens because single tree models tend to fit the training data *too well* --- so well, in fact, that they over-learn patterns present in the training data that end up being detrimental when predicting new data. + +We will tune the model hyperparameters to avoid overfitting. Tuning the value of `cost_complexity` helps by [pruning](https://bradleyboehmke.github.io/HOML/DT.html#pruning) back our tree. It adds a cost, or penalty, to error rates of more complex trees; a cost closer to zero decreases the number tree nodes pruned and is more likely to result in an overfit tree. However, a high cost increases the number of tree nodes pruned and can result in the opposite problem---an underfit tree. 
Tuning `tree_depth`, on the other hand, helps by [stopping](https://bradleyboehmke.github.io/HOML/DT.html#early-stopping) our tree from growing after it reaches a certain depth. We want to tune these hyperparameters to find what those two values should be for our model to do the best job predicting image segmentation. + +Before we start the tuning process, we split our data into training and testing sets, just like when we trained the model with one default set of hyperparameters. As [before](/start/resampling/), we can use `strata = class` if we want our training and testing sets to be created using stratified sampling so that both have the same proportion of both kinds of segmentation. + +```{r} +#| label: "cell-split" +set.seed(123) +cell_split <- initial_split(cells %>% select(-case), + strata = class) +cell_train <- training(cell_split) +cell_test <- testing(cell_split) +``` + +We use the training data for tuning the model. + +## Tuning hyperparameters {#tuning} + +Let's start with the parsnip package, using a [`decision_tree()`](https://parsnip.tidymodels.org/reference/decision_tree.html) model with the [rpart](https://cran.r-project.org/web/packages/rpart/index.html) engine. To tune the decision tree hyperparameters `cost_complexity` and `tree_depth`, we create a model specification that identifies which hyperparameters we plan to tune. + +```{r} +#| label: "tune-spec" +tune_spec <- + decision_tree( + cost_complexity = tune(), + tree_depth = tune() + ) %>% + set_engine("rpart") %>% + set_mode("classification") + +tune_spec +``` + +Think of `tune()` here as a placeholder. After the tuning process, we will select a single numeric value for each of these hyperparameters. For now, we specify our parsnip model object and identify the hyperparameters we will `tune()`. 
+ +We can't train this specification on a single data set (such as the entire training set) and learn what the hyperparameter values should be, but we *can* train many models using resampled data and see which models turn out best. We can create a regular grid of values to try using some convenience functions for each hyperparameter: + +```{r} +#| label: "tree-grid" +tree_grid <- grid_regular(cost_complexity(), + tree_depth(), + levels = 5) +``` + +The function [`grid_regular()`](https://dials.tidymodels.org/reference/grid_regular.html) is from the [dials](https://dials.tidymodels.org/) package. It chooses sensible values to try for each hyperparameter; here, we asked for 5 of each. Since we have two to tune, `grid_regular()` returns 5 $\times$ 5 = 25 different possible tuning combinations to try in a tidy tibble format. + +```{r} +#| label: "tree-grid-tibble" +tree_grid +``` + +Here, you can see all 5 values of `cost_complexity` ranging up to `r max(tree_grid$cost_complexity)`. These values get repeated for each of the 5 values of `tree_depth`: + +```{r} +tree_grid %>% + count(tree_depth) +``` + +Armed with our grid filled with 25 candidate decision tree models, let's create [cross-validation folds](/start/resampling/) for tuning: + +```{r} +#| label: "cell-folds" +#| dependson: "cell-split" +set.seed(234) +cell_folds <- vfold_cv(cell_train) +``` + +Tuning in tidymodels requires a resampled object created with the [rsample](https://rsample.tidymodels.org/) package. + +## Model tuning with a grid {#tune-grid} + +We are ready to tune! Let's use [`tune_grid()`](https://tune.tidymodels.org/reference/tune_grid.html) to fit models at all the different values we chose for each tuned hyperparameter. There are several options for building the object for tuning: + +- Tune a model specification along with a recipe or model, or + +- Tune a [`workflow()`](https://workflows.tidymodels.org/) that bundles together a model specification and a recipe or model preprocessor. 
+ +Here we use a `workflow()` with a straightforward formula; if this model required more involved data preprocessing, we could use `add_recipe()` instead of `add_formula()`. + +```{r} +#| label: "tree-res" +#| dependson: c("tune-spec", "cell-folds", "tree-grid") +#| message: false +set.seed(345) + +tree_wf <- workflow() %>% + add_model(tune_spec) %>% + add_formula(class ~ .) + +tree_res <- + tree_wf %>% + tune_grid( + resamples = cell_folds, + grid = tree_grid + ) + +tree_res +``` + +Once we have our tuning results, we can both explore them through visualization and then select the best result. The function `collect_metrics()` gives us a tidy tibble with all the results. We had 25 candidate models and two metrics, `accuracy` and `roc_auc`, and we get a row for each `.metric` and model. + +```{r} +#| label: "collect-trees" +#| dependson: "tree-res" +tree_res %>% + collect_metrics() +``` + +We might get more out of plotting these results: + +```{r} +#| label: "best-tree" +#| dependson: "tree-res" +#| fig-width: 8 +#| fig-height: 7 +tree_res %>% + collect_metrics() %>% + mutate(tree_depth = factor(tree_depth)) %>% + ggplot(aes(cost_complexity, mean, color = tree_depth)) + + geom_line(size = 1.5, alpha = 0.6) + + geom_point(size = 2) + + facet_wrap(~ .metric, scales = "free", nrow = 2) + + scale_x_log10(labels = scales::label_number()) + + scale_color_viridis_d(option = "plasma", begin = .9, end = 0) +``` + +We can see that our "stubbiest" tree, with a depth of `r min(tree_grid$tree_depth)`, is the worst model according to both metrics and across all candidate values of `cost_complexity`. Our deepest tree, with a depth of `r max(tree_grid$tree_depth)`, did better. However, the best tree seems to be between these values with a tree depth of 4. 
The [`show_best()`](https://tune.tidymodels.org/reference/show_best.html) function shows us the top 5 candidate models by default: + +```{r} +#| label: "show-best-tree" +#| dependson: "tree-res" +tree_res %>% + show_best("accuracy") +``` + +We can also use the [`select_best()`](https://tune.tidymodels.org/reference/show_best.html) function to pull out the single set of hyperparameter values for our best decision tree model: + +```{r} +#| label: "select-best-tree" +#| dependson: "tree-res" +best_tree <- tree_res %>% + select_best("accuracy") + +best_tree +``` + +These are the values for `tree_depth` and `cost_complexity` that maximize accuracy in this data set of cell images. + +## Finalizing our model {#final-model} + +We can update (or "finalize") our workflow object `tree_wf` with the values from `select_best()`. + +```{r} +#| label: "final-wf" +#| dependson: "best-tree" +final_wf <- + tree_wf %>% + finalize_workflow(best_tree) + +final_wf +``` + +Our tuning is done! + +### The last fit + +Finally, let's fit this final model to the training data and use our test data to estimate the model performance we expect to see with new data. We can use the function [`last_fit()`](https://tune.tidymodels.org/reference/last_fit.html) with our finalized model; this function *fits* the finalized model on the full training data set and *evaluates* the finalized model on the testing data. + +```{r} +#| label: "last-fit" +#| dependson: c("final-wf", "cell-split") +final_fit <- + final_wf %>% + last_fit(cell_split) + +final_fit %>% + collect_metrics() + +final_fit %>% + collect_predictions() %>% + roc_curve(class, .pred_PS) %>% + autoplot() +``` + +The performance metrics from the test set indicate that we did not overfit during our tuning procedure. + +The `final_fit` object contains a finalized, fitted workflow that you can use for predicting on new data or further understanding the results. 
You may want to extract this object, using [one of the `extract_` helper functions](https://tune.tidymodels.org/reference/extract-tune.html). + +```{r} +#| label: "last-fit-wf" +#| dependson: "last-fit" +final_tree <- extract_workflow(final_fit) +final_tree +``` + +We can create a visualization of the decision tree using another helper function to extract the underlying engine-specific fit. + +```{r} +#| label: "rpart-plot" +#| dependson: "last-fit-wf" +#| fig-width: 8 +#| fig-height: 5 +final_tree %>% + extract_fit_engine() %>% + rpart.plot(roundint = FALSE) +``` + +Perhaps we would also like to understand what variables are important in this final model. We can use the [vip](https://koalaverse.github.io/vip/) package to estimate variable importance [based on the model's structure](https://koalaverse.github.io/vip/reference/vi_model.html#details). + +```{r} +#| label: "vip" +#| dependson: "final-tree" +#| fig-width: 6 +#| fig-height: 5 +library(vip) + +final_tree %>% + extract_fit_parsnip() %>% + vip() +``` + +These are the automated image analysis measurements that are the most important in driving segmentation quality predictions. + +We leave it to the reader to explore whether you can tune a different decision tree hyperparameter. You can explore the [reference docs](/find/parsnip/#models), or use the `args()` function to see which parsnip object arguments are available: + +```{r} +args(decision_tree) +``` + +You could tune the other hyperparameter we didn't use here, `min_n`, which sets the minimum `n` to split at any node. This is another early stopping method for decision trees that can help prevent overfitting. Use this [searchable table](/find/parsnip/#model-args) to find the original argument for `min_n` in the rpart package ([hint](https://stat.ethz.ch/R-manual/R-devel/library/rpart/html/rpart.control.html)). See whether you can tune a different combination of hyperparameters and/or values to improve a tree's ability to predict cell segmentation quality. 
+ +## Session information {#session-info} + +```{r} +#| label: "si" +#| echo: false +small_session(pkgs) +``` diff --git a/static/_redirects b/static/_redirects deleted file mode 100644 index 58a17cf5..00000000 --- a/static/_redirects +++ /dev/null @@ -1,3 +0,0 @@ -http://www.tidymodels.org/* https://www.tidymodels.org/:splat 301! -http://tidymodels.org/* https://www.tidymodels.org/:splat 301! -https://tidymodels.org/* https://www.tidymodels.org/:splat 301! diff --git a/static/code/curves_card.jpg b/static/code/curves_card.jpg deleted file mode 100644 index df22b5ed..00000000 Binary files a/static/code/curves_card.jpg and /dev/null differ diff --git a/static/code/get_pkgdown_urls.R b/static/code/get_pkgdown_urls.R deleted file mode 100644 index 1a26985c..00000000 --- a/static/code/get_pkgdown_urls.R +++ /dev/null @@ -1,72 +0,0 @@ -library(dplyr) -library(purrr) -library(tidyr) -library(fs) -library(rlang) -library(pkgdown) -library(glue) -library(stringr) - -get_pkgdown_urls <- - function(x, pth = tempdir(), fltr = "[\\s\\S]*") { - pkg_name <- x$pkg - - x <- mutate(x, base_url = paste0(base_url, "reference/")) - - base_url <- paste0(x$base_url, "{file}") - null_res <- - tibble::tibble( - topic = rlang::na_chr, - title = rlang::na_chr, - url = rlang::na_chr, - pkg = pkg_name - ) - src_file <- - download.packages(pkg_name, destdir = pth, repos = "https://cran.rstudio.com/") - if (nrow(src_file) != length(pkg_name)) { - found <- src_file[, 1] - lost <- pkg_name[!(pkg_name %in% found)] - lost <- paste0("'", lost, "'", collapse = ", ") - msg <- paste("packages", lost, "were not downloaded") - rlang::abort(msg) - } - untar_res <- purrr::map_int(src_file[, 2], untar, exdir = pth) - if (any(untar_res != 0)) { - msg <- paste("packages", - paste0("'", pkg_name[untar_res != 0], "'", collapse = ", "), - "did not unpack correctly") - rlang::abort(msg) - } - - topic_info <- - purrr::map(pkg_name, ~ pkgdown::as_pkgdown(fs::path(pth, .x))) %>% - purrr::map( ~ pluck(.x, "topics")) 
%>% - purrr::map2(pkg_name, ~ .x %>% mutate(pkg = .y)) %>% - bind_rows() %>% - unnest(cols = c(alias)) %>% - full_join(x, by = "pkg") %>% - mutate(url = map2_chr(base_url, file_out, paste0), - topic = alias) %>% - dplyr::select(topic, alias, title, url, pkg) %>% - mutate(title = str_replace(title, "\\n", " ")) %>% - dplyr::filter(str_detect(topic, fltr)) %>% - na.omit() %>% - dplyr::filter( - str_detect(topic, "reexport", negate = TRUE), - str_detect(topic, "-package$", negate = TRUE), - str_detect(title, "^Internal", negate = TRUE), - str_detect(title, "^Tidy eval", negate = TRUE), - topic != "_PACKAGE", - title != "Pipe", - topic != "%>%", - title != "Objects exported from other packages" - ) %>% - dplyr::arrange(topic, pkg) %>% - mutate(topic = paste0("", - topic, "") - ) %>% - dplyr::select(topic, package = pkg, title, alias) - - topic_info - } diff --git a/static/code/installs.R b/static/code/installs.R deleted file mode 100644 index e2290cdc..00000000 --- a/static/code/installs.R +++ /dev/null @@ -1,22 +0,0 @@ -install.packages("remotes", repos = "https://cran.rstudio.com") - -pkg_list <- read.dcf("DESCRIPTION")[,"Imports"] -pkg_list <- gsub("\n", "", pkg_list, fixed = TRUE) -pkg_list <- strsplit(pkg_list, ",")[[1]] - -remotes::install_cran( - pkg_list, - repos = "https://cran.rstudio.com", - upgrade = "alwyas", - type = "source", - force = TRUE -) - -library(remotes) - -install_github("rstudio/DT") - -library(keras) - -install_keras(method = "virtualenv") - diff --git a/static/code/make_tidymodels_curves.R b/static/code/make_tidymodels_curves.R deleted file mode 100644 index 97b7b097..00000000 --- a/static/code/make_tidymodels_curves.R +++ /dev/null @@ -1,111 +0,0 @@ -library(AppliedPredictiveModeling) -library(caret) -library(RColorBrewer) -library(tidyverse) -library(tidymodels) -library(ggpubr) - -# alison added -library(beyonce) -library(showtext) -font_add_google("Lato") - - -set.seed(2115) -two_class_dat <- quadBoundaryFunc(1000) -two_class_dat$X1 <- 
-two_class_dat$X1 - -extras1 <- tibble( - X1 = rnorm(100, mean = -1, sd = .5), - X2 = rnorm(100, mean = -1.25), sd = .5, - class = "Class2" -) - -extras2 <- tibble( - X1 = rnorm(100, mean = -2, sd = .5), - X2 = rnorm(100, mean = 2), sd = .5, - class = "Class2" -) - -two_class_dat <- bind_rows(two_class_dat, extras1, extras2) - -ggplot(two_class_dat, aes(x =X2, y = X1, col = class)) + geom_point() - - - -rng_1 <- extendrange(two_class_dat$X1) -rng_2 <- extendrange(two_class_dat$X2) - -set.seed(1) -contour_grid <- expand.grid(X1 = seq(rng_1[1], rng_1[2], length = 200), - X2 = seq(rng_2[1], rng_2[2], length = 200)) - -two_class_ctrl_rand <- trainControl(method = "none", classProbs = TRUE, - search = "random", - summaryFunction = twoClassSummary) - - -set.seed(235) -tt <- contour_grid -for(i in 1:10) { - - - bt_version <- two_class_dat[sample(1:nrow(two_class_dat), replace = TRUE),] - - - - nn_fit <- train(class ~ X1 + X2, - data = bt_version, - method = "nnet", - tuneGrid = data.frame(size = 4, decay = 2), - trace = FALSE, - metric = "ROC", - trControl = two_class_ctrl_rand) - - - tt[[paste0("nnet", i)]] <- predict(nn_fit, contour_grid, type ="prob")[,1] -} - - -p <- ggplot(tt, aes(x = X2, y = X1)) + - theme_bw()+ - theme( - legend.position = "none", - axis.line = element_blank(), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks = element_blank(), - axis.title.x = element_blank(), - axis.title.y = element_blank(), - panel.background = element_blank(), - panel.border = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - plot.background = element_rect(fill = "black") - ) - -brewcols <- brewer.pal(9, "Set1") -cols <- beyonce_palette(66)[2:11] - -for(i in seq_along(cols)) - p <- - p + - geom_contour( - aes_string(z = paste0("nnet", i)), - breaks = .5, - col = cols[i], - lwd = .7, - alpha = 1 - ) - -card <- p + - theme_void() + - theme_transparent() + - theme(panel.background = element_rect(fill = 
"#1a162d")) + - expand_limits(y = c(-3, 1)) + - coord_cartesian(xlim = c(-4.5, 4.5)) + - annotate("text", x = 0, y = -1.5, label = "tidymodels", - colour = "white", size = 12, family = "Lato") - -ggsave(here::here("static/code/curves_card.jpg")) - diff --git a/static/code/req_pkgs.R b/static/code/req_pkgs.R deleted file mode 100644 index cc858d72..00000000 --- a/static/code/req_pkgs.R +++ /dev/null @@ -1,10 +0,0 @@ -req_pkgs <- function(x, what = "This article") { - x <- sort(x) - x <- paste0("`", x, "`") - x <- knitr::combine_words(x, and = " and ") - paste0( - what, - " requires that you have the following packages installed: ", - x, "." - ) -} diff --git a/static/css/fonts.css b/static/css/fonts.css deleted file mode 100644 index 9f7d3d87..00000000 --- a/static/css/fonts.css +++ /dev/null @@ -1,85 +0,0 @@ -/* lato-300 - latin */ -@font-face { - font-family: 'Lato'; - font-style: normal; - font-weight: 300; - src: url('../fonts/lato-v16-latin-300.eot'); /* IE9 Compat Modes */ - src: local('Lato Light'), local('Lato-Light'), - url('../fonts/lato-v16-latin-300.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/lato-v16-latin-300.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/lato-v16-latin-300.woff') format('woff'), /* Modern Browsers */ - url('../fonts/lato-v16-latin-300.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/lato-v16-latin-300.svg#Lato') format('svg'); /* Legacy iOS */ -} - -/* lato-regular - latin */ -@font-face { - font-family: 'Lato'; - font-style: normal; - font-weight: 400; - src: url('../fonts/lato-v16-latin-regular.eot'); /* IE9 Compat Modes */ - src: local('Lato Regular'), local('Lato-Regular'), - url('../fonts/lato-v16-latin-regular.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/lato-v16-latin-regular.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/lato-v16-latin-regular.woff') format('woff'), /* Modern Browsers */ - 
url('../fonts/lato-v16-latin-regular.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/lato-v16-latin-regular.svg#Lato') format('svg'); /* Legacy iOS */ -} - -/* lato-italic - latin */ -@font-face { - font-family: 'Lato'; - font-style: italic; - font-weight: 400; - src: url('../fonts/lato-v16-latin-italic.eot'); /* IE9 Compat Modes */ - src: local('Lato Italic'), local('Lato-Italic'), - url('../fonts/lato-v16-latin-italic.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/lato-v16-latin-italic.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/lato-v16-latin-italic.woff') format('woff'), /* Modern Browsers */ - url('../fonts/lato-v16-latin-italic.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/lato-v16-latin-italic.svg#Lato') format('svg'); /* Legacy iOS */ -} - -/* lato-700 - latin */ -@font-face { - font-family: 'Lato'; - font-style: normal; - font-weight: 700; - src: url('../fonts/lato-v16-latin-700.eot'); /* IE9 Compat Modes */ - src: local('Lato Bold'), local('Lato-Bold'), - url('../fonts/lato-v16-latin-700.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/lato-v16-latin-700.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/lato-v16-latin-700.woff') format('woff'), /* Modern Browsers */ - url('../fonts/lato-v16-latin-700.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/lato-v16-latin-700.svg#Lato') format('svg'); /* Legacy iOS */ -} - - -/* source-code-pro-regular - latin */ -@font-face { - font-family: 'Source Code Pro'; - font-style: normal; - font-weight: 400; - src: url('../fonts/source-code-pro-v11-latin-regular.eot'); /* IE9 Compat Modes */ - src: local('Source Code Pro Regular'), local('SourceCodePro-Regular'), - url('../fonts/source-code-pro-v11-latin-regular.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/source-code-pro-v11-latin-regular.woff2') format('woff2'), /* Super Modern Browsers */ 
- url('../fonts/source-code-pro-v11-latin-regular.woff') format('woff'), /* Modern Browsers */ - url('../fonts/source-code-pro-v11-latin-regular.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/source-code-pro-v11-latin-regular.svg#SourceCodePro') format('svg'); /* Legacy iOS */ -} - - -/* source-code-pro-italic - latin */ -@font-face { - font-family: 'Source Code Pro'; - font-style: italic; - font-weight: 400; - src: url('../fonts/source-code-pro-v11-latin-italic.eot'); /* IE9 Compat Modes */ - src: local('Source Code Pro Italic'), local('SourceCodePro-It'), - url('../fonts/source-code-pro-v11-latin-italic.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ - url('../fonts/source-code-pro-v11-latin-italic.woff2') format('woff2'), /* Super Modern Browsers */ - url('../fonts/source-code-pro-v11-latin-italic.woff') format('woff'), /* Modern Browsers */ - url('../fonts/source-code-pro-v11-latin-italic.ttf') format('truetype'), /* Safari, Android, iOS */ - url('../fonts/source-code-pro-v11-latin-italic.svg#SourceCodePro') format('svg'); /* Legacy iOS */ -} diff --git a/static/css/tm.css b/static/css/tm.css deleted file mode 100644 index 9a1e9c54..00000000 --- a/static/css/tm.css +++ /dev/null @@ -1,1069 +0,0 @@ -@import url('https://fonts.googleapis.com/css?family=Libre+Baskerville:400,400i&display=swap'); - - -.serif { - font-family: 'Libre Baskerville', serif; - color: #302c41; -} - -body, button, input, textarea { - font-family: 'Lato', sans-serif; - font-size: 17px; - font-weight: normal; - -webkit-font-smoothing: antialiased; -} - - -.listItem .itemDetails .itemMeta .itemDate, .listItem .itemDetails .itemMeta .itemInfo { - font-size: .875rem; -} - -.listItem .itemDetails .itemHeader .itemTag { - font-size: .875rem; -} - - - -/* ----WHITE SPACE ----*/ - -.article-content h2 { - margin-bottom: 1.5em; /* increased from 0.5 main-site.css */ -} - -.bandContent{ - max-width: 1100px; /*Main container width with page content */ -} - -.splitColumns 
{ - display: flex; - justify-content: space-between; - display: -ms-flexbox; - -ms-flex-pack: justify; - display: -webkit-box; - display: -webkit-flex; - -webkit-justify-content: space-between; - align-items: flex-start; } - .splitColumns .column75 { - width: 70%; } /* Change from main-site's 73% > 70% */ - .splitColumns .column67 { - width: 65%; } - .splitColumns .column50 { - width: 48%; } - .splitColumns .column33 { - width: 31%; } - .splitColumns .column25 { - width: 23%; } - .splitColumns .column25 .columnImage { - width: 276px; } - .splitColumns .columnImage { - max-width: 100%; - margin-bottom: 1em; - background-repeat: no-repeat; - background-size: cover; } - -@media screen and (max-width: 1023px) { /* Same as main-site, but ensures that media query is not overrident by above rule */ - .splitColumns { - flex-wrap: wrap; - -ms-flex-wrap: wrap; } - .splitColumns .column75, .splitColumns .column67, .splitColumns .column50, .splitColumns .column33, .splitColumns .column25 { - width: 100%; } - .splitColumns.withMobileMargins .column75, .splitColumns.withMobileMargins .column67, .splitColumns.withMobileMargins .column50, .splitColumns.withMobileMargins .column33, .splitColumns.withMobileMargins .column25 { - margin-bottom: 30px; } } - - -/*----- general LINKS ----- */ - -a { - transition: color 0.3s; -} - -/*----- COLORED LIST BULLET----*/ - -.article-content ul, -.learn-top-nav ul { - list-style: none; /* Remove default bullets */ -} - -.article-content ul li::before, -.learn-top-nav ul li::before { - content: "\2022"; /* Add content: \2022 is the CSS Code/unicode for a bullet */ - color: #CA225E; /* Change the color */ - display: inline-block; /* Needed to add space between the bullet and the text */ - width: 1.5em; /* Also needed for space (tweak if needed) */ - margin-left: -1.5em; /* Also needed for space (tweak if needed) */ -} - -.article-content ul > li > p { - display: inline; /* to have content show up next to bullet */ -} - -.article-content ul > li { 
/*to add back in space between bullet items that I took away by making p display inline */ - margin-top: 1em; - margin-bottom: 1em; -} - - - -/*--------- BODY HEADERS -------*/ - -div.tagline { - font-family: 'Lato', sans-serif !important; - text-transform: uppercase; - font-size: 1.2rem !important; - font-weight: bold; - letter-spacing: 2px; -} - -.article-title, /* Most page titles */ -.column75 div.sectionTitle { /* Learn page section title */ - font-family: 'Lato', sans-serif; - color: #CA225E; - font-weight: normal; - text-transform: none; - letter-spacing: normal; - font-size: 2.4em; /* same as main-site, but duplicated here so it applies to Learn .sectionTitle */ -} - -h1, h2, h3, .itemTitle { - font-family: 'Lato', sans-serif; - font-weight: normal; - font-size: 1.3em; - text-transform: uppercase; - letter-spacing: 1.5pt; -} - -h2 { - color: #1a162d; -} - -.itemTitle a { /* For Learn Page headers */ - color: #1a162d; -} - -.itemTitle a:hover { /* For Learn List Page headers */ - color: #1a162d99; - text-decoration: none; -} - -h3 { - font-size: 1.1em; - color: #1a162d90; -} - -h3 a { - color: #CA225E; - text-transform: none; - font-size: 1.5em; - letter-spacing: normal; -} - -h4 { - font-size: 1.1em; - color: #1a162d90; -} - - -/* Style code that occurs in headers */ - -h2 code, -h3 code, -h4 code { - text-transform: none !important; - font-size: 1em !important; -} - -/*---------TOC----------*/ - -/*TOC header font attributes*/ -/*#TOC is for Getting Started body content TOC */ -/*.column25 .sectionTitle is for right sidebar TOC headers */ - -/*#TOC, */ -.column25 .sectionTitle, -.sectionTitle a { - text-transform: uppercase; - letter-spacing: 2pt; - font-size: 0.8rem !important; - font-weight: bold; -} - - .sectionTitle, .sectionTitle a{ - color: #CA225E; - } - - /* #TOC a { */ /* Get Started Page TOC */ - /* color: #ca225ec9; - } */ - - -/*Sidebar TOC links for Learn and Packages pages */ - -#TableOfContents a, -.articleCategory a { - font-family: 'Lato', 
sans-serif; - color: #1a162d99; - transition: color 0.3s; - font-size: 15px; - letter-spacing: 1pt; -} - -#TableOfContents a:hover, -.articleCategory a:hover { - color: #1a162dde; - text-decoration: none; -} - - -#TableOfContents ul li { - padding-left: 0.5em; /* Wraps li when headers are too long */ - text-indent: -0.5em; - - /* Make spacing BETWEEN list items greater than wrapped lines WITHIN list items */ - padding-bottom: 1.2em; - line-height: 1.2rem; -} - - - - -/*-- Getting Started Page Left Sidebar -- */ - -.column25-left .sectionTitle { /* Numbered content sections, hanging indent */ - display: flex; - text-transform: uppercase; - letter-spacing: 2pt; - font-size: 0.8rem !important; - font-weight: bold; -} - -.column25-left .sectionTitle a { - font-family: 'Lato', sans-serif; - color: #1a162d99; - transition: color 0.3s; - font-size: 15px !important; - text-transform: none; - letter-spacing: 1pt; - font-weight: normal; -} - -.column25-left .sectionTitle.start:first-of-type a { /* GET STARTED LABEL */ - text-transform: uppercase; - letter-spacing: 2pt; - font-size: 0.8rem !important; - font-weight: bold; -} - -.column25-left .sectionTitle a:hover:not(.current) { - color: #1a162dde; - text-decoration: none; -} - -.column25-left .sectionTitle.current { - color: #CA225E; - text-decoration: none; -} - -.column25 { - padding-left: 3em; - border-left: solid #d9d9d9 1pt; -} - -.column25-left { - padding-top: 20px; - padding-right: 3em; - border-right: solid #d9d9d9 1pt; - max-width: 250px; -} - -.column25-left .section { - margin-bottom: 20px; -} - -/* Learn List Page */ - -.learn-list .article-title { - font-size: 1.1em; -} - -.article-content.learn-list ul { - list-style: none; - border-left: 1pt solid #ffc0cb78; -} - -.article-content.learn-list ul li::before { - content: " "; /* Add content: \2022 is the CSS Code/unicode for a bullet */ -} - - -.learning-objective-text { - color: #1a162d9e; -} - -.icon-attribution, -.icon-attribution a { - font-size: 9pt; - 
text-transform: uppercase; - color: #a19ea9; -} - -/* Learn subsection*/ - -.learning-objective { - color: #1a162dcf; - font-size: 0.8em; - letter-spacing: 1pt; - display: inline-block; -} - -.single-learning-objective-text { - color: #1a162d9e; - font-style: italic; -} - - - - - -/*-------- NAVBAR --------- */ - -/*navbar header items */ - -a.productName { - transition: color 0.3s; /*is this being applied? */ -} - -#menu { - text-transform: uppercase; - letter-spacing: 2pt; - font-size: 0.8rem !important; -} - -#rStudioHeader { - -moz-box-shadow: 0 1px 0px 0px rgba(0, 0, 0, 0.15); - -webkit-box-shadow: 0 1px 0px 0px rgba(0, 0, 0, 0.15); - box-shadow: 0 1px 0px 0px rgba(0, 0, 0, 0.15); -} - -/*---- HOME BAND TEXT COLOR -----*/ -/*first band*/ - -#homeContent .band.first a{ - } - -#homeContent .band.first a:hover { - } - -/*second band*/ -#homeContent .band.second { - color: black; - border-bottom: solid 1pt #d9d9d9; - border-top: solid 1pt #d9d9d9; -} - - -/* Easter egg in corner of homepage */ - -#homeContent .bee1 { - display: block; -} - -/*-----TABLES-----*/ - -.article-content { - overflow-x: auto; /*was scroll, but we probably want to create a parent container around only tables*/ -} - -table tbody tr td:last-child { - padding: 15px 13px; -} - -table thead tr th:last-child { - padding: 15px 13px; -} - - -.selectize-dropdown, -.selectize-input.items.not-full.has-options.has-items { - font-size: 0.8rem !important; - text-transform: none !important; -} - - - -/* MISC */ -/* This is to specifically target the package tag links within GEtting Started.*/ -/* This is a pretty brittle style rule and should probably get replaced */ - -.column75 > a { - color: #1a162d80; -} - -/*---- STICKY TOC for all w/ SMOOTH SCROLL-----*/ - -.column25, .column25-left { - position: sticky; - top: 5.53em; -} - -html { - scroll-behavior: smooth; -} - - -/*-----CUSTOM STICKY NOTE EVENTS-----*/ - -.event { - -moz-box-shadow: 0 0px 0px 0px rgba(0, 0, 0, 0.1); - -webkit-box-shadow: 0 0px 
0px 0px rgba(0, 0, 0, 0.1); - box-shadow: 0 0px 0px 0px rgba(0, 0, 0, 0.1); -} - - .event:nth-child(2) { - -webkit-transform: rotate(360deg); - transform: rotate(360deg); } - - .event:nth-child(3) { - -webkit-transform: rotate(0deg); - transform: rotate(0deg); } - -.section .event { - border-top: 7pt solid #a19ea936; - background-color: #eab0c41a; -} - -.section .event a{ - color: #1a162d; -} - -/* LEARN THUMBNAILS */ - -.flex-container.learn-list{ - display: flex; -} - -.image-container.learn-list { - width: 100px; - margin-right: 2em; -} - -.image-cropper { - width: 100px; - height: 100px; - position: relative; - overflow: hidden; - margin: 2em auto; -} - -.learn-thumbnail { - display: inline; - margin: 0 auto; - width: auto; -} - -.learn-list.itemDescription { /* "See all" link */ - margin-left: 2em; -} - - -/* Bracketed TOC for single articles */ - -a.learn-top-nav-item, -.tutorial #TableOfContents a { - text-transform: uppercase; - letter-spacing: 2pt; - font-size: 0.8rem !important; - font-weight: bold; - color: #ca225ec9; -} - -.tutorial #TableOfContents ul li { - padding-bottom: 0.5em; -} - -a.learn-top-nav-item:hover, -.tutorial #TableOfContents a:hover { - color: #CA225E; - text-decoration: none; -} - - -.listItem.learn-top-nav, -.listItem.tutorial { - padding: 1em; - position: relative; -} - -.listItem.learn-top-nav:after, -.listItem.learn-top-nav:before, -.listItem.tutorial:after, -.listItem.tutorial:before { - border: 1pt solid #ca225e3d; - bottom: 0; - content: ''; - position: absolute; - top: 0; - width: 2.5%; -} -.listItem.learn-top-nav:after, -.listItem.tutorial:after { - border-left: none; - right: 0; -} -.listItem.learn-top-nav:before, -.listItem.tutorial:before{ - border-right: none; - left: 0; -} - -/*----ANCHORS----*/ - -.anchor-container{ - position: relative; -} - -.anchor{ - top: -100px; /* absolute value must be > than navbar */ - position: absolute; -} - - -/*----Single Tutorial TOC----*/ - -.tutorial #TableOfContents ul li::before { - 
content: "\2022"; - color: #CA225E; - display: inline-block; - width: 1.5em; - margin-left: -1.5em; -} - -.tutorial #TableOfContents ul { - list-style: none; - line-height: 1.6em; - padding-left: 2.5em; - margin-bottom: 0; -} - -/*----Increase font sizes and spacing----*/ - -.listItem .itemDetails .itemDescription { - font-size: 1.2em; - line-height: 1.6em; -} - -.article-title { - line-height: 1.3em; -} - -.articleCategory { - padding-left: 0px; -} - -#rStudioHeader #menu .menuItem:last-of-type { - padding-right: 0px; -} - -/*----DataTables formatting----*/ - - -table.dataTable { - display: block; - width: 770px; - text-align: left; - font-size: 1rem; - max-width: 770px; - table-layout: fixed; - word-wrap:break-word; - overflow-x: auto; -} - -table.dataTable tbody td { - line-height: 1.5em; -} - -table.dataTable thead th, -table.dataTable thead td, -table.dataTable.no-footer, -.dataTables_wrapper.no-footer .dataTables_scrollBody { - border-bottom: 1px solid #a19ea936 !important; -} - -table.dataTable thead { - background-color: #eab0c41a; - text-transform: uppercase; -} - -/*----Make all TOCs sentence case----*/ - -#TableOfContents:first-letter { - text-transform: capitalize; -} - -sup { - vertical-align: super; - font-size: smaller; -} - -/*----Format tags taxonomy list on start pages----*/ - -.tags-list { - margin-bottom: 20px; -} - -.tags-list a { - color: #1a162d99; - transition: color 0.3s; -} - -.tags-list a:hover { - color: #1a162dde; - text-decoration: none; -} - -/*----Article footers with previous/next in section links----*/ - -.article-footer { - border-top: solid #d9d9d9 1pt; - margin-top: 30px; - padding-top: 10px; - font-size: .9em; - color: #707070; -} - -/*----Landing page----*/ - -#homeContent .bookCover { - width: 600px; - height: auto; - -moz-box-shadow: none !important; - -webkit-box-shadow: none !important; - box-shadow: none !important; - -webkit-transform: rotate(359deg); - transform: rotate(359deg); - float: left; - margin: 60px 40px 40px 
40px; -} - - - -/*stickies in the bands */ -#homeContent .blurb .event { - background-color: #eab0c400; -} - - -#homeContent .blurb .event { - border: 1pt solid #eab0c452; - background-color: #f5e2e800; -} - -#homeContent .blurb .event a { - color: #1a162d; -} - -#homeContent .blurb .event .eventTitle, -#homeContent .blurb .event .eventTitle a { - text-transform: uppercase; - letter-spacing: 2pt; - font-weight: bold !important; - color: #ca235e; - font-size: 0.85em; -} - -#homeContent .event .eventTitle a:hover { - color: #3c061e; - text-decoration: none; -} - -#homeContent .event .eventTitle a .fa-xs { /* font awesome icon */ - font-size: .8em; -} - -#homeContent .event .eventDetails { - color: #272626; - line-height: 30px; - font-size: 18px; -} - -#homeContent .event .eventDetails a { - text-decoration: underline; -} - -/*----Mobile display----*/ - -@media screen and (max-width: 1023px) { - - #homeContent .bookCover { - margin: 40px auto; - } - - .column25 { - padding-left: 0em; - border-left: none; -} - - .column25-left { - position: static; - padding-top: 50px; - padding-right: 0em; - border-right: none; - max-width: 100%; - border-top: 1px solid #80808094; -} - - .column25-left .section .section { - margin-bottom: 25px; -} - - .start { - flex-direction: column-reverse; -} - -/* dont show anchor link on small screens */ - h2 a.hanchor { - visibility: hidden; - } -} - -@media screen and (max-width: 700px) { - .flex-container.learn-list { - display: block; - } - - .image-container.learn-list { - margin: 0 auto; - } -} - -/*---Giraffe easter egg */ -.giraffe { - height: 70px; - width: 50px; - background-image: url('../images/giraffe.jpg'); - background-size: contain; - background-repeat: no-repeat; -} - -.hanchor { - font-size: 100%; - /*visibility: hidden; */ - color: #1a162d50; - opacity: 0.6; -} - -h2:hover a { - color: #1a162d99; - /*visibility: visible;*/ - text-decoration: none; - opacity: 0.9; -} - - -/* Copy to Clipboard */ - -.copy { - width: inherit; - 
background-color: #e2e2e2 ; - border: none; - border-radius: 2px; - float: right; - font-size: 60%; - padding: 4px 4px 4px 4px; -} - -/* pkgdown clipboard */ - -.hasCopyButton { - position: relative; -} - -.btn-copy-ex { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.hasCopyButton:hover button.btn-copy-ex { - visibility: visible; -} - -/* Bootstrap Tooltips */ - -.tooltip { - position: absolute; - z-index: 1070; - display: block; - margin: 0; - font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; - font-style: normal; - font-weight: 400; - line-height: 1.5; - text-align: left; - text-align: start; - text-decoration: none; - text-shadow: none; - text-transform: none; - letter-spacing: normal; - word-break: normal; - word-spacing: normal; - white-space: normal; - line-break: auto; - font-size: 0.875rem; - word-wrap: break-word; - opacity: 0; -} - -.tooltip.show { - opacity: 0.9; -} - -.tooltip .arrow { - position: absolute; - display: block; - width: 0.8rem; - height: 0.4rem; -} - -.tooltip .arrow::before { - position: absolute; - content: ""; - border-color: transparent; - border-style: solid; -} - -.bs-tooltip-top, .bs-tooltip-auto[x-placement^="top"] { - padding: 0.4rem 0; -} - -.bs-tooltip-top .arrow, .bs-tooltip-auto[x-placement^="top"] .arrow { - bottom: 0; -} - -.bs-tooltip-top .arrow::before, .bs-tooltip-auto[x-placement^="top"] .arrow::before { - top: 0; - border-width: 0.4rem 0.4rem 0; - border-top-color: #000; -} - -.bs-tooltip-right, .bs-tooltip-auto[x-placement^="right"] { - padding: 0 0.4rem; -} - -.bs-tooltip-right .arrow, .bs-tooltip-auto[x-placement^="right"] .arrow { - left: 0; - width: 0.4rem; - height: 0.8rem; -} - -.bs-tooltip-right .arrow::before, .bs-tooltip-auto[x-placement^="right"] .arrow::before { - right: 0; - border-width: 0.4rem 0.4rem 0.4rem 0; - border-right-color: #000; -} - 
-.bs-tooltip-bottom, .bs-tooltip-auto[x-placement^="bottom"] { - padding: 0.4rem 0; -} - -.bs-tooltip-bottom .arrow, .bs-tooltip-auto[x-placement^="bottom"] .arrow { - top: 0; -} - -.bs-tooltip-bottom .arrow::before, .bs-tooltip-auto[x-placement^="bottom"] .arrow::before { - bottom: 0; - border-width: 0 0.4rem 0.4rem; - border-bottom-color: #000; -} - -.bs-tooltip-left, .bs-tooltip-auto[x-placement^="left"] { - padding: 0 0.4rem; -} - -.bs-tooltip-left .arrow, .bs-tooltip-auto[x-placement^="left"] .arrow { - right: 0; - width: 0.4rem; - height: 0.8rem; -} - -.bs-tooltip-left .arrow::before, .bs-tooltip-auto[x-placement^="left"] .arrow::before { - left: 0; - border-width: 0.4rem 0 0.4rem 0.4rem; - border-left-color: #000; -} - -.tooltip-inner { - max-width: 200px; - padding: 0.25rem 0.5rem; - color: #fff; - text-align: center; - background-color: #1a162d; - border-radius: 0.25rem; -} - -.tooltip.left .tooltip-arrow { - top: 50%; - right: 0; - margin-top: -5px; - border-width: 5px 0 5px 5px; - border-left-color: #1a162d; -} -.tooltip-arrow { - position: absolute; - width: 0; - height: 0; - border-color: transparent; - border-style: solid; -} - -.tooltip.left { - padding: 0 5px; - margin-left: -3px; -} -.tooltip.in { - filter: alpha(opacity=90); - opacity: .9; -} -.fade.in { - opacity: 1; -} - -/* button */ - -.btn-primary { - color: #fff; - background-color: #1a162db5 ; - border-color: #1a162d; -} - -.btn-primary:hover { - color: #fcfcfc; - background-color: #1a162d; - border-color: #1a162d; -} - -.btn { - display: inline-block; - margin-bottom: 0; - font-weight: 400; - text-align: center; - white-space: nowrap; - vertical-align: middle; - -ms-touch-action: manipulation; - touch-action: manipulation; - cursor: pointer; - background-image: none; - border: 1px solid transparent; - padding: 6px 12px; - font-size: 14px; - line-height: 1.42857143; - border-radius: 4px; - -webkit-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; -} 
- -.fade { - transition: opacity 0.15s linear; -} - -/*------- Div tip Shortcodes --------*/ - -div.note, -div.rstudio-tip, -div.warning { - padding: 1em; - margin: 1em 0; - padding-left: 100px; - background-size: 70px; - background-repeat: no-repeat; - background-position: 15px 1.5em; - min-height: 120px; - color: #1a162dcc; - background-color: #91CBD738; - border-top: 7pt solid #a19ea936; - max-width: 100%; - line-height: 1.75em; - font-size: 0.85em; - letter-spacing: 1pt; -} - -div.note { - background-image: url("../images/robot.png"); -} - -div.rstudio-tip { - background-image: url("../images/rstudio.png"); -} - -div.warning { - background-image: url("../images/error.png"); - background-color: #eab0c41a !important; -} - -div.note code, -div.rstudio-tip code { - background-color: #91CBD765 !important; - font-size: 1em; -} - -div.warning code { - background-color: #cb9faf52 !important; - font-size: 1em; -} - -div.help { - padding: 1em; - margin: 0 auto; - padding-top: 72px; - background-size: 50px; - background-repeat: no-repeat; - background-position: center 1em; - color: #808080; - background-color: #1a162d08; - max-width: 74%; - line-height: 1.3em; - font-size: 0.75em; - letter-spacing: 1pt; - background-image: url(../images/help.png); - text-align: center; -} - -div.help:hover, -a.help-link:hover { - background-color: #fcfcfc; - text-decoration: none; - transition: color 0.3s; -} - -/*---fix code scrolling-- */ -pre { - white-space: pre-wrap; -} - - -/* Test Drive Button */ - -/* Style buttons */ -.test-drive-btn { - background-color: #eab0c46b; - border: solid 1pt #ca225e47; - color: #ca225e; - padding: 12px 16px; - font-size: 16px; - cursor: pointer; - margin: 1.5em auto; - display: block; - border-radius: 0; -} - -/* Darker background on mouse-over */ -.test-drive-btn:hover { - background-color: #CA225E; - color: #fcfcfc; - border: 1pt solid; -} - - - diff --git a/static/data/imbal_data.csv b/static/data/imbal_data.csv deleted file mode 100644 index 
b6a4a6d9..00000000 --- a/static/data/imbal_data.csv +++ /dev/null @@ -1,1201 +0,0 @@ -TwoFactor1,TwoFactor2,Linear01,Linear02,Linear03,Linear04,Linear05,Linear06,Linear07,Linear08,Linear09,Linear10,Nonlinear1,Nonlinear2,Nonlinear3,Class --1.050448358576796,-1.2503626701432702,0.3505955029660696,-0.5332326605783434,-1.75972109531921,-0.3982187256249124,-2.1183800066907397,-0.18156586778233885,1.1736880840198407,2.124658794265186,-0.7569580041069164,-1.144063367729547,-0.7357489736750722,0.6520899212919176,0.5201526065357029,Class2 --0.2725597696929678,-1.8437998600042733,0.9098543422314587,-0.846302280224414,1.8801383823189248,-0.5975415485368284,-0.34935960340507305,-1.3395722546705722,-0.8548199391791427,-0.25202090431863194,-0.26988872241335904,-0.3889538369989788,-0.6365775135345757,0.361195019679144,0.27093705581501126,Class2 --1.437421021375927,-1.2421812514873116,-0.6810105705359026,0.9806795359916605,0.5919246165672597,0.8467027248838295,-0.1386933638325863,1.1019914758194986,-0.07617629230345939,0.1338843783779243,0.9569929095711455,-0.4164035957791011,0.28748502489179373,0.22648784425109625,0.592713859397918,Class2 --0.17298144137597624,0.1931097150496678,1.2406715281962772,-0.6685728831594665,-0.5973993623746668,0.07226257760649309,2.192709287935263,-0.7555343750817373,0.6926688118502222,-0.4965733147534453,1.3701286056497046,-0.4516189302026275,0.3366582174785435,0.6295022543054074,0.09917391068302095,Class2 -1.0158970258546987,0.8591501025837214,-1.1361529383408264,2.26220882513911,1.107600854431348,1.6582229996564986,-0.05151493000865497,-1.613417072840704,0.28606338518478613,-2.4924904625818805,0.39003474212874173,2.0160827980450162,0.5185342337936163,0.7501614594366401,0.06322511052712798,Class2 
--2.743323563777341,-1.9757993574689192,-0.6393861542122559,-2.4064743751119173,-1.7443528755144946,2.192797792876945,-0.2299047174022389,0.5095108650395695,1.5090805757539603,0.48471406541434314,1.485274922533934,0.43105145972035525,-0.19150446727871895,0.5212328266352415,0.7415211435873061,Class2 --0.2472250299375333,0.522639953090025,-0.9655372561981667,0.5184827558645234,0.8523376921881207,-0.6512494560195382,0.36930116012796615,-0.943165368227377,-0.4025361279415346,-0.3047453824600811,0.16168630518342725,-0.4704173778137373,-0.061472047585994005,0.9347071903757751,0.3720119872596115,Class2 --0.014139626322092091,-0.918353147824017,-0.08991817301600487,0.8741108109593555,0.14792389852882576,0.09823369645149266,-0.17001796435223251,-0.9761243784423792,0.7602178162820583,-0.3959223892335497,-0.05654647317571672,-0.008743246248251534,0.11657636612653732,0.7522077856119722,0.9175470892805606,Class2 --1.745697915495135,-1.5464462418604916,-0.02258076417018444,-1.2620674511495193,0.9217519173144775,0.8966452717387204,-0.17119499352526887,-0.5103366627683128,-0.4973483472087131,-0.43221439178834037,-0.7324743739895863,-0.015566785065016481,0.9504748396575451,0.6332042107824236,0.6224031841848046,Class2 --0.8137042599604571,-0.8617011866825377,0.743358190456148,0.5624419746977538,-0.2484261400130093,-0.28840851162981135,-1.4139194983711192,1.646446178249097,-0.09668889522170777,1.0633078075839606,-1.2218655781217032,0.27699790076384007,-0.13039021752774715,0.06326758279465139,0.8955704795662314,Class2 --1.0847642034587563,-0.35639948998233656,0.733114831021799,0.08477638919975249,-0.16784778235393857,-1.1572290316993414,0.64947080757868,0.9215069782861466,0.2503040107890363,0.7133524390087578,-1.9248915361572156,-1.4965625309780974,0.17519981507211924,0.42139433859847486,0.0032081906683743,Class2 
--0.864842857742651,-1.461764333487488,-0.9098581311712696,0.5084714887511205,-0.04404774369863052,-0.4999368701556382,1.5696924758243977,-0.07960032019864408,-0.30883236108630807,1.0493142495233418,-0.24978291493312368,0.06418718560435889,0.026839628349989653,0.9675405016168952,0.6233227180782706,Class2 --0.05931663271664403,-0.5518875986773725,0.026370006946022043,0.9167030713285167,-0.8953693398251751,-0.7330809091136113,0.4474554460037893,-0.945311276575479,0.06997206760866052,-0.837957205528908,-0.529322925382072,1.1702953888158503,0.4408305622637272,0.5183784603141248,0.3517410068307072,Class2 -0.26668286637037797,0.8249307158715957,-0.05684872880713521,1.8066680672251672,-0.5630535383047152,0.6841851669959644,0.3614812373048678,-0.1500179737247673,-1.3061941080232466,1.3910923198459972,-0.04629790511950837,0.31427933296011423,0.489833019208163,0.5288356284145266,0.9421648439019918,Class2 -0.33565573233265256,1.0402164081124463,-2.5098315362424577,-0.8946634070315154,0.06844915372833463,1.4682241354003651,-0.1560421641525292,1.0893878007702262,-0.3024544179362619,-1.673031347397926,0.08468955433938898,-0.9597268104844063,0.41068644914776087,0.4466295246966183,0.6391481307800859,Class2 -1.7923763902755951,1.8108819041179316,-0.11782362242388346,-0.9126720161322663,0.84017956514373,-2.3309340705315993,-0.8256110448029096,0.07398290810820388,-1.3409225428853122,2.0987273476901582,0.23964514990810007,-1.0257136387868186,0.7961134379729629,0.3058308893814683,0.5068505711387843,Class2 -0.27069685258972487,1.371842033590911,-0.410754671984988,-0.2365824093714109,-0.24091389060228413,-0.5876624632901459,-0.5848089243201617,-0.897298801760243,0.5869475831626769,2.7853022863190926,2.3726525078141982,1.0568791569001077,0.11525750625878572,0.5032436479814351,0.5718056485056877,Class2 
-0.38413487693435733,1.4075821386492748,1.1001832332661883,-0.6512136728534567,-0.02881528391248159,0.16009213943424594,0.13785544234382016,-0.28184860410834334,-2.6394105755185504,0.42486844563561577,0.027165304189065195,-0.22070746409475356,0.8374896040186286,0.7299588592723012,0.7219646868761629,Class2 -0.8630830647086017,-0.8623700842190684,-1.4874782973706802,1.1811869251279636,-2.506194830122122,-1.0653735039649923,-1.006008385939745,-0.3053380203426015,-1.7017429032965947,-0.8231435230299274,0.2298466393639047,0.993871751990043,0.3568528927862644,0.12289908854290843,0.2776714398059994,Class1 -3.0062550461905286,1.8516736876439448,0.6455703929517791,-0.37080631758288835,-0.390942176840542,0.21462893614814185,0.35907419151659503,0.823144511730286,-0.195201957634227,0.005398131155506534,0.745848912855593,-0.9312246920875875,-0.7162672118283808,0.716724568977952,0.49730412964709103,Class2 --1.6373849437714485,-1.1327378402507615,0.9490106547287608,1.559951922255171,0.9954059835310223,0.5558706840187643,-0.254041632673593,0.9322230036623892,0.986986578548789,0.4650401622507134,-0.09232098814818332,-0.9122391286248133,-0.020189900416880846,0.4191911278758198,0.2655657841823995,Class2 --2.402306623412501,-1.6312904766632657,-0.11065138560021799,-0.8001931479084522,0.7527612630203105,0.2195403504518538,1.81372578287696,-0.06339732933543163,1.3016856885141712,0.562176311406035,0.20299148145437937,1.3245284269246056,-0.21840997273102403,0.7299827698152512,0.8243125958833843,Class2 --1.2666169138291585,-0.4262257318434976,-0.8912432116351523,-0.5397610727968878,0.3174485997009279,0.8769985093467699,-1.2610105038115627,-1.4972195349817194,-0.23509291051602535,0.927958358689497,0.8589676003375514,0.5140930669188675,0.3755769217386842,0.36370980995707214,0.5423562077339739,Class2 
--0.6888925878267678,-1.4231956715708454,-0.14789324634878281,0.6012531683332762,0.053566231931984634,0.49443423178686674,-0.6562482326855633,0.497187820331874,-0.4800616840960264,-1.8263026515825478,-0.3421093149808786,1.812010476680881,-0.7171717053279281,0.9618025950621814,0.1470229714177549,Class2 -1.731489837715467,2.7859830697173513,-1.351501330474476,0.32633411605752516,-1.3565053435500432,-1.5112514738676845,-1.0200895547398614,-3.448449459435509,-0.815841042261537,-0.2752007960884351,-1.7475280022181239,1.3631574282476817,0.42511362535879016,0.8466994124464691,0.927040187176317,Class2 --1.1655260449023295,-1.623943545445721,1.3663563074121856,-0.31405091605553426,0.26300551538268707,0.5144662204061695,-0.5815434967230706,-1.1617019159358568,-0.9484735514205582,-0.4143565684336887,0.6560790720195684,-0.6797962692803875,-0.0908078826032579,0.8533602564129978,0.6132946398574859,Class2 -0.1878973036732704,-0.9101136797834039,0.5352794229161326,-0.928797035421049,-1.1159915241229195,0.2135309737159293,0.494743525622211,-0.7200701445467054,1.0531033414278257,-1.4265512987907403,1.6937476779943923,0.9437373052357063,0.6086148908361793,0.9195684408769011,0.47140636714175344,Class2 -1.5168196547330217,0.8016876374100506,-0.14481674512588785,-1.7082820210008625,-1.1352072909991604,-1.3109701609223907,1.150323871804356,1.937443958024987,-0.9809042315112971,-0.5018274182146963,-0.7268669743230203,1.7970052185248233,-0.5351351052522659,0.16644848440773785,0.43855104222893715,Class2 -2.755068098738278,1.2048428829066347,-0.8557443744847864,1.1414128525910414,1.0086705299050336,-1.77210549431937,0.6736811225620792,1.9291771480148048,0.2787672096968757,-0.3182449430939843,0.701869275153597,0.46887311216260913,0.7868768498301506,0.9839711640961468,0.49169679009355605,Class2 
--1.7853575233879182,-0.10437054653697098,-2.0369456083886313,-1.3916908207040177,-0.10300715970429526,0.6702079184032625,1.1873801003526203,0.6589789462384477,-0.7166458833048408,-0.1148006182123324,0.411952553782622,-1.0341661749585747,-0.05344180669635534,0.25486040697433054,0.8464941773563623,Class2 --1.5087947828993065,-1.6926348733957954,-2.5094157521243217,1.0370437972685331,-1.2961823935773755,0.5736025433872043,-0.8627421827341647,0.06221913038130686,-0.3889466721981126,0.6246910259226597,0.36262162179269053,1.580004814054613,0.13011668249964714,0.9698171971831471,0.6856873845681548,Class2 --1.4272594377437633,-0.1257522867575528,0.4299513402296133,-0.6894144423463056,0.10218311248568797,0.491422525830427,0.983243921653187,0.19024930321746944,-0.4961497725116084,-0.5500912929796596,-0.6125027050414964,-1.220053319651032,0.48924700636416674,0.3952707168646157,0.019130288856104016,Class2 --0.8165985804939921,-1.1157255002994044,-0.4893832787636983,-1.174802638177407,0.5328962362676086,1.1252831770367955,0.7123437141954315,-0.5266548719900301,-1.1444835993757831,-1.0021452827362625,-0.6238031639850885,-0.33957455131771774,-0.7147328415885568,0.7072731051594019,0.0771170100197196,Class2 --0.9766006458201635,-0.6215839505263769,-0.5267128690755961,-0.21841677382338606,-1.3160921749567132,1.20064621171414,0.09893055125593843,1.5320810620090108,-0.4356123216052424,0.6313381139477774,0.7278797330032329,2.1855843124139893,0.7574835140258074,0.25509868701919913,0.7665176722221076,Class2 -1.6370053737532435,2.0814729995715595,0.14644962312212673,0.7748083682224948,0.6289587089743282,1.1539663030524954,-0.4679994163469899,-1.1084918293564885,1.033153425524187,-1.0138277122315693,-0.5903370156273314,0.4128286186023181,0.3563882578164339,0.5473548392765224,0.7598336751107126,Class2 
-2.2283287437516965,1.1915842435540425,0.6187579068716529,-0.5990269647992591,0.22929686522180404,0.2990348887024546,-0.6049572568765376,1.2071730193580104,-0.3158021540535464,0.9034972319205355,-0.0806382909735953,0.8615331690090577,-0.17453542491421103,0.7644582050852478,0.5186123850289732,Class2 -2.291158707617634,3.5665031556949875,-0.04781164620023958,-1.707304725522144,-0.8790513649157461,1.6515169464226702,-0.320136684771104,-0.8975879800204372,1.422284465814547,1.2453501561580482,1.9664562810511994,1.3738076717254,-0.5104329264722764,0.32045861962251365,0.5230294954963028,Class2 -2.5967853178033606,2.7860258493933494,-0.5972219142747407,0.8378732259300045,0.36824682024018,0.28158527807994743,-1.8719967184667197,-0.10675580336554676,-1.1443162975637895,0.11104926689520539,-1.3519537144015508,-0.6050469683230341,-0.34760253224521875,0.3926155713852495,0.33631801581941545,Class2 --4.183450750422911,-4.098511487814794,0.3811573135033554,0.6396843646877208,-0.8782182734839399,-0.4331842835188818,-1.934427356647576,0.6999390173667636,0.2323720058450767,-0.30145827606339615,0.269672895897864,-1.5892095012102525,-0.586559159681201,0.22121007135137916,0.3042883120942861,Class2 -0.9168255303367783,-0.009431033259238553,1.235907805652057,-1.1169507202229336,-1.4538151665415737,0.21196188949385333,-0.10403935386909068,-0.6656160269724986,-0.21528098179684976,-0.5621861799163831,-2.0445127662320988,0.23081329723180583,-0.8479619300924242,0.5314554844517261,0.2977921785786748,Class2 --0.45650234060829176,-1.0555626539471046,-0.6705680322095747,1.388717875824271,2.152580574356997,0.9645075594757296,1.4482336458310696,-1.5576076927728835,0.6565445445681587,0.6178268837772342,1.231571706711098,-1.096361059143024,0.5193145768716931,0.39989533997140825,0.3149332634638995,Class2 
-0.33246963698652876,-0.8381572285842014,0.9282518617193815,1.915158263206179,-1.2014810499444641,0.22076459869407503,0.2127624000414413,0.5787427965565217,0.20894363183940168,-0.8238634369427743,0.10583005205587812,-1.280016709218578,-0.93388546677306294,0.8916654677595943,0.360595278441906,Class1 -1.6080878994940941,1.9241699946381368,0.836088454250812,1.4752707566820018,-0.4779063797266213,-0.6227208672982486,0.3406036173473383,-0.5197729816273396,-0.3461045855399846,-1.5233800894832563,0.0801588719090671,-1.0896963098152632,0.47835030360147357,0.43447651783935726,0.3801464510615915,Class2 -1.2214130785014612,1.1215491728826519,0.8007679539692084,0.20975664263594845,-0.23165134098469742,-0.9911435371860179,1.8635381621884066,0.8480857407399186,1.47769002975833,-0.4932275168864325,-0.3969486995722933,1.8129629987610125,-0.17147049261257052,0.32271979190409184,0.5918507636524737,Class2 --1.3454480174793373,0.14704028925022483,-1.4743176215405362,0.5735103571368652,-1.760992168597817,1.438665506390031,1.1929572382871136,-1.0446060928565923,-0.19687327809329164,0.138723520007658,-0.012327986726352073,0.3130276732478272,-0.0704802991822362,0.9058961067348719,0.6911649159155786,Class2 --3.5094113158526676,-2.453133935215658,-0.12125836757462567,0.9945102289648111,0.1959977661834005,1.4948932951968452,-0.28129553640178706,0.4934744004766671,0.6234914285391298,0.6120016671420037,0.3630167632696162,-0.014781037426324436,-0.9937948710285127,0.6527839053887874,0.48375228070653975,Class2 -0.3738567150956829,-0.13336961272645304,0.1934349213449335,0.13356110949986658,-1.0494436659941444,0.08420080158703418,-0.36312198380073485,0.15623794870450747,-0.22624243847441725,-0.037714627948318986,-0.08861416000425058,1.0317431925228204,0.07691413164138794,0.7722203850280493,0.5497247006278485,Class2 
-0.444861760845902,0.13911683404091713,-0.2308929121710796,0.061179213164203404,0.7539096987493382,-0.9584615285758126,-0.3583731020046221,-0.29642307610604635,2.1791759398637685,-0.8478769163945074,1.8491898645991316,-0.2624828871582981,-0.22555825160816312,0.7920527590904385,0.4852503000292927,Class2 --2.928914237796599,-0.4407900601801411,-1.0194555307690747,0.8355725510481743,-0.10444599788438894,1.1307818349421228,-0.8017594418241464,-0.4276598682006049,-1.0928648851580065,0.7265247624260032,0.03331341090609818,-1.5785872742295588,-0.38569800602272153,0.023094392847269773,0.12568199378438294,Class2 -0.046525816724314203,0.10573149989900961,1.0939100358683687,-2.9835926976507032,0.49983510082203764,-2.47301941142058,0.6043269335192756,0.8796358660122703,-0.06407250291445088,0.27981049977265404,0.6909022992920282,0.3627337240243685,0.79277179017663,0.688889596844092,0.7216863620560616,Class2 -1.4077504091890813,2.627755784651655,-0.7192955601841361,-0.16944847134890428,0.6744748550304962,0.7143517581223956,-2.1104298320604373,-0.803251933609334,-1.2205143351758891,1.5907164500945625,1.4711705804307886,-0.13035719825512623,-0.5088982125744224,0.9732084120623767,0.008603390539065003,Class2 --0.7683647707406616,-1.535903988714764,-1.3833904074310013,-1.2793525129639658,-1.054461563766051,-0.6729951934779748,-0.47101334483710733,-0.360829420165878,-0.5319807061201132,0.47080719804380733,0.1262710739164559,2.1748819000087316,-0.4847669107839465,0.30610461230389774,0.8205756701063365,Class2 --2.9382928292079322,-4.130543798371303,2.2633840186081478,0.3303083997695534,0.016082151080187682,-0.39093939057206784,-0.18121119749381404,0.4672053771475678,-0.3349909524301451,-0.8605961455472927,0.4459818431007868,1.9081537442989818,0.6103396937251091,0.8190211409237236,0.9076905422843993,Class2 
-0.9793622699566197,0.6558604762905867,-0.7310976178860381,-0.9175963522447185,-0.5906173859813515,-0.21284819681113282,-0.9629449905974902,-0.4697802337657735,-0.10674305001612946,-1.483886035449581,-0.18962988567443537,0.16372102483806125,-0.44948684936389327,0.137448163703084,0.9747146863956004,Class2 --0.6756283933849377,-0.3004178527700001,0.08832779867306924,-0.9343591200460978,-0.3099849799512013,-1.2952551059333157,0.7101226165984694,-1.5276005103691113,0.005022224881882475,0.4005612973418385,-0.437738010043203,-0.02526341406527482,0.819122803863138,0.47131566400639713,0.5161316841840744,Class2 --0.4781131356305074,1.9852739618257593,1.006551885620897,-1.9266039687396344,0.628598410241691,0.08122446851677903,0.21371271151406368,0.22000126227943775,-0.9087165389076369,-0.9866776110022084,-1.667220357701604,-2.2459391022090243,-0.18025245517492294,0.6510012515354902,0.5018386016599834,Class2 -1.9918163841577274,0.4337961298025149,0.5284971566611951,-0.004586144722497799,-0.8143919871556913,-0.1521115041883751,1.7042319623228792,-0.9042880058347663,1.549403921362749,0.2705786238993688,-0.4011900538564223,2.7316332057905033,0.6310318717733026,0.3103380051907152,0.41906063444912434,Class2 -0.34561361278410185,1.6606465753970352,-0.8241737337399836,1.0412753945385669,0.2958125217361942,-1.261558632142955,1.7353645552003907,0.366487353764859,-0.5576301329591822,-1.0961809899158443,-2.08232899770931,0.7375007594536576,0.06717799976468086,0.9720261169131845,0.6172869636211544,Class2 --0.12548067691865483,1.7984426387459598,-0.9744197324010785,-0.3003136096671357,0.21513751079948334,0.9321899303675212,-0.4585466905403288,-1.9821155034269182,-1.2438032910825507,-1.6935019162548948,-1.238517392921802,0.8383264138315787,-0.9965276508592069,0.30307432427071035,0.4329871607478708,Class2 
-1.2937469188791968,2.8175658599690947,-0.33121712092307687,-0.8495124651748316,0.4438078879051052,0.5118434789238608,-0.4744638023205169,-0.5856876673088746,-0.7002049606532806,-0.18835158611038263,0.6076777025766251,0.9132716974628188,-0.6233029551804066,0.6124733097385615,0.15179744828492403,Class2 --0.4088053554143034,-2.7807443901447173,-0.6019064025997805,-0.23324406885688345,-1.5657132661174948,-0.07204685655433228,-0.8942832079910209,0.41082275672544516,-0.5374398267651157,-2.126017415401269,-0.7699750097855205,-0.2790023857111105,-0.01764780981466174,0.926767161116004,0.8951576843392104,Class2 --0.10587073709950978,2.0828332704013737,1.7964320744348807,0.6861622417557193,0.24655164612167768,0.22549638449342183,0.6260227623858582,-0.6118540504958155,-2.1376643722018187,-1.3360853915670992,-0.9380372454301935,-2.168106102260148,-0.6726090312004089,0.13869979954324663,0.8598309736698866,Class2 --0.8663204913952389,-0.9338569929306324,-0.4683529471939427,0.8007942657434094,-1.1804993841914995,-1.17184709215734,-0.12006944947947504,-0.2550485149269334,1.0379112627002218,-1.8597997184800257,0.3053694555450766,1.4978368271754698,0.34832311514765024,0.1896903042215854,0.7766954547259957,Class2 --0.09235684945076506,1.9100316402051805,-0.14463009561795406,0.8290522948468121,0.5332432190920819,-0.153579774353617,-1.6839939404616968,-0.6746367182594266,-1.750095470166575,-0.3243671465849063,-0.6188420588763361,-1.338413462179449,0.65960254939273,0.13232178194448352,0.5419025057926774,Class2 -0.22584106781148877,-0.27943375784608854,-1.0375909085193344,-0.21245880047454618,0.042814174319290424,0.39885842071614747,-0.7974830405783978,-1.853433639405129,0.26230522599828265,-0.7712084248963031,-1.7149785754446667,-0.19888519663860937,-0.05253175785765052,0.6323653673753142,0.7402112314011902,Class2 
--0.7501446813048762,-0.26992159679161687,0.5917780768490314,0.16206843661222414,1.074393094305827,-0.3832999945103308,1.2399205959910282,-0.7475477674135451,0.23554979285961344,-0.028529153827880512,0.5389503848810088,-1.4702644529819358,-0.755195262376219,0.3222696522716433,0.1483502727933228,Class2 --0.5753252539513143,-0.6542033139056973,-0.6896499818506397,-0.0033165415399309594,0.035598912477019434,0.9730515449113346,-0.8894307425235665,-1.5644837299592207,3.2428049396364513,0.42907834327552064,0.4482092920442961,0.46010338616478325,-0.14674922870472074,0.9575968037825078,0.6635350456926972,Class2 --0.5271396809828218,-1.9410887481999906,-0.18197375347402178,-0.344937507760901,-0.6145544329113846,1.365966100940903,-0.5267170108130576,-1.1859625071467175,-0.6182111698050271,0.036091230348429704,-0.08665549316364667,1.0499907727004085,0.7593691237270832,0.4625759497284889,0.3269049010705203,Class2 --4.085003613856722,-3.18086375111137,-1.400657523926835,1.0907337659840273,1.1812013625372104,-0.1811043659629013,-0.795680739344302,-2.154549237239765,0.31078355917817985,1.6819119788116403,0.5991461182619062,1.3779528507407073,0.09489429369568825,0.4839253497775644,0.2262892483267933,Class2 -1.325278953347371,-0.3702306620238,1.2272011658681365,0.84254775695597794,-0.6771375639470599,-0.11441658983857646,0.8054374206292725,-0.42859066995434797,-0.23517621215109286,0.6072918785518447,0.3791065413527394,-1.7036266122155796,0.13521458394825459,0.17036351491697133,0.9962037103250623,Class2 -0.163799809728542,0.2628326941430643,-0.49264252151121296,-0.1999121191990239,0.3832095983005369,0.10976046161933661,-0.4888373536216818,-0.7441090051637904,1.420870967697322,0.8000457488218443,-0.21705359102143423,0.8953356896219294,-0.7431958895176649,0.9506879625841975,0.06390649871900678,Class2 
-1.7158865307833182,-0.025576976701850107,-0.1534094306701641,0.9646068567078531,-0.6578397599948277,-0.6066623918166505,0.01954619740180279,0.10799342762060118,0.7805695934141589,0.10114463979063293,0.6763986436680259,1.5755611046696534,-0.9414564641192555,0.10689990734681487,0.38998776325024664,Class2 -2.379504998227037,1.3524639210915197,-0.22076959547383673,1.2810533190379543,-0.1368865479382534,0.24572620348000368,0.854739722457042,1.7544183296070617,2.6410675846050324,-0.9269672341974813,0.014294125938047865,0.5118957193320027,-0.6205299627035856,0.07857553311623633,0.9292406314052641,Class2 --0.47399601079621545,0.03074092234817158,1.5394191968746382,-0.09597723983391035,-0.48741404282888834,0.16148175275079413,0.7048728099107872,1.0783954560039655,-1.0790982208884528,0.7861654251178302,-0.04203046531708794,-0.39580549172690876,0.02260784525424242,0.3911790265701711,0.9758664441760629,Class2 -2.740977345986134,2.8276877675926118,1.0372066830328281,2.6131428560629315,0.3431624702906217,0.61103046497182,-0.07131192876394635,-0.5600315103449537,-0.672286029872629,-1.3556203486408844,-0.8427285073820202,-0.7149858061031654,0.2645195946097374,0.6631169714964926,0.5013724958989769,Class2 -1.673298517964633,1.9953039541659652,-0.2888366773352072,0.379131418575648,-1.4383285809857926,-1.3024839248558373,-0.2947072694225652,0.36466170857828734,1.5295966351423667,1.2382201087151652,1.1428935787853627,0.24426619476268754,-0.6785593340173364,0.8641356390435249,0.5970939234830439,Class2 -3.0015772449688605,1.980272073908391,0.7116739002536273,-0.8297145083330126,0.2854344885646797,0.9306880441055263,1.7514347892146762,0.06000557295702034,-0.4250166165776423,-0.1086257959389931,-0.6260252695540688,-1.7683228043917,0.6848837858997285,0.9237497632857412,0.873282922198996,Class2 
-2.7355330500987773,3.5359104357133844,0.6552317319629235,0.44374326743191295,1.0414935765157685,1.391558829667573,0.5792795187461615,1.2879461605211933,-0.9838184976568549,-0.5890209960754196,1.2179124931321053,0.4422004479763498,0.7149626715108752,0.21904603042639792,0.3280745083466172,Class2 -0.04575406473858476,0.27132352426217154,0.311206780688166,0.35729899279880734,-0.7079344786131629,1.1138624461488746,-0.8830364652822109,1.0043664027308936,-1.54909331442993,-0.7296316673422526,1.0541313466242848,-0.4987462047560164,-0.5292060831561685,0.8561183693818748,0.016899353824555874,Class2 --1.5714172445361418,-2.1508420339172205,1.3571162094810307,-1.1288340982140084,-0.3568164407476521,1.2828974337409438,-0.7490474219966429,-0.9047221631773658,0.597113283781212,-1.3374830433605338,0.3565972920614438,-0.3309245236651052,-0.2590174935758114,0.35475056152790785,0.37779467017389834,Class2 -0.4290563153762067,-0.8894029470190715,0.38586664662239784,1.6737545144592876,-0.02491221314372396,-0.5842614115360949,0.42301150492454015,0.36521959940182774,-0.6473940770956934,-0.8583111142789442,0.5947307709665913,0.4594724878120585,-0.46753245778381824,0.07072987919673324,0.31489510787650943,Class2 --0.3962783113204743,-0.3707323389522422,1.093785185064224,0.3623401545733206,0.5277572436694217,0.5817213363731856,0.6114170678167387,-0.14191838036335838,-1.1642392723887984,-0.37222565843632954,0.01379306035834273,1.115174177947621,-0.9029337279498577,0.822755940258503,0.7403579019010067,Class2 -0.29523334553293684,0.8970715875330129,1.9580514061271872,0.8712414240957277,-0.03257029091612197,-0.922232340029136,-0.8287747787010702,-1.0436118787928228,-0.3795974571285538,-0.6295399279573951,0.23548249999437146,-0.9399285490244398,0.6530786785297096,0.19613321218639612,0.06410695612430573,Class2 
-1.2449197319494976,1.0188003158853978,-1.053466213223265,-2.3296541453062605,0.5452053995162883,-0.987667416675318,-0.4524399455486502,-0.979399705972604,-1.5453617856302055,0.18248042801508083,-0.9315667455418832,-0.039151031236891294,-0.3565997127443552,0.2509829632472247,0.3703540498390794,Class2 -3.47936661695322,1.3239717479445197,1.3239509636012208,0.19901304864093042,-0.08432739026021116,-1.6233821927019483,-0.6580326141507866,1.695536986545336,-0.21043317364747022,-0.24455204682720708,-1.344397312654688,0.6218902641138093,0.007143375929445028,0.9948017103597522,0.711106491740793,Class2 -1.300240097430944,0.31703721016227876,0.6038013510464868,0.7475681400535392,-2.184559959404925,1.3206771207211352,0.5323350140268485,0.8771564696534094,0.8828815124145977,-2.000994508550771,-0.04046536966723219,1.302792149679265,0.22830804716795683,0.4231679260265082,0.7536586404312402,Class2 -1.1680562584438323,1.7979864529570782,0.09680754552707974,-0.8856569985488866,0.7024409352153596,-0.5138228148622498,-1.9910310208699686,-0.5090856293973213,0.2647498405676754,-0.296783261312696,-1.0456965829171876,-0.6117470891045572,0.5496534882113338,0.7181900658179075,0.4816680820658803,Class2 --2.128890258191004,-2.9663295632430176,0.6808975851096013,-1.1422960886698987,-1.3437288917003498,1.4750851205983626,1.314366840168701,0.6090344090201943,-1.150801464590248,-1.368780464520663,-0.9859620302511544,-0.33746307088558314,0.3676254558376968,0.07515955134294927,0.533876379719004,Class2 -1.6024095934364615,1.3273904646968644,-0.1468704136420854,1.448805956714356,-1.2246411981166285,-0.1775376799053688,-1.2624574412011982,1.632826366068144,-0.03276576631948993,0.8420153802707543,0.5013382244057579,0.2476928454612443,0.39783288445323706,0.9941278523765504,0.37805712688714266,Class2 
--0.5750856887105644,-0.11356280419563497,0.20976583933062803,-0.4644060838223371,-1.6110565570333668,-0.767306460793051,1.0845837803372458,-0.202344481469972,1.351009675686256,0.16376446662203603,-1.3763216191952037,-0.40614170905188257,0.23342859745025635,0.18997146282345057,0.8376554439309984,Class2 -0.9581826940827287,1.3879113330210584,0.48965584579529303,-1.7914053929965552,-0.5480845997028891,0.2692781053039635,0.10631699449756436,0.6287895629406708,0.3803918014222259,1.1205158627494978,0.2635385125879534,0.8128209982775954,-0.27557434095069766,0.2360793228726834,0.0652912650257349,Class2 --1.471857263183334,0.26025920667764524,0.37080591388078127,-0.1770551812354478,-0.20293476190426274,-1.261433829769956,-0.2209660951486089,-1.696588296554961,0.731106084767485,-0.4303584896289685,-0.6834439622408149,2.0065464723704016,-0.060789970215409994,0.5969763421453536,0.31191714433953166,Class2 --1.5065312530104076,-2.872722248170326,0.18712341324157525,1.1809946950078865,-0.8576471296350833,1.4399872110838503,0.20581183927227034,0.7078645641564754,-0.894322746229416,0.6919180776279171,-0.5667587615480796,-1.634700001610069,-0.5214564851485193,0.7268417358864099,0.9373305407352746,Class2 -0.9586188352234386,-3.019907801407462,-0.22943198096849438,1.3717436506510161,1.4919514637500124,-0.7453712641890763,0.3422481442872244,0.6379498310695553,-0.5725265303745927,-0.9840609553039209,0.02797860788472056,1.1675865006382713,-0.0458361953496933,0.9474190345499665,0.5419005770236254,Class1 -0.9409992721755766,0.2551034817040613,-1.519200952798423,-1.4575633895639781,0.558965248769815,0.5840471248422664,-0.8755109857058025,0.6829650929144272,-0.16372265764573546,-1.5086922691828442,0.4885850964138308,-0.8559885822879405,0.21730361506342888,0.8043581449892372,0.7153729912824929,Class2 
--3.509378496886395,-2.2945719507737303,0.4411801020716601,0.7054825445751982,0.025518888748178446,-0.5641372419367223,0.5811277930601007,-1.8455992731499624,-1.2517607150650116,1.1014622981855349,0.4219182244627183,-1.5868411010103578,0.14139795443043113,0.18561820941977203,0.6895692460238934,Class2 --1.34471189342144,0.36199022199794745,0.4277178857029119,1.6974352988420662,-1.8452508709528086,-0.03775155856659244,-2.715698748305599,-1.0695400004347175,1.1639591202531394,-0.409872708841496,-0.19778122980998095,0.9091892003537372,-0.7392245074734092,0.9492566986009479,0.7979362681508064,Class2 -0.18095102979022565,-0.8462448768708251,1.1488125994965988,-0.32676578602742445,0.3475627091648077,0.17606324305639742,-0.26315355052514855,1.1332328848037527,0.1067734158766573,0.7911875658471097,-1.8643139818750591,-0.9220584635209088,0.4374673427082598,0.4953388371504843,0.6074633533135056,Class2 -2.257718961410741,1.1494236860372073,0.58961920914537,0.7360994683833142,-0.33364194760014704,-0.7723032716829252,0.413794449417351,-1.2720348035934868,0.4731917720231867,-0.8818940657102458,-0.4622211522441281,-0.17529821943997284,-0.8098824294283986,0.5948316643480211,0.2756874265614897,Class2 --2.860599966802481,-2.930076512498205,-0.9422190683503733,-1.0021157201147506,0.7432154106647492,-0.02849802100179224,-0.2102827879669299,-0.17009598814415683,-0.10563391964397668,-0.3615862796511519,1.3457895099811155,-0.6706784908439539,0.1859712158329785,0.0740444038528949,0.1295555296819657,Class2 -2.0420048401988016,0.6641585869046521,0.5805458405341792,-1.0106300500600107,-0.10784631528242501,0.42176214106294263,-1.298736202210137,0.15668464653173564,2.7771226968529623,-1.588063508830439,-0.5559289647914889,0.8633702531375638,0.7335284473374486,0.6400800587143749,0.8251970349811018,Class2 
-0.517390848217093,-1.4728314921067616,-0.007344382286934415,0.05165649506328023,1.865917270226643,-0.2571495544105584,-0.47002505375712195,0.694991964214618,-0.4681654576723301,-0.7382674918744234,1.4999406548436938,0.6448176032657018,0.716628500726074,0.9935834971256554,0.06077162129804492,Class2 -3.995639628949079,4.132243415281681,-0.9585982702557868,0.09347040198638457,1.0036746858952836,-0.3404734311935524,-0.15200863514749682,1.1282651078316082,-0.9569679917957639,-0.4033398144795703,0.06144514723819485,-1.2450396010457787,-0.8277970249764621,0.5927000551018864,0.6427166524808854,Class2 -0.6187139584210335,-0.32110945070479724,-0.03997230919623887,1.1737056413849314,-1.5327746575265517,0.05693195470374454,0.08923599341765331,0.5261199272774284,-0.4494666781697271,-0.7221835181691267,-0.7988108864976193,-1.0598136387083414,0.18175163678824902,0.6825736386235803,0.6655140924267471,Class2 --1.8313616124549883,-0.6529429517715967,0.9979933990659552,0.25907402734379575,-0.5869399764931884,3.0518820410134357,0.01511725801901033,0.42378583247054746,1.118927279059177,-0.5151797082825813,-0.6837682713322125,1.1589691662557682,-0.5092377997934818,0.5747239226475358,0.30319830076768994,Class2 -1.8971992146426753,1.2959439344352512,2.1249274515717747,0.01198331214506322,-2.4123558449185816,0.3121852068022169,0.6888206002874858,-0.6470298999849753,-0.9207703284697647,0.5930553451104464,-0.5513603699508106,-0.3674055833577981,-0.5032890974543989,0.04833650845102966,0.9294498129747808,Class2 --0.5899638564739179,-1.3324661843938141,-0.13530395391642686,0.2394925551548956,0.44173290324415954,-0.6148595063825284,-0.3311685757210509,0.2182586459113434,1.3588888431036352,0.41632589232819933,-0.1372688164329111,-1.1982358529479553,-0.09467696817591786,0.8660047256853431,0.7933461789507419,Class2 
-1.2762967939208067,1.9275066954672107,-0.42505083068012656,0.30473498001241117,0.10805704350383552,2.8666436475599624,-0.5823599766692873,0.43506550465338534,-0.30381271765292417,-1.7308328411250038,-0.6522233501095063,-0.4006737334469337,0.6892119655385613,0.5750889105256647,0.6297190689947456,Class2 --1.5763766834024826,-0.541850329062006,0.796339975289407,0.39601099168840126,1.8581245111238067,0.06433615346036738,0.990136447694861,1.051262489674101,0.40471095770300297,-1.5160363515593385,-0.7995562122497675,-0.10565227797621893,0.7828171839937568,0.9396596245933324,0.44619354512542486,Class2 -0.3009919743542711,-0.28636793478440786,-1.863659267583134,1.6212016897896466,-0.17633529315261431,-1.9913904895069459,0.4979824750631278,-1.2668956330880725,-0.023066583830089016,0.28640334282143287,-0.7775894532733415,-0.6340258978445689,-0.7007526587694883,0.5048635611310601,0.07621005969122052,Class2 --0.07637539078576036,-0.22987426689396787,-0.14931358504919964,1.8241593336272797,0.15737829802899234,-0.5812555452215991,1.9161397566155673,0.9938036679918493,0.8684280022506369,0.35800510005106323,-1.2803436380728905,-0.6107685373401828,0.8131814859807491,0.7909623205196112,0.7390705891884863,Class2 --0.061091757058647216,-0.061804909466379405,1.2735113513008485,-0.37509127486247196,-0.6139994090113569,-0.4019373758539269,-0.30184268980307344,-0.676981249511355,0.8628160269506634,0.691982774609071,0.3302157615916069,-0.025537902001505332,0.08418629225343466,0.4689598351251334,0.983483464922756,Class2 -2.7143606795127573,0.48670193525993977,-1.087573585625833,0.6663363044747911,2.0773880586201945,0.016174502825404086,1.1720851780039987,-1.0664926665521557,-1.909407027866559,-1.1439705931854278,0.5565455743716464,-2.1074389583992073,0.5623116116039455,0.4701482905074954,0.0508149079978466,Class2 
-0.6027939059480405,0.8856965213610606,0.9099838671296427,1.8545250261193298,-0.7172828121380024,-1.9702309400816824,-0.3466232034980615,-0.5192783107174443,-1.1813253435527051,-0.4719836202801721,-0.24646602037759432,-0.1189089296812091,0.987720494158566,0.7168376236222684,0.3325452075805515,Class2 -0.024578856672792318,0.20955647755103207,-0.37443461590234434,0.3840839712962281,0.7481227493842288,-0.6900394133872328,1.438440511626754,1.297968211134962,0.1864845189269622,-1.4942386050272476,-0.3481098042788333,-0.06791893074527243,0.7841008203104138,0.7506580215413123,0.3797268222551793,Class2 -0.831379775158495,2.38302912398217,-0.018635047074982678,0.15958095375396159,0.9740058742937042,0.39110166155326204,-1.41721828533565,-1.3837698421887403,1.165223624071558,-0.09257909284902675,-2.028554154139701,0.5533365857960673,0.09407583717256784,0.3404316168744117,0.16249790112487972,Class2 --1.9913798123232818,-0.5146529454762665,-0.7595626927773307,-0.6796905702808126,1.490018228119833,-0.7952056206003834,-0.9545483242158783,0.3935315723848191,0.173565636823628,-0.1593297349671146,1.0754194609898702,0.018641635529190688,0.1253119818866253,0.4961773273535073,0.817499591736123,Class2 --1.162693605378624,-1.5269437049876546,1.8191964992232463,-0.12832983604637194,0.8966035110089948,-1.348721233389822,-0.9774682742058417,-0.34092284295068265,0.16779285854703951,-0.2993446686077876,-0.1696298261471739,0.8652373326643682,-0.5770341292954981,0.788756814552471,0.06724537489935756,Class2 --1.574920185555596,0.5806778606237921,-0.4062650614290141,-0.5386533378844505,-0.30465597499797575,-0.06778721994179626,-0.3596133439063613,0.004758963253196471,-1.3749746584161284,2.298896646662766,0.31142513012205375,0.9857150725804152,-0.2450431319884956,0.15130546991713345,0.35461960546672344,Class2 
-0.12131822459898514,0.5450896351085063,0.11556156057283798,0.4711728513327306,0.7907064470753681,-2.017079747211678,-1.5053325164585951,1.1012709946890982,0.294881266464703,-1.390227996930299,-0.5981629381907901,-0.22789739739615725,-0.10265709040686488,0.9253977341577411,0.6216983573976904,Class2 -0.22008335166904802,0.8157675819679758,1.0003184059197208,-0.36166321760736786,-1.041639340406692,0.7795189894637217,0.07356019495704791,-0.5536856474831175,-0.0694457465344019,-0.8389629040423636,-1.5250061975361577,-0.42969522159205503,0.9852398340590298,0.09706734167411923,0.1414202421437949,Class2 -2.470980640231642,1.700199313668424,0.022545080137687655,-0.1720942665630873,-0.8811370154346413,0.4116611515058087,-0.383004778907955,-1.3705282729712867,0.44853791630550377,-0.03645673393004382,0.4036932658290765,-0.824292275579217,0.9840911594219506,0.479707483202219,0.471456294413656,Class2 --0.36435174477411253,-0.9510246155728652,-0.3755505819506252,1.8009586177845751,-0.7379013380209316,0.2999466826674139,0.5341149328419554,0.6004140517594105,-0.20550591698376006,-1.0222135203982055,-2.3294010491513273,0.4755933756987162,0.28393884701654315,0.7859883310738951,0.42038430320098996,Class2 --0.9091638504024737,1.5662570053527975,-1.311358438373548,-1.2656563440075501,0.6479693164988567,-0.09639516121695332,-0.4425641394454889,0.9472910604050383,-1.494455360938514,2.0543196267539683,1.330167480231229,3.1920681861698803,0.05120884953066707,0.022171882446855307,0.37858849624171853,Class2 -0.8216979242226012,1.341549487809938,-1.7574563720135248,1.1853025059872488,0.558009138305989,-1.1498987193137833,0.3072337554607585,0.6947745319974525,1.6711415335468165,0.28063251535896994,0.7850178233522467,-0.9660860795419429,0.21963445423170924,0.5010880122426897,0.11996924527920783,Class2 
-0.8455641745108664,1.4654106268550005,0.5501895245793574,-0.08179599378080984,-0.2444814979105399,-0.21481710272756047,-1.256591376185501,-0.4555422022615962,1.7569761690366743,0.11609523230151501,1.4414310347081896,-1.491478043634519,-0.30272521590813994,0.7527663412038237,0.003910398576408625,Class2 -0.521598289315387,-0.24598905243344502,-0.4626682606081682,0.6185412915915568,1.1978835706386088,-0.6561935214272331,1.857062727005246,-0.12096843210356871,0.7474608580499963,-0.2877670686685996,0.8412766173093081,-1.2335838229173113,0.9145044065080583,0.29358194465748966,0.06392618897370994,Class2 --3.2206321602601946,-3.5044402196874875,0.6985470520918389,-0.4523241322680177,-0.92819028475333,-1.2307970871481317,-0.45307158025291194,-0.32725697461197,-0.002029492070102686,-0.9377193672771208,0.5234081914437121,-0.9156872691279032,0.8086798749864101,0.24462031666189432,0.8969930149614811,Class2 --0.26814784967661637,-1.2282454048093012,-2.049241440663732,0.2730941112975091,-1.481329709284799,0.4101275118143485,0.2535846452421525,0.12164491797467936,0.10142916584634591,1.339175360276688,1.4296994161614731,0.1875691138772792,0.13608779199421406,0.7799929613247514,0.8189094017725438,Class2 --0.1421977085902354,-1.7058732856300924,-0.02561344801086109,-1.126551153258485,0.988921355526239,0.6606063204260152,-1.4725862649588197,-0.8658151212441013,0.324064201065007,-0.7866884401531375,-0.5872440916498284,1.1901089136342524,0.5923862606287003,0.8999527581036091,0.43584952992387116,Class2 --0.8241698845351039,1.0708424538741157,-0.5871392105872847,-0.2438634292324117,0.9620482725651257,-0.0322543072481219,-1.4866578575510225,-0.7051237219982427,0.34485777696485587,-0.10303456735435598,-0.085460531086604,-0.10285850172145093,-0.7212267839349806,0.8327735152561218,0.418994345003739,Class2 
--0.8392712105169282,-0.582054376143097,-0.2995531599639794,1.4243082995471958,0.7693834368544125,1.0166995471163875,0.07960573702664726,0.5619813111678813,-0.8067469977835994,1.6830057384699901,0.05370361602841901,-0.17832099351614736,0.6972667444497347,0.7469537751749158,0.08300812123343349,Class2 -0.8680201309508799,0.5246527403485426,0.5629286630857406,-0.23597134555375188,-0.5327158027246254,-0.7459467106264208,-0.6998452563691566,-0.6636453318459079,-0.354926854211966,-0.6451238111119658,-0.24899793566618514,1.7345324542803915,0.3839665576815605,0.7152804855722934,0.6563672090414912,Class2 -0.030707706530684567,0.26843401245050497,1.1341177550271766,-1.6649197720333648,0.10318768044690961,1.0913910056488703,0.8979742585850058,-1.2178496183941725,0.732432014933846,-1.1337563152990262,-0.6983627468941487,1.0267256687546022,-0.8439356349408627,0.13407960277982056,0.5949431613553315,Class2 -1.4154513911897566,1.1344725069734016,-1.5701707375801521,-0.8751796012037737,-0.13998963117340166,-0.4275598424420437,-0.6167995685113193,-0.24446107407460352,0.3065317632225906,-1.3880560742286274,0.6816657187953951,0.2283486510358273,-0.3274926105514169,0.22459002118557692,0.5321726526599377,Class2 --0.8517883347917907,-1.1811945951997749,0.10272944424163431,1.3478116437355339,-1.0566486240334005,0.574801402862861,-0.27019103735553623,1.0613559303164413,-0.883276422158741,-0.3029669978958081,-0.9216780028672322,0.19155331266938666,0.43394248094409704,0.1604399101343006,0.48672288749367,Class1 --0.7604248217877067,1.9174957103953547,-0.13329909380854343,0.552496192921557,-0.3095067709251028,-0.30189525171636716,-0.915334809372042,-0.3425352543912513,0.18444535753081634,0.9136245060579704,-0.07690712050890128,1.1967605773701304,0.27557729557156563,0.5482943155802786,0.7042758103925735,Class2 
--2.1298259723321094,0.2123844410886888,-6.369057335986045e-4,0.71487664636367,2.2968655068731803,-0.029570727489080188,-0.2834547333905249,-0.1686796466554108,0.8333452773588274,-1.9118529417400048,0.4152247500341295,-1.241605416938831,0.04187922040000558,0.7865354553796351,0.30177846690639853,Class2 -0.9828764593699278,-0.2994360749901669,0.4292347752463409,-0.21067961845201336,-0.34394408367761,-0.8138705530738525,0.006778530633106144,-0.27771694530646157,1.4558005295402958,0.7562932771214437,-0.16799936833568818,1.8096175094293423,0.007794513832777739,0.4432448078878224,0.9717228878289461,Class2 -1.1609614880709551,0.305564619239792,0.3609254774829215,0.38090991286589704,-0.06551600489744819,1.3041512987956994,1.1747301110066952,-1.1522513064404318,-1.1235585351295232,-0.3975811637564597,-0.2837729948325421,0.44640642336851843,0.2923252978362143,0.8449598187580705,0.31750775617547333,Class2 --0.16004779488322096,-0.8456528876042778,0.4730421435779953,0.42665323095859764,0.0766685808798932,0.018931362953480246,1.3389232588762405,-0.2952315230909308,1.0228109712092137,0.5794502793346944,-0.4485544647688782,-1.3993874619104518,0.4245203719474375,0.03761272854171693,0.5085859168320894,Class2 -0.9233065516391,1.5737413723928757,-0.32609110211965064,-0.9954117399943282,-0.3739092308504738,0.5610825781954816,0.033429186834201,0.9566437001678482,0.7712793354620043,-0.8403413857985338,-1.0924933557697072,1.274143414320238,-0.45322479167953134,0.10688438708893955,0.52224997500889,Class2 --0.6715181904425322,0.16854888635159143,-0.8669088228539134,-0.4435782736707867,-1.362887491154528,-1.3262463175532888,-1.6676994483008627,-1.4036839105064198,0.6959937040306761,0.9976866778233294,-0.026613898452528248,0.41260229816450195,0.21756202820688486,0.557564842980355,0.6755150312092155,Class2 
-1.113946446962536,1.6428785330434,-0.4592598874311091,1.686379296085127,-1.9210665053968248,0.9874887655136272,-1.2815910202011334,1.295253298930146,-0.7457036813746948,1.075216769812953,-0.12638688172847687,0.22515056329498098,0.11772662354633212,0.30427994136698544,0.5089943546336144,Class2 -0.8158806594841204,-1.4045895997288695,0.8430454065378091,-2.1883343980083962,0.4119083527813764,-0.7201561104054398,-1.012320530430715,-0.5042217619210514,-0.013823314909221535,-0.05837558675223591,-1.333807210363877,0.12081692754912864,0.24189875787124038,0.7886635386385024,0.7724273812491447,Class2 -0.7734255932595111,1.566326045678417,-0.10708792464344957,-1.3061838595511868,-0.3773253500328284,-0.26419892361979946,-1.8464958578229287,-0.1858185024797593,2.320125088800621,-0.47592853836304183,1.3370385416721926,0.6459840097193514,0.7681601513177156,0.8685878592077643,0.9363011801615357,Class2 --1.509394696083186,-0.328661208253743,0.2550044831551668,0.27129545400287053,0.1510247636663107,-0.2922978903270942,0.5175285726029789,-0.3610438804399441,0.9965555399797267,1.0068279921932262,1.4618669711122636,-0.24468678026963436,0.23553733294829726,0.020272251684218645,0.9858344809617847,Class2 -0.38881971721030306,1.5547351368270053,0.8839367220398885,-0.009023606338335762,0.025053924356134093,0.7214503253488403,0.49417570698905733,0.23977481917473883,1.1317747931449569,-1.1605817332002533,-0.7442168494688969,-1.8427714838546783,0.2656449689529836,0.005342218792065978,0.43105058558285236,Class2 --2.1092773767028774,-0.08666769810752029,-0.04937096536766289,2.5728013117558977,0.0425969892800058,0.39573082802736226,0.053658895235879585,1.1823716823475678,-0.5956692135218258,0.11341786952260426,1.185054116476169,-0.24132604460779444,0.9280427973717451,0.4098721088375896,0.9744317058939487,Class2 
-1.2149149778949286,0.5735348783552279,-2.4766713377696825,-0.08744896258814142,0.18821114773548683,0.0014565694607638563,1.1853531429439483,0.9413580715214238,-0.2719316063810356,-1.3565355634848046,0.8330720651705029,1.091375260765905,0.3931805114261806,0.41113661020062864,0.5976030349265784,Class2 -3.2300356666724293,0.6750704827853171,0.7196815405693446,1.6006181175886847,1.5616476617179245,-0.3776763298821211,0.798881188305214,1.3957681638019586,1.1884666436769504,-0.423394296645286,-0.22259671490796307,0.22825878777097308,0.9302082317881286,0.044401464983820915,0.6985976831056178,Class2 --0.5420795718658183,-2.4710610181442982,-0.3965436051862158,1.0893831914487513,-0.6129534659810792,1.3550511339514109,0.4827310954696963,-0.577592453869327,0.9941664645407319,0.682041210563347,0.37469420253946756,1.452756432571321,-0.2584227784536779,0.3016814449802041,0.377908994210884,Class2 --0.7578972565196653,-0.7906605166124656,-1.283552264499052,1.326048417117073,-0.4689760262068274,1.516918827476057,-0.31359881877745166,0.01149309784451701,0.2347484703003322,0.4636902515701318,-1.265968630812915,0.432984155186957,-0.6610034685581923,0.9322638229932636,0.6364047846291214,Class2 --1.38474162519836,0.43712351294125645,0.666725704028179,0.4260468263397679,0.6649509777540423,0.1373694158200562,0.5924862035011815,0.44101093206159114,0.7341608872748471,0.8639705125201472,-0.39568284361483375,-0.28892917180530203,0.12956297677010298,0.26475730584934354,0.3860400195699185,Class2 --0.7089435070407825,-1.9853044275448841,-0.8399202160972488,-1.0184769857826856,-0.9905627113031273,-0.46441277275324405,-0.0745636330267549,-1.6136663402710456,-0.11117087183894996,-1.7408802165246409,-0.6774057251519322,0.744464386774155,0.2063071862794459,0.8712579654529691,0.506251354701817,Class2 
-0.41435681813360314,0.07807604348786046,-0.4770316722210497,1.222298503157003,0.09649818169053101,1.5136681933781868,-0.8383611883784335,0.26823412459076335,-0.09664054237274311,0.3473678876757703,1.476299700283601,1.6483788377201674,0.56380599969998,0.925193572184071,0.07905116211622953,Class2 --1.2030406299540632,-1.4194614935599033,-0.34651632030795265,-0.624277268719183,-0.6757686949874265,-1.035290506536295,0.8775917274983388,1.2541134855194422,0.0800521923374738,0.416551218204127,-1.325464973370254,0.7950706882239378,-0.7331568179652095,0.5080344385933131,0.20030754967592657,Class2 -0.3014820001290436,1.6925738158071217,1.133371864222573,0.5551564261762209,2.0459310369817945,0.9232990177219357,-0.5162060686986035,-0.35568548225572627,0.6225056732554962,-1.2375699955848496,0.6806035386712965,-0.2211626666494892,0.3740604091435671,0.9974704992491752,0.40007906849496067,Class2 -0.8757539473563685,1.8366413576764922,0.6149979713779192,1.312457299230095,0.8786993759773538,-0.62848633593847,-0.8731704842839757,0.9415292652480909,1.1606360013668524,-0.5747241032834309,-0.6586977610541974,0.6892233405118013,-0.49875544058158994,0.053670540219172835,0.7664464637637138,Class2 -2.5268826832825146,2.141326726861214,-0.9876359039010156,0.1502944234135266,1.089553496432681,-1.8066942422603043,0.05810877607613716,1.006857346977566,-0.6301548363316929,1.2299965402953899,0.16244851412819677,-1.1207332176441147,0.7543060285970569,0.6475591771304607,0.9273907269816846,Class2 --0.3054703052462026,0.47648933302466584,-0.0711999659456532,2.2941651032344965,0.755029515406737,-1.5363621044669087,1.6323319676152193,0.8694244681193776,1.7837950399054894,0.6158896453122557,-0.12013274140821777,0.35578606139091384,0.9552144352346659,0.4005703367292881,0.825485962908715,Class2 
--1.406358593520845,-1.5284567854887725,0.257715419279358,1.0796011546616102,1.5060517474126924,0.8550089425305669,0.1310958730893591,-0.8954512267444513,0.5577662764725998,-0.6807090959492011,1.2172471784333776,-0.8310593508872697,-0.9150832532905042,0.9682782485615462,0.8425866784527898,Class2 -0.5915568249616691,0.902635279189632,-0.951361272029365,-0.5339658314338624,1.1938626242892156,0.7035412280246602,1.002124781640329,-0.354252388690954,0.6128328335319871,0.8709142637839989,-0.7679190640736697,-0.9918541674004375,0.8570212917402387,0.5892907127272338,0.3360569572541863,Class2 -2.4920959795663995,2.1400342861178006,0.27789227689307416,-0.01778854721552045,-1.1978153582118063,0.12683557338044252,0.3243220322749475,-1.4322870697282741,-0.12744910753693017,0.07964576235555218,-1.463396551348575,0.744489222165478,0.13968736631795764,0.9012008558493108,0.6040822092909366,Class2 --0.2802297526708617,1.9731830087236089,-0.33252186828579994,-0.21273240443603594,0.6977925398637556,-0.5508151445031423,-0.5200485103196651,-0.5344770190726468,0.37246198393688534,-1.8434132276863937,0.5562959150542928,1.8215693357420097,0.37671080278232694,9.659207426011562e-4,0.1579122378025204,Class2 -1.9968617229873449,2.361570046716613,0.1928866335988381,-1.226975421744007,-2.0978638186092398,-0.5885541744323698,1.8932570852288837,-1.722280560259554,-2.030698919400089,-0.5977624428865164,-0.8620041848366772,0.2823388802356699,0.1842831172980368,0.4037751113064587,0.33107997197657824,Class2 --2.2943210332126007,-1.2482043001190692,-0.8326136913965875,0.6123305567055066,-0.6568960868116429,1.0699170230006252,0.39366782258587507,-1.1484227050554499,-0.7107855818312412,0.44356951291934044,1.166261869094042,0.5128648888964722,0.542561330832541,0.9467742082197219,0.09561331663280725,Class2 
--1.0910621599107546,-0.9742224569181748,-0.2854420032678304,-1.4561657798829006,-0.18153434123373163,-0.3860443764769202,-0.4373225795384808,-0.46154822259742995,0.24349348717682676,0.2949902335607599,0.5192725286182646,-1.2299185391346101,0.6911458312533796,0.5928624691441655,0.08423774922266603,Class2 --1.723216698533627,-1.0276293850957077,0.8495486409682675,-0.5143370145077725,0.17101396512619965,-1.3694246553863567,0.1262169832926421,1.7412327965322596,1.4505382388420456,2.003350266497451,-0.7216691942925512,-0.7513737926696411,-0.47514696000143886,0.0020027540158480406,0.6044561555609107,Class2 -0.5527526083325673,-0.7980190826273414,1.4886251651448195,0.7222024396353163,-1.0421955565957028,-1.9934644865029736,-0.6093674572819453,0.2425343340956734,0.12552534648332825,0.10288672807084039,0.2343701047261313,1.3081819123177445,-0.759833472315222,0.4617638518102467,0.16840126202441752,Class2 --2.0084919465858304,-0.37657604799762534,-0.5362409368789918,-2.652173960300055,0.8062332987144201,0.08392832774824105,-0.5494073562936335,0.41120798275218545,1.1041320831458385,1.9057495939976379,-1.0409066198528574,-0.930442023763646,-0.19552383245900273,0.13324633985757828,0.6620244970545173,Class2 -0.33599953851767994,1.0283479056784288,-1.9214169538100738,-1.02573390501961,0.5682517890927342,-0.527425080448333,1.071422429733671,-0.5584471094691505,0.8124837194849148,1.664061628467455,0.48914952444324694,0.4860713138025439,-0.05683392332866788,0.6350868111476302,0.14419701392762363,Class2 -2.6737118361937204,-0.04781778571651474,0.9264594100305372,0.5751215825352002,-0.3310212376232586,0.6126442116519347,-2.508770052888241,1.813966371896336,-0.09652653750797716,0.3592608926060826,1.1351257431452793,-1.0511010323857939,0.1804250506684184,0.07061198633164167,0.40515325241722167,Class1 
--0.8393029799760195,0.12124363425852802,0.45770899927725267,-0.49576110422176584,0.9494081571497112,-0.11620489759160262,0.16549733753766324,0.37927069926659035,0.14560919771677336,-0.6532437404485397,1.312906406020725,-0.7932924404622268,0.17585110059008002,0.04708494385704398,0.2798900946509093,Class2 --1.254015810042056,0.04092297222621799,2.119348473030922,-0.7978869074229338,0.5201553842038713,2.0095330579699477,-0.35462700835392635,0.07175173875504216,-0.7419698044669196,-0.11422412400154212,-1.137902074274237,-1.3969066983296576,0.37975236074998975,0.8638662474695593,0.5356978592462838,Class2 --1.1938118379440785,1.6422768466692592,-0.3362471095178421,-1.4693179831775935,0.5085609585486147,1.540800697368982,-0.20112598090140488,-0.578917924746597,-1.5186849829841123,0.9482451039841429,-0.19737306531264645,0.438088289692055,-0.9547786135226488,0.057209464721381664,0.7276906524784863,Class2 --1.4165763817112595,-1.0325350117743608,0.57402702631457,-1.2609428958234312,0.05133176154254836,-0.548943550373757,-0.5207304712823949,0.6242116015674276,0.22343394107559245,0.17262997287730047,-1.003068705314182,1.2937749599322705,0.6486416752450168,0.6129408266860992,0.5705943854991347,Class2 --1.275647178062205,0.9203289463071648,-0.3625758774091195,-1.0612878942594282,0.2598595985425349,1.3395692777448953,0.0014100788238668646,1.017556290873462,1.4855870942188238,-0.44796278024714165,-0.2667529356492364,0.7790518296429105,-0.7191928811371326,0.34538230230100453,0.6475586481392384,Class2 -0.49513886114408395,1.0793710900889868,-2.3446440752722473,-0.3739176609817332,-0.06200493739498313,-0.09897686631520725,0.10170918893658125,0.5942840482924532,0.1015052216422323,-2.3021680663229938,-0.11833298004290391,-1.4310806248621384,-0.6437980653718114,0.604140505194664,0.39398927660658956,Class2 
-0.7342552739326127,1.883021557356185,-2.376541328892681,-0.03880172672310928,-1.3611236796200143,-1.251130335489054,0.3833166896580443,0.4133042031553147,0.7099459032619823,0.6524577053095225,1.6485297685780471,2.143277021076223,-0.653142720926553,0.9068924896419048,0.40769182122312486,Class2 -0.9637685607496275,0.597421361477846,-0.17342708271336071,1.2769808917406127,0.9145827931790027,-0.2181555904820522,-0.19000508790127932,-0.649529217391464,-0.3894984789039273,0.2617289853529159,0.3029589036176052,-0.7776726388337044,0.4104576972313225,0.6618327214382589,0.5132490678224713,Class2 --0.2784981279298883,-1.078231709956266,0.2079236854263421,1.1579233491952554,0.07538888747115181,-0.2899717573918951,1.0850664895169895,2.468371757638632,0.15696305273411368,1.3532951116704397,0.037266845337859775,0.2568757036024181,0.48561803810298443,0.9275333301629871,0.4803776773624122,Class2 -2.171630888871116,0.595805544480223,0.7954571016635993,-0.4040446466309196,0.2173084365303069,-0.05571640526355492,0.9633547495769786,-0.20776329450920428,0.7865542703334759,-0.3057568429579487,0.3717510853631996,-1.595550921494854,-0.5987950540147722,0.27863359125331044,0.17952615697868168,Class2 -0.1196650186032715,1.0300724389999445,0.5853108810425153,-1.2270914395019998,1.6482188193786629,-0.12479450226616474,0.1241635545562361,2.2595398135871023,-0.3036893621334656,0.6107851699988623,0.059205875569180236,1.5094800667622221,0.9709230689331889,0.454463078873232,0.1079605626873672,Class2 --0.24635869231613877,0.34063800429890995,1.398548840676489,0.9261278633120245,-1.413496934526549,-1.2726643687109078,2.3442934227186396,-0.5022219545055385,0.6698813842028172,-0.5215918873738525,-0.5283687716248603,0.4195553520075895,0.5415924391709268,0.8856653887778521,0.3345598471350968,Class2 
--0.2524052035948927,0.7060055732351003,-0.872842337894133,0.0920732441267653,2.642003069298205,0.1416813833874721,1.413421720107895,1.312425170760115,-0.5977443782876233,-1.2471800075089257,-1.0935569979031283,1.287292107302274,-0.8547177850268781,0.9479575788136572,0.6809796348679811,Class2 --1.171204965573156,-1.8892244173163961,1.003916089260502,1.203766538463815,1.088229640957814,0.05114539081523651,-0.5957664538004932,0.7363727150652911,-0.6492062728659245,0.2805774614731854,-0.1683021915711085,1.675608845326998,0.2677907752804458,0.5845918727573007,0.8443515454418957,Class2 -3.7383687260440883,2.392951478398925,0.7451456327363765,-0.17347327656481223,0.21414657860221334,0.9508687932689309,0.29980723281325267,0.41185525414918023,0.37229973924410076,0.23601441409052945,-0.6120338420753846,-0.4436848930730518,-0.17671637795865536,0.043678989401087165,0.6162464644294232,Class2 -0.3097071133691044,0.6431977098066927,-1.699628272431846,-1.9624142747872466,-0.46428117926278434,-1.0023669646262494,0.017583672219293488,2.24645783705447,0.8228884501673559,1.4711019198204844,0.7146326900665071,0.2686245501084162,-0.7807819256559014,0.11376416450366378,0.8573982284869999,Class2 -1.6485748467679993,0.38492762667881675,-1.2888174085241075,2.518763254089192,0.6010177884692127,-0.21613774072730232,0.9626808159991778,1.2945100130027447,0.21681753905014886,-1.1224453785912212,0.5738495623592,1.5890696997529028,-0.26795203471556306,0.5842867407482117,0.2964231895748526,Class2 -0.37884336671137697,1.360311467832846,2.4524700871203455,0.21139990320058552,-0.8407352658252876,-0.16058420900040504,1.147329091762066,0.37898676072353127,1.978448484300123,0.09969698880218644,-1.6372731387216841,-0.5627774875713798,0.07112866407260299,0.6619141716510057,0.1221662915777415,Class2 
--0.9354793432274935,1.7053389313085594,-2.276512628701016,-0.8529644982881853,0.6075150982371619,0.8421943398458316,0.4952633252562606,0.3261322500975186,0.2436251732929695,1.1266246899777161,0.981548557101988,1.9313343468332909,0.4417783124372363,0.48619734728708863,0.39788477146066725,Class2 --0.14408846997086736,-0.3415917020388476,-0.6225638343188582,-0.01219985917182297,-0.26587757692130604,-0.051837296435408284,0.06059719227895897,0.9370751252749366,0.2612765999882822,0.7472676918749857,1.031259574571359,0.4129856829886556,0.41453946754336357,0.8472046218812466,0.852251818170771,Class2 --1.499714311934736,-1.2327164178138552,-0.5727328454253872,-1.3576172017451569,0.35467758073982447,-0.6244235360986533,1.2080949431160086,0.15252214025896632,2.450966099699781,-0.3914128603138219,0.19455624669331242,1.0789342185960142,0.6383723397739232,0.08837610553018749,0.7127237182576209,Class2 --0.4193898049825194,0.3740065846033956,1.0721912748452591,-1.444177903158149,0.2511973708244712,1.3671924738724108,-0.8450155101345469,0.25260200202187383,-0.7515758763699845,0.041540525493410384,0.5927543149253018,0.5297407041197505,0.5856238813139498,0.11656279885210097,0.1472707458306104,Class2 -2.4687982200285616,3.568915822350417,-0.5908383637748288,0.18066560698797818,1.9705415617154005,1.8300337169595347,0.8694426135686902,-0.7888218623384243,0.7145553195001271,-0.07086591940889808,-0.5579757715971715,-1.497167862409974,-0.36502903001382947,0.07569518825039268,0.8166493470780551,Class2 --0.9410068750581193,-2.0731264737625557,-1.3708913586453464,0.37000995982703844,0.3129924882053659,0.6110595352658791,-1.1810864468756537,-1.4344723880374148,3.566416102880531,-1.1211214815983885,0.9626561942329541,-1.1003047607268255,-0.03930507227778435,0.12021393212489784,0.16664649010635912,Class2 
-2.679213274993148,0.7462042663786863,-0.4830459110492385,-0.12024211532154598,-1.4015269577836058,0.9855850399079974,-0.5284635277025335,-0.7403913980018457,-0.2473490465102819,1.1875928640689608,-0.5210488285050107,0.3687728568546226,0.11391481524333358,0.8687120156828314,0.5458263845648617,Class2 --1.3829816792324496,-0.761095865112875,1.1931037981370707,0.9388582864672129,-0.7674646541377638,-0.2888074219318002,-1.3518786361150146,-0.46625198158867104,0.08752308539557449,-0.6292227222598189,1.3994637762863589,-0.5282884722228909,-0.16381301544606686,0.43487346707843244,0.880418325541541,Class2 --2.9869201130585177,-2.6249219487234505,-0.6730061083795765,-0.9877126988644841,-0.07113607754389076,0.5103531224245693,0.7276750619824733,0.6627908023078004,-0.4185582821552198,-1.0953409986253948,-1.0051466309725452,1.2472353460965042,0.7170766517519951,0.7523557436652482,0.34792337706312537,Class2 -1.8882907350091496,0.953657914316847,-1.2690884051131959,-0.7983803728293657,0.8971190429988561,-1.4022676443590054,0.7476765935035886,0.6716252689771277,0.1745434731807092,0.9868851335575268,-0.04743290563784949,0.620774036112955,-0.4840310998260975,0.8131643547676504,0.0317612555809319,Class2 --0.08083290928056469,1.0298330558875237,-1.3933927655566525,0.7801295899098177,-1.303131115463487,-0.12810159943447774,-0.31164619263019844,-0.7393246157994828,-1.0168933389647545,0.141727344108625,-0.8041742766349332,0.5357998770465946,-0.05705461837351322,0.9600846676621586,0.23841197416186333,Class2 --1.7512147932507445,-2.0537286184882904,2.123868369873493,1.4473614254456537,-0.7629855871599935,-0.45225935941041195,1.1629460640434404,-0.18533806738990105,0.15179770556645006,-0.8552686236432673,0.8470622705467875,0.7823446546308056,0.6854521851055324,0.07099089561961591,0.3997442605905235,Class2 
-0.8331006990939398,0.841708254341436,0.10132387432166973,-0.12923358347888103,0.9876904359796043,-0.41674430583923294,-1.5796174350442262,0.9510032709573387,1.1473949965122499,1.0259490471867438,1.8725882337916673,0.7066956926576541,0.8288763472810388,0.6545622039120644,0.7350012694951147,Class2 -0.8873347878826752,-0.39901606076127316,0.8014013624389372,-0.43019687681889784,-0.6553853014715829,-0.9767024409466908,0.6822927689030807,-0.9240403786919825,0.08192518539769615,-1.2456402466194012,-0.33466615291259305,1.3361561290147383,0.059084718115627766,0.04485502257011831,0.30626052361913025,Class2 --0.08135504935359826,0.2571205004694203,-0.9303558481366669,0.3254745207860824,0.5700408596368464,-1.0366598870597472,0.1006737083034945,1.728849885767546,1.4002161089557434,1.4910455396855804,1.283294018465018,-0.15924261948400276,0.002439796458929777,0.8025310102384537,0.06902334955520928,Class2 -0.262108079775057,0.2454546758585823,0.9219545347591659,0.18952531077694781,-1.1128465287379643,0.7536613536386019,0.7408187920668409,0.21927741750830873,2.1465648030236997,-0.9607523175498892,-0.07902923579509534,-1.1864826429240423,0.7612753608264029,0.025511288084089756,0.7005071311723441,Class2 --1.3263974188056706,-0.49413293456597485,1.0183274885828606,1.0398009111543203,-0.36492821933360575,0.070822737334529,0.43188241381000747,0.04261154395061653,0.037878249360025984,-0.5183766585191756,-0.012730685022374734,-1.347648313678902,0.8686553332954645,0.515801751986146,0.9012628502678126,Class2 --0.1659284846064536,0.8456111677037136,0.4409536751437533,-1.1369495901301274,-1.1127640350213703,0.40937163289996564,0.9173014480861781,-0.04054404511722387,0.3350311692209803,0.37390504249657586,-0.21025492006529906,-0.221923906546793,-0.7884888686239719,0.029862420866265893,0.5807188332546502,Class2 
-2.1240801186992777,1.6704903347541464,-0.01942816330239846,-0.8946419823082092,-1.6567597373136889,1.100236270741571,1.1380796162442892,-0.7173358491019817,-0.046210764026497236,1.1214408466580104,-0.10427150256868362,-0.10477600538938941,-0.7294994457624853,0.27011385397054255,0.7595972861163318,Class2 -2.727812844128834,2.801557346889474,1.175298775904336,-0.35947684543924024,0.573331150789726,0.4951374968627402,1.1732128984360135,-0.12173404650384533,-1.5447303766750462,-1.7025232082502948,-0.980774662797435,0.18334920267043758,0.26285350462421775,0.29055785201489925,0.7045999898109585,Class2 -0.5874815836132099,-0.43787637058293516,0.5890150921353144,-0.43672898600291093,-0.6706445839429286,0.25217231527679,0.7159147808564744,2.5323931730180504,0.7565030439024721,0.4074504559519444,1.6470253169248759,-0.21148281638482436,0.24082445353269577,0.25394342048093677,0.6306078045163304,Class2 --0.9644994369861102,-3.5414564661501826,-0.9969334485260837,0.4920290593911799,-1.5869655215043044,0.5762208813198341,-0.8627395159500946,0.8793552228102226,0.2739320338393063,0.5513658434683576,-1.300553985129337,-0.5757980800133267,0.9191090143285692,0.9310329204890877,0.015313834184780717,Class1 --2.977400156532516,-3.2674185007864525,-0.9643726706491091,-1.0545849022235378,-0.7359757366813221,0.6907360646518507,0.19624275199406835,0.6110644871981792,0.24995726085245465,1.0640276963815258,-0.3223932308737301,0.019353739695829295,0.05509687075391412,0.8702906512189656,0.19035462103784084,Class2 --1.5471633045389475,-1.0922897244457055,0.6209573682918327,-0.8666719684100289,0.03648343588709728,0.7086470953292253,-0.7582385459256145,-0.26225032815965954,-0.2674687642848966,-1.920920687325933,-0.022175682389720966,-0.44768044260559564,0.8652539229951799,0.8270262076985091,0.502062002196908,Class2 
--0.6222884064128331,1.3505838958600984,1.4154039483604688,1.0699118622815778,-0.7399805384529641,-0.08971952777492759,-0.06266641553776751,-0.0881890938704881,0.7339300196293115,0.8420044068223732,0.33228663255762325,2.27855958242292,-0.6790478564798832,0.36467446223832667,0.3526286536362022,Class2 --0.5668595376920755,0.07282998517765907,0.44659498971846145,1.1626571856346841,1.179750340444152,-0.9948354304460695,-0.6900282233541779,0.17354466527202153,0.02163133469977531,0.1607215367579412,-0.4795984882751106,-0.3350120505997355,-0.647762652952224,0.292258454952389,0.7277376796118915,Class2 --0.8988472152680029,-1.1907154642865074,-1.0300326621821256,-0.8327254418841463,-0.49064545480227956,-1.2535211225203378,-2.0313596624070294,0.910226131629487,-2.098804868649648,1.8464927627033116,-0.3388026250158869,-0.23126158027188126,-0.6978050619363785,0.2687047387007624,0.8503963469993323,Class2 --1.2822712993548155,-0.5434336379529672,-1.7285904367465599,1.1338597777139094,1.0187968182200138,0.30635289491345213,-0.7980740854470698,-0.5081507106019105,1.2763324649796137,-1.148637679130738,-2.0141721600652733,0.07789062817681307,-0.2370854252949357,0.8315495504066348,0.4620618575718254,Class2 -0.4478220084266624,0.21445739086854965,-0.07417324374392331,0.24215690433719386,1.1277490922390387,0.3012159431386403,0.23438789001435825,-0.876718168819218,-0.2851753225272064,1.0687747923507447,1.727658729909375,-1.9610648367110093,-0.6767851123586297,0.7701278515160084,0.2518564513884485,Class2 -1.953301769420223,2.330920135311807,0.9262000567845122,-1.292310272582546,-0.2702189274538632,-0.2211705296662861,0.9765881984773566,0.1876985394121701,-0.6505141796914907,0.5754642705718919,1.129395389052608,0.5206619588577819,0.7974475836381316,0.5428230839315802,0.9528036471456289,Class2 
-0.9047714100727624,1.9039358276903566,-2.9279706888854515,1.2263578520891674,-0.18848433584071736,0.9341475016189499,1.1744039061890335,-0.5753181972850933,-1.9160750690529056,0.31363789152759647,-1.1776632104029563,-0.5506576941929348,0.11892273928970098,0.5908661433495581,0.019052669871598482,Class2 --0.4177291086461562,-1.0135565319783457,1.5439069263511613,-0.38360617519950624,0.32419208467306143,0.4504752303490146,-1.1781861966980645,-0.7670373291471693,0.7373190733863407,-1.2736849337256209,-0.6424108139511353,1.9445605425434762,-0.7406731909140944,0.110211081802845,0.3693037643097341,Class2 --0.27264448529016383,-0.9464334746047413,-0.6436446539066366,-1.4504961952480655,0.08947689975814699,1.6667431959115724,-0.06524619257844667,-0.6794171792949629,-0.8267653255563094,0.6318759678660909,1.2539161886080452,1.1831304277402201,0.8506410298869014,0.8048638985492289,0.37699381983838975,Class2 -1.9015105545985183,0.17471713119730892,-1.1921898669263837,-1.0398689939522268,1.3510340262649378,-0.2720064404091926,0.18706487188183551,0.0066303591167281755,-0.3964349990277754,0.03439744416198756,-1.6682253697075076,-2.153729173751242,0.24067734740674496,0.6316105406731367,0.05802783905528486,Class2 -0.6796505222803029,0.15352729566178985,-0.25142522401706285,-0.7208405688545377,0.1343347714477414,0.6115723054674667,0.20553633504994828,0.9088727429634855,0.6242248183132036,-1.3450686210683003,0.9820169328024632,0.5043794327272223,0.20046650059521198,0.5634382672142237,0.915242426795885,Class2 -1.2371667720789548,-1.2379212953160939,0.6494882580894288,0.8547801299601304,0.955883529843384,-0.17475326643241096,-0.8806922436473819,0.3018694055594835,-0.0021886709478559888,0.6295967594259053,-0.520062128727859,-0.6196571792751352,0.8527544201351702,0.8593672101851553,0.6657715865876526,Class1 
-0.20979469591405686,1.6306371182771058,-1.3361095491619628,0.2733909489151795,-1.052275383768449,-1.1204668925311911,0.08921704676221118,1.3030703552712433,-0.6865788151611051,-2.076317720091929,-1.1180262134911623,-1.6457432043156537,0.8721918887458742,0.6680506863631308,0.506481465883553,Class2 --0.43217617034430383,-0.4736429254313516,-1.858517589258596,-0.4900823820189163,-1.5036236302405928,-0.005781688087622229,0.2681885085923396,0.0808353364485129,1.5184309475066102,-1.0801007721801956,1.4123919358403823,0.8769991052276312,0.5979841547086835,0.9942249255254865,0.3791689909994602,Class2 -0.838327971246196,0.7478683525951604,-0.18911805721512098,-0.625627000395069,-0.6258917902214646,-1.133189490699888,-0.21158141372565203,0.32849836315403164,0.08584792625564455,-0.32707141949277374,-1.6024122544443957,0.3984373590450843,0.7841648841276765,0.7834370019845665,0.26036644680425525,Class2 -1.3671489652630477,0.9563150228968229,2.127589074884166,0.6186595387382661,-0.8046390829539831,-0.7503961535528698,1.3978594347184032,0.6451927339318588,0.7110736602104867,0.6609758234046076,-0.22291237240611764,0.6461550830877626,-0.07721585547551513,0.5163719840347767,0.5798249274957925,Class2 --0.21165330951103667,-0.251956383606856,-0.7086277247125301,-0.41440192261019704,-0.08820893348615307,0.9286677004557381,-0.5313299751585203,1.0726026931225356,0.4985385607469856,-0.9296822243339713,0.5109959935691981,-0.988925764477368,0.5753365280106664,0.5368968239054084,0.5976181752048433,Class2 -0.8536426622221847,1.761268831190377,0.26272443051556826,0.227355861193604,1.5790617903686097,-0.09000135300294743,-0.012097238600374313,0.10307450654284263,1.3557056094999502,0.6886421159865712,0.7119990584715539,-0.04135419362973378,-0.6783157158643007,0.4791741496883333,0.2872706181369722,Class2 
--1.502325008818472,0.030841723591805037,-0.5182296642554496,-0.7058733117732549,-1.0886365298878458,-0.7427595558245366,-0.21010066282106887,1.5677952016444316,-1.9070165493940479,-0.05800603366171483,-2.059291833626288,-2.184484052840503,0.3436030577868223,0.8033518972806633,0.8499004072509706,Class2 -0.828754933635539,-0.5938895775911642,-0.8457384904293044,-0.7086031115497611,0.2557272877457393,1.1763447718826388,-1.3523799891096153,0.021725609435728283,-0.6255250987018072,0.2049341756422091,1.1139238317726208,-0.7784566157935737,-0.47291371831670403,0.9375738038215786,0.05899901478551328,Class2 --0.5940510982712854,-0.17247063249462288,1.3464487756825234,0.5808527277894802,0.10477130648149557,1.8688783054163673,1.7315364531152313,0.3294462182211084,1.0204799982628974,1.1724033700140155,-0.09455470181405402,0.7546041297420921,-0.5128778549842536,0.5780721786431968,0.05901822308078408,Class2 -0.32835223598393937,0.8120067407091994,0.9783225360785096,1.2454117189123763,-1.6358292734259336,0.2891810166364309,0.16795172760807567,-0.027047016288943935,-1.8109436639926015,-0.9091570360800661,1.2291077268765611,1.53297366182793,-0.1407690984196961,0.29803200787864625,0.016696982085704803,Class2 -1.805434433485488,1.25815757393342,-1.4553677654066497,-0.2934393088188636,1.8497939605154523,-0.48721721195994555,1.162605504405621,1.6230229790418689,-1.255936078309305,-0.3749862949550617,0.20415978299748982,-0.8241971850804203,-0.6137510510161519,0.4471053765155375,0.4437629815656692,Class2 --0.5514441796307115,-1.1588450735503317,-0.4638690897536533,1.7090996734178212,0.11486984512162668,0.7352216521370653,-0.6443069808076838,-1.1100151736909853,1.269740949352879,1.1444103648481991,-0.43540253307264065,0.44807445962052567,0.21320206066593528,0.6190050661098212,0.5046829550992697,Class2 
-0.9975144897211632,0.29140529256250974,-0.32693259863044677,-0.6694672915259194,1.2051426467711281,-0.3414348551134478,0.5982893883024198,-1.6231163225374932,-1.5787932370639128,-0.4810284052335093,0.1656611174442568,-0.9316180136527812,-0.4045495823957026,0.06308922125026584,0.536510301521048,Class2 -1.836164241184216,-1.1501924546991613,-1.6983384024088115,-0.30886247333557315,-1.2020280811399364,-1.2676657956181698,-1.8395862123975535,0.26966246321475756,-0.8376599310741872,-0.3927057212707854,0.34952170154720885,-1.0038285766954917,-0.24688555393368006,0.7107981180306524,0.14374799863435328,Class1 --1.343864490712166,-1.2471160495140134,0.3312852575211477,-0.4392416646845302,-0.17460211540962034,0.9664595277518733,0.9240400467736273,-1.5465299147757616,0.5830211089219625,0.1633117169946813,-2.091579313738889,-0.36617958399301825,0.47720078891143203,0.766424156492576,0.5673437966033816,Class2 --3.498762924093704,-2.3101389969167325,-0.17309832916344955,-0.2685427966522733,-0.35857253549046036,1.4483793264903904,0.971774194072148,0.24462514858285678,-2.4188162505715445,-0.4670185389564098,0.05230765472626635,-0.5589674356806287,0.11201753560453653,0.56119275232777,0.12825380638241768,Class2 --0.3991371409869193,-1.5536719002887833,-0.529250469137611,0.40363698620254024,-0.722070364257088,-1.061976989155475,0.23727453366264942,-0.342544736204363,-1.1635070283146745,0.09284130641244102,-0.42898181874223756,-0.014096709277590394,0.6111974623054266,0.838478886289522,0.14944729954004288,Class2 --1.9676091689729645,-2.4299337696020182,0.9902502521442363,2.156017544773045,0.22673842235369274,0.6169299926787872,-1.4747369339763194,-0.719650359771361,-0.5478882773670424,1.022232160347133,-0.2070934868715481,-1.339065743343777,0.7574425963684916,0.20493715582415462,0.40110802510753274,Class2 
-0.43209870041495924,2.279997796303433,0.9144438757823983,0.8829335314465816,0.27550807351278683,-0.35323969759458707,-0.11154457156497133,0.633117240262491,0.5639529054957054,-0.5984120409549496,0.6882478583185355,1.5335498380631631,0.17204607604071498,0.8712915265932679,0.7238711591344327,Class2 --1.9013993050746953,-2.0800445999732378,-0.44062099088690226,-0.8808012540056367,-0.33441370075627036,0.40755286073061914,-1.642266006006559,0.3166236193390202,-2.158473689321837,-1.2241180069641735,-1.2483593893423361,-0.32530068636738446,0.827401808463037,0.7311362116597593,0.4364403411746025,Class2 -2.168898379854651,1.8970448812592928,1.1384655591548598,-0.5315206008369986,-0.09070616282179798,-1.446389304094635,-0.3819829609241145,-0.08058622424160948,-1.5405724134788534,1.4725010114031218,-0.021924235684944036,-0.488294674822492,-0.7240761923603714,0.5754954766016454,0.3521833026316017,Class2 --2.439764829097595,-1.2726510424070714,0.147641989561402,0.07608258045956544,1.584605003818536,0.5575481287126953,-0.07499358041501648,-0.3106016134170342,-1.9868156245467452,-0.25248409111321113,1.6940844384810978,1.8271924520949745,-0.6032018424011767,0.32475380785763264,0.8546590369660407,Class2 -1.2886078667479566,0.8479277071718022,-0.557209278479278,-0.1274478511911323,1.3661727028893957,-1.1941717838622046,-0.4406539824831475,-0.2106039385859591,0.6493668651432625,1.9680719068515848,-1.4096163807106763,1.3183908567373972,-0.977104899007827,0.8446536145638674,0.8189468686468899,Class2 -1.6744712945919278,2.2978210296852497,-1.7632526913899471,-1.622442857609767,0.9015181299454266,-1.001568877278664,1.091375246885461,-1.5935227870175452,1.2115327188869338,1.1581912689575906,-0.15929557725470408,-0.8265159342272855,0.058473287150263786,0.27880890225060284,0.5280970630701631,Class2 
-0.32324718514001693,-2.0694600071859486,-0.5248695751058329,0.8045673219653718,-0.3355045539542166,-0.7314128582411488,1.31899099628356,-0.5474152901401922,-1.271588070715843,-0.2909215567911075,-0.30845913075255915,0.2711430915022291,-0.35504852049052715,0.9238973930478096,0.46210848982445896,Class2 --0.36367462928949346,-2.262034620081411,-1.7042672716174478,1.1594249267626804,0.7313062225450568,-1.860158032814207,-1.4234180864854078,-1.6469682308921587,0.8810912454413266,0.6915487293280544,0.33535622703822104,0.043354714644212665,0.13279370497912169,0.9604291943833232,0.06418450432829559,Class2 -0.7593217480206099,-0.5173256246186747,0.21948035479351546,-0.5314400703659753,0.05020667943062526,-1.9100358158885653,-0.08159551031787257,0.4198940862970815,-0.2263588611864507,0.4051712629904677,-1.4888086069409534,0.8889869719934925,-0.8903866591863334,0.31557242292910814,0.5652375551871955,Class2 -0.978683750607676,0.38492114178794684,-0.02743543187132115,1.3825194689227907,0.46726552243311964,2.670236991115869,-0.6551781330308585,0.46977927516722184,0.07584726681357754,0.10610246769529508,-0.4276255966143826,1.2033240416982478,0.08144694566726685,0.7408366561867297,0.1264922646805644,Class2 --0.03261490595423128,-0.18653198431360285,0.14824905669229604,1.0189501669281071,0.7751188065074451,0.024250392123926573,-1.1293899532636984,-0.8866241654051678,-0.7533635763367552,-1.7081685078616702,-1.04395363038871,1.5771949881030534,0.867605762090534,0.54575823713094,0.23497734707780182,Class2 --2.5466037537871324,-2.175116526797436,0.6001513067348122,-1.3013658104617825,0.1875064363203001,1.1350851739041192,-0.6738444949278286,-0.5431780619834445,-1.0813802794951508,-1.0774648439829904,0.25107213757855307,-1.018248198661016,0.513403327204287,0.29876646935008466,0.8324030037038028,Class2 
-0.3618754227860076,-1.6730377695561733,1.7843293222470007,1.8323135078082762,-1.0063958637104011,-0.08192503147599567,0.7573921490380168,0.6541879539511639,0.3457354558712211,0.6855557622070216,0.006903839843572531,0.09259460786969483,-0.8362910444848239,0.15203471342101693,0.09086010418832302,Class1 -1.2347134207076789,-1.8904112911765736,1.1663888525366397,0.12775343452215504,0.10336643045161893,0.9930865249420132,1.4087449884709757,1.3623871976917254,0.9304251590556132,1.9839444063905478,2.796778532445287,1.5422301690895055,0.45564739825204015,0.9639604326803237,0.5633478928357363,Class2 -0.4273366619266572,1.1101088461891042,0.77541203423542,-1.0146135445697897,0.10498746450494664,-1.0329013195666306,-1.6373494103365092,-0.5505321965890831,2.9830502648057275,0.41559317073871027,-0.4611992396720657,0.7391378892302906,0.4620615402236581,0.04534077015705407,0.05946197430603206,Class2 --1.9569542842665812,0.6390263021875302,-0.6621867921091055,-0.4829475817365442,-0.1033807871932749,-0.2432196231729899,0.8558921438884253,-0.6415983187992245,0.6780481690066836,0.014739316639589026,-0.6046752432579641,-1.3680996299978723,0.6721555204130709,0.3110185326077044,0.3195108398795128,Class2 -0.7152289019125954,-0.30754641673281446,0.7720098590805293,-0.5732710805410153,-0.4408021025883526,-0.45460021968054415,-0.8201268003614278,1.5792828806735852,-0.03152403298520147,0.49580459759847206,0.21325168981707288,1.9886931770596719,-0.5680097350850701,0.28010313375853,0.8077608763705939,Class2 -0.5576745444468221,1.619419281623178,0.5176966743274707,-0.5430219935167225,-1.189612665436241,0.5520722607462824,-0.7976480557461668,-1.6395178724747828,0.6891020332334438,1.1598685880411406,-0.1606562760735495,1.2053467318179623,-0.485742480494082,0.8814258696511388,0.5461765811778605,Class2 
--1.5061323624334746,-0.6769685575700508,-0.6847661539500268,0.5094096444960639,-1.0751289852555181,0.30197971432570364,1.5503250060090048,-1.0754589705841369,0.7272700258202219,-0.2196877242727947,-0.7018881979247753,-0.8628484461489212,0.4321690951474011,0.5613095245789737,0.2643685285001993,Class2 -0.7477879011863608,-0.41894882088483293,-0.5503633661227767,0.8180989339599383,2.5994591579748385,0.4038201920478269,0.9689977828892734,-2.007415184322429,0.9724423326477236,0.31979777418791594,1.1004294800494454,-0.6681671494336828,0.831595943775028,0.9853497962467372,0.14890114637091756,Class2 -1.0005628974314333,1.7850017396241087,0.02078251072278917,0.7026186801650467,0.16771195592827637,0.43034243348566187,-0.7810607290523314,-0.8349053506177134,1.2413031736482472,0.5242388665459611,-0.9641726800377478,-0.4603688629498737,-0.26946610352024436,0.8059927860740572,0.8585485219955444,Class2 -0.504233828251619,1.2174414190891933,-0.1703665120006196,-0.1079216895983101,1.1229442713010165,0.11735747516957784,0.6656176342769112,1.0234555616196743,1.9847363150793902,-0.1839139605595114,0.6793773264890273,0.8406236290448731,0.5222552781924605,0.05889264144934714,0.22426221054047346,Class2 --2.164031412036616,-0.7851014925853753,1.3601065218406847,0.23058524562579802,-0.9151783242255559,-1.0426638705146352,0.24654088715458322,-0.5489375408000687,-0.9099190898707483,0.19915131445846676,0.20821396579830814,1.2967366325281853,0.4004902979359031,0.9250466185621917,0.907548178685829,Class2 --1.2922074106415278,-2.144845513548056,-1.0066780779343758,-1.8341717260741524,0.07777589047535194,-1.6318800307942325,-1.3415682813271874,-0.2928283570039182,-0.874718416224322,0.975010659919715,0.017245565188591973,1.1088785321370658,0.4697081120684743,0.4295066436752677,0.5764142645057291,Class2 
--3.372704005306585,-2.644180780015941,-1.8972746516592431,-3.5101508097739336,-0.521814412949024,1.1300574665660519,-2.421646775789274,-0.98126078085776125,1.5248752501815839,-0.3181610121697577,0.4807466318722597,-0.373170441339653,0.19723511906340718,0.14352148119360209,0.7266732165589929,Class2 -2.6368621744052723,1.6847221963201156,-0.9165121222058801,0.4988495939527013,-0.8971295588766947,0.3051224892497424,0.2793119374841937,0.09597039774488823,-1.2067143307105783,0.4292241351488534,0.27570989467231444,-0.37303087741933044,-0.27846397878602147,0.36520221014507115,0.6431416017003357,Class2 -1.6832935798526631,0.5592606532155102,1.9537922311988603,-0.013449168015297403,0.2679331049765422,2.3059121998778873,-1.6518204249212194,-1.4797278953155837,-1.2043587934096598,-0.2602785250198205,0.7357664031534359,1.2775708157057495,-0.03229651926085353,0.026137619744986296,0.31541845155879855,Class1 -1.7583290408812104,1.8105015685416992,-0.5198781283532117,0.5216790169121699,-1.1025391656197165,-0.004985212857505399,-0.22174473605227965,1.4634330097226824,-1.4921073482079805,-0.9632828705887025,-0.11196493171743346,-0.7201260227433041,0.2921566809527576,0.3033013248350471,0.22745910449884832,Class2 --1.5391843355633634,-0.6501302562545543,0.9108641583418098,0.1037345275052222,-0.8694954787813002,-0.27786497580121444,-1.2099051466502475,-1.1027050469677613,-0.10394531562071996,1.9612796381042945,1.0127048221267183,-0.32585858621750174,0.29823793983086944,0.8921100338920951,0.875714423134923,Class2 --0.5119859628995271,-0.720044357085029,-0.774444711081486,-0.24727305625316973,0.930825716974658,-0.01512428309387409,1.5296773717946732,0.25983989940117297,1.3591929842476511,-0.6639834652407961,-0.6627433227041027,0.4656054971131585,-0.5746974861249328,0.16758001316338778,0.682228886289522,Class2 
-0.8084159461780697,1.956044260022607,-1.0805068393200972,0.24365720467772548,-0.5364819974530143,0.17127118795421595,-0.5371939387605225,0.5904113766909251,-0.34733583062788564,0.5267440939159562,-1.1897167572474843,-0.8492835297797565,0.35008050967007875,0.9581630970351398,0.7852184385992587,Class2 -0.5171052887797908,-1.0129510553010541,1.5718324811414728,1.162581559038792,0.5646672444806602,1.6847425707304784,0.86764978811677,1.3617235446503873,0.9344902710992049,-1.420317015995907,1.686925090332495,-0.04052987439764666,0.16197321517392993,0.4671841587405652,0.19017509277909994,Class2 -1.7233465447087584,1.3969334363983608,1.709815117259748,0.45623789180781266,-0.9139963440072819,0.02712351940727129,-0.22137366578251336,-1.688723702337105,1.8741574156045073,1.2191114090198425,0.2673428982785477,0.9838199763394343,-0.2005923823453486,0.2730386939365417,0.5126821130979806,Class2 --0.1719126944820077,0.10607386093981815,0.627682311760555,-0.48041046751105015,-0.39149476092695856,1.1149027028520382,0.46133660828435397,0.445802432005435,-0.7530152980954299,2.369240645600259,-0.6479242520794476,-0.7934794459980012,-0.32163191167637706,0.17021977179683745,0.9985844297334552,Class2 -0.26927472402553676,-0.9884583299935784,0.833327349218073,0.47711518552283977,-0.29367572416618454,-0.22834380471426907,-0.8824147654922295,0.2560860081256637,-0.9093055452662794,-2.2884915232661633,1.2670600323412677,0.7828314245640046,-0.4900251906365156,0.31432751985266805,0.5876774925272912,Class2 -0.9273603739027212,0.26391789714818165,0.3270752983906226,-0.3131104166025291,-0.6216284147796494,0.5500772048132958,-0.042582696431095364,0.7305533995018167,-1.7448261650700567,-1.0677358204783722,1.9999597196975816,0.06259501901677661,-0.09346123319119215,0.5062160342931747,0.10534724988974631,Class2 
-3.6458326138445054,2.6675625751185734,-0.41850582633880673,3.1389861308695615,0.3881605882056245,-1.3689267962927316,-1.5353210423702661,-0.5776287872953948,1.2470313085252163,-0.21091561064949252,0.5274174779142297,0.20681914717317082,-0.24918118678033352,0.23699829610995948,0.09103335184045136,Class2 -0.41436057836842055,0.1436316965661252,1.457308950220687,-1.6482739720120945,-0.7942493932288761,1.307364748100345,-0.3122554066832918,-0.5331249579135888,0.3080498778322288,0.4688525371590478,1.7943110064579246,-1.0401383110021347,-0.6678170128725469,0.11858162702992558,0.8882646970450878,Class2 --2.0411732002318494,-0.8835889683282752,0.8105822232106339,0.2805921424225434,0.7360522538931188,-2.800266576540447,0.12362911774792468,-0.611694782675672,-0.5779512637886106,-2.2266584677445787,0.27195984952373825,2.0942804626949196,0.9845979493111372,0.9311576280742884,0.4172766909468919,Class2 --0.5098420991049104,-0.7367261327497319,-0.33883483859352087,-0.15548430671439611,-1.157945191068538,1.3199572071584948,1.51276260619355,-0.36811662512673077,1.2171583073578243,-1.162177761328603,-1.3488954354838973,0.5978005226990167,0.34125814167782664,0.9512135214172304,0.6330570438876748,Class2 --1.0609001344767819,-2.31341552083222,0.7707825860695052,-0.9149927145149841,0.3688016564104525,-1.7014390164091946,-1.260238406586191,0.9695529840048008,-0.7955941904456367,1.4322014234704954,0.9800567366621057,0.7225686779851994,-0.36538714822381735,0.3050012111198157,0.062430730322375894,Class2 --1.1786127798038868,0.6195166070182727,-2.2132963813222672,0.8079286767934675,1.285563384424191,-0.776910267771389,1.0683162211556876,-0.42648459515921167,-1.209519047396947,-1.9419742040985013,-0.1235813348194654,-0.9322135710921721,-0.42182021820917726,0.919231089996174,0.0404466416221112,Class2 
-2.9036670633181276,3.070847599959165,0.2655971458512865,-1.8789816660318428,2.2305138150638575,0.6948214562239846,0.35466961989130713,-0.7998923420327447,0.4011542439071108,0.2732428416444581,1.1311981427359172,1.774103155242406,-0.4558826629072428,0.8360644201748073,0.2607936505228281,Class2 -0.05501079724620461,-1.0557385085988282,0.8576295292711809,0.4903139383532704,-0.2924617579618515,0.33894123677891225,-2.0947896921567364,0.3099681449312096,-0.33197183239684874,0.41918430293533293,0.4239909878936437,0.195440767714511,0.33939212188124657,0.7450629249215126,0.3967003633733839,Class2 -1.4099871191027873,0.13042497471885917,-0.3708894075889216,0.6128807123556717,-0.18079869331489792,-1.1758670708517598,1.0359418193849246,-1.333734098621579,0.18909994622019927,0.14955267647512144,0.01253808053430339,0.7623643140690815,-0.14904644805938005,0.9148534771520644,0.5595395560376346,Class2 --3.1073178067153973,-2.1252510660159394,-1.929600375417261,-0.020239102155979044,2.0229943418353935,-0.13197166812875832,0.6248017248327229,1.0227784637664894,-0.18000984879730084,0.33112938398948644,0.3287906410018792,1.551895057158091,0.18265513330698013,0.05646355915814638,0.4998326115310192,Class2 -1.825446448492745,2.84725795238557,0.3223302760936514,1.605355832338561,0.5126465702816563,1.1558592692491179,0.2955601742867741,-0.21694221030639813,0.1706869705479334,1.396640029428532,0.36964334704501955,1.0641806473208555,0.8365078759379685,0.31376082892529666,0.8583147840108722,Class2 -1.0500175156046834,-0.0875969186319569,-0.08677308638529993,-0.3195931297549649,-0.5125078946857378,-0.3600647601147938,-2.547896759635197,-1.2883812471402922,-0.5382971494548378,0.9101725680938857,-1.3930125307728165,0.2884511611099646,-0.9287566659040749,0.5268879265058786,0.9324352715630084,Class2 
--1.2761813987877366,2.1325720764827656,0.1006317363945004,-0.07335561046879885,1.3843009164394828,-0.5776667808932121,-0.06629987588712627,-1.563523042801122,-0.7621279745531229,-0.6763681901501455,-0.2961665625509899,0.3392215118634571,-0.5760951829142869,0.08409835072234273,0.4686621236614883,Class2 -0.7904114630283612,0.4888878148746412,-0.13669869149744177,-0.3884811803424933,2.2664114081166686,0.4028931381651882,0.24847204177090407,1.4547599940755116,-0.6895381984681744,-0.8806689309533823,-1.613003866355513,-0.09657250178445181,0.6796190468594432,0.21396041009575129,0.1345875628758222,Class2 --2.1718880510077887,-1.7394081578908511,-1.2162454212256746,0.8599010101219146,0.25181571051973695,0.9552068765165809,-0.8799894864753395,0.34034962456937984,-1.6294281996406352,1.7632146651178684,0.10740510720230946,-0.2658174609753765,-0.33990241400897503,0.28282894426956773,0.9244642800185829,Class2 --0.05181172427455524,0.41358191324844984,-1.134134189557394,0.5925236960916442,-0.12262469890554079,1.0720270122671918,0.7577868981093344,-0.6090015711093208,-0.1290852901698769,0.7614067594289217,0.19322103632264925,0.005713223182982098,-0.8766040946356952,0.9177712956443429,0.34400405804626644,Class2 --0.12643671034535497,-2.100462535927682,-0.44214391056056446,0.6710331950711377,-0.5917854993369693,0.012355026344359763,-2.27283360771704,-0.9756687214987727,1.835542097073034,0.9352641924644334,-0.1712155854574149,1.512769073145042,-0.20223425654694438,0.2361205981578678,0.09710998996160924,Class1 -0.17768749349752294,-0.8996393637447537,-0.20895448820335963,0.7376222378491027,-0.2162973705784875,-0.31850092166478783,0.17297032330922343,-3.5927403329972196,-1.4247161115132954,0.1364704347134144,0.6415345788157613,-0.6796146873832852,0.09002175461500883,0.9974892758764327,0.9180631798226386,Class2 
-0.07006725783124057,1.6693657193873328,3.0898084952932163,0.2229013547349461,0.5689185078719821,1.4789715413132698,0.6214647093846385,0.28408228053807427,0.29878899793049674,0.06127778188782247,1.8024734551938963,-0.020666104308761566,0.291960415430367,0.254040680360049,0.894793396582827,Class2 -0.09761364555108534,0.48842800932485253,-1.1148952840301616,0.4297198005979864,-0.9951319310329934,1.7207940847574794,-0.6714532972863217,-0.5513120216602355,0.655378054310341,-0.2082404114910999,0.13208635496732613,-0.42945581280770095,-0.3021305906586349,0.5497323798481375,0.8211160332430154,Class2 --1.8728816585240131,-0.2285681259977178,-0.6783452362934357,1.1443782282321389,0.5707398722513516,0.23423693947520413,-0.3996418516124852,0.7781829442022503,0.0814746804129498,0.6375333167216881,0.16714545832868882,-0.3750982047296023,-0.4373486954718828,0.7206963584758341,0.7575532228220254,Class2 -0.27674187121896104,0.23202765389566776,-2.3727028107250465,-1.5536314379690832,0.03841852213833273,0.9910482589081345,0.8647253594457867,-0.988618742780669,-0.05849232760880665,-0.8086281606746594,-0.17844645210292948,-1.610545915600816,-0.5299360891804099,0.2797749771270901,0.4176458604633808,Class2 --1.5825887669370067,-3.1478399112828264,1.2770490522553097,-0.6436535676972096,-0.3406068830610158,-0.4409064487410997,0.029293258664799038,0.59572531244143,-1.2981686626333986,-0.17167184896366905,0.6676797631206111,-0.4234208316552009,-0.5085163386538625,0.9749857240822166,0.16910847090184689,Class2 -0.061223597182726186,1.1896001224283008,-1.350369512781584,0.0151945588214812,0.08848921129372564,0.2945794616689104,0.7215413227398528,-0.04746232438779007,0.37447378883515947,0.7558392498450814,-0.7912921974518986,1.6701721378217815,0.10774682136252522,0.314519060542807,0.008562814211472869,Class2 
-0.5807087097822698,0.837185119771271,-0.8508202864892117,-0.23187854415200346,1.8705959288721463,-1.4088020778907446,0.23641708971579461,0.9188442684921537,-0.8323590625004889,-0.030450605188834457,-1.341418492508264,1.02376507302157,-0.20941791124641895,0.17362668295390904,0.7322384433355182,Class2 -2.0182529587668885,2.7631478589684617,2.315870608905079,1.5196116978787777,-0.039874546042054704,0.9551207997959144,0.09083342367797281,0.5277608536239141,0.36315106786484447,1.1588199014454301,-0.8038692908030886,0.23518510763604475,-0.9064594563096762,0.6709007003810257,0.48923862469382584,Class2 --0.3533088056443773,-1.8805688589337572,-0.976237052450309,0.2714446551151201,1.1491687413684473,-0.00450753850047431,-0.5927787168729977,-0.6114517865910412,-1.1877949601072568,-0.34252007040583426,-0.7931907132933783,0.3529666981845384,0.4756004875525832,0.9995701387524605,0.4045465770177543,Class2 --0.012683745641220018,0.4595743712712832,1.4281859927708016,-0.28318132798269136,0.8548045662308849,0.23721762589300266,-1.7433825222239336,-0.22640182766684996,1.452612290773964,-1.2109057961648435,-0.352883391190435,0.21278635977320792,0.8260460733436048,0.9904069644398987,0.4502217301633209,Class2 --0.09858886341475626,0.14078837165186067,0.20214060950962837,-1.5100350059901626,0.24169941778987264,2.3984690103861235,0.6896354458978743,1.4355373771837088,1.1913477568006776,-0.715786205407513,0.5693176248037444,0.6180500090426954,-0.6068558893166482,0.3794766799546778,0.9762966728303581,Class2 -1.3355657488446873,0.41073886130759796,-0.48024463182223887,1.6654069758792274,-1.3949972276407443,0.8634478917599697,-1.3178181641864195,-0.7822509631301512,0.8007997440462359,0.2661566231750428,-1.0513772787726383,-0.653411287640877,0.7572538065724075,0.7471757726743817,0.5613233132753521,Class2 
--0.9921440070052496,-0.8254197407079975,-0.5592528947278056,1.1670718421638702,0.599550308311637,0.3159177223692506,-1.188184224530895,-0.6282054105465348,-1.4076447523649604,1.3114010493108763,0.2240307157085842,1.3372615355251203,0.5807769084349275,0.3371123548131436,0.9275968295987695,Class2 --0.4569167849324186,-1.2174988398359128,0.6923311296532416,0.8082434274146107,1.1900718800056913,-0.3584109200304944,-1.2347247420241854,-1.7569045821159426,0.19980309393975226,0.9298975246392022,-0.31660270673433877,-0.5724760872702457,0.5744010959751904,0.7417678502388299,0.9150573282968253,Class2 -1.597390152519623,1.6269315313902588,0.43382965911139243,0.36785919969299463,0.7543281660822119,1.1580832347589156,-1.2778305632016036,0.329809816615231,-0.8548435474421666,-1.407445412597905,1.7639247390819057,2.050195601367539,-0.3908917009830475,0.5248984508216381,0.9961845427751541,Class2 -0.519366423378732,-0.3517061843376137,0.25610947992104277,-0.6121919109262972,-0.7401894747039037,-0.4905846204653023,-2.2825054828212012,-1.273321173527558,1.4861645351057406,-0.5304158200765656,0.06264932567157475,0.39316373182098685,0.5909567112103105,0.48648079577833414,0.6528467272873968,Class2 --1.0302390910270613,0.3614328420302853,-1.0215825172127475,0.7870918427080535,1.2877512731892202,-0.5111212341920731,-1.981187363201786,-0.5472237462354881,-1.4602033666849659,-0.08463967133359675,-1.5125155583269607,-3.0139961261744546,-0.7391491271555424,0.4097231237683445,0.7643498459365219,Class2 -0.8691930929396393,0.6576892712869261,0.5878669839157207,-0.5876860235167227,1.2705527307301614,-0.07991394100104296,-0.519934012843761,0.15032545606007655,-0.6062249528634295,-1.0650076233132912,-1.6969896936482893,2.200141991868396,0.9427725789137185,0.17674313951283693,0.9978673767764121,Class2 
-1.964906708212352,1.2282217876928723,1.58526096143613,-0.3317963959908123,1.1069398576568483,0.04008127683282629,-0.050358573554393535,-1.2791033748894247,-1.296671982316926,-0.693525618571476,-0.11893947626176386,-2.0358360037245546,-0.8053186954930425,0.5834301835857332,0.4105086289346218,Class2 --0.7009125876445816,-2.6159189893366896,-1.2506049246688222,-0.2757678227108135,-1.7013853460963304,1.465338880240303,-0.7241758202765538,-0.8315222351462022,-0.5069907031922992,0.5265987734147191,-1.527702541595554,-1.2976423944488586,0.2936106431297958,0.3247983427718282,0.46268829610198736,Class2 --2.8878809971077137,-1.7206584216318328,0.1091389525362628,-1.0865457324410892,-0.7684811849476092,0.7531312697711003,-0.22390917658599555,1.6045659962674093,-0.2270796235959401,0.1578648818635475,-0.9855438422617409,0.12198607442747933,-0.9154918175190687,0.8669780548661947,0.6925727583002299,Class2 --0.4647693939358609,-0.29311960346158017,0.3060195035229349,0.37832041060379173,0.8725058904486317,-1.2546336151975914,0.5414433520238008,-0.6565025796488948,0.8449401165724938,-0.576025426487046,-0.12497384777176761,-1.6697652877014448,-0.8931560912169516,0.24377191625535488,0.6961091074626893,Class2 --0.2379321986291706,-0.3254013673656915,1.5896662903844045,0.7534397614315309,-0.42901357143020363,-0.0870264241735794,-0.4205530363699014,-0.3810545403250792,2.057216049226259,-0.35083544190704563,1.1620324745905006,0.30691898236949233,-0.9858842440880835,0.8902786555700004,0.2929970540571958,Class2 -1.0125297097681043,1.5340461442098177,0.09302002655281012,-1.0829546791080715,0.9059226058409037,0.9097541686066551,-0.07414955162638244,-1.8124932825737066,-1.3455407190513715,0.8976991286743194,0.17295714453796132,-1.5276007332029016,0.49555338453501463,0.4452120193745941,0.21629410912282765,Class2 
-2.3604736791372,1.9249779520354626,0.179426594792204,0.39381084179971054,-0.10169466065560419,1.037221318288814,0.2571486669709245,-0.6038658376783989,-1.3450940145946604,-0.5523786321597765,-0.13914288969447544,-0.721545968537479,-0.10957317380234599,0.984512762632221,0.8519995212554932,Class2 -0.505505273346875,0.7589917727375142,0.8840376290363298,-0.31482635642912316,0.10203098877974248,-0.29846248763990263,0.3140067158677421,1.150322556066498,0.47206980740162957,-0.24764876101399183,0.892918328024367,-0.020031098078946458,-0.25913669960573316,0.290560235735029,0.520351791055873,Class2 --1.196376310905265,-1.6842038105453354,-0.7523534180095749,-0.023578578031150425,0.4356420104265221,0.4713292466735538,-0.6071295994238488,0.9181899253575145,0.887522855445968,-0.5698455370652913,0.9583012183888007,0.03656354084313179,-0.13845224678516388,0.72545349993743,0.6752577067818493,Class2 -2.0418031193093213,2.5078402305318344,-0.725801799326796,-0.7632606078195919,-1.5654585240648964,-0.08632865402529082,-0.3358511226671742,0.3224211995373205,-0.478089016337069,0.9716970690857141,-0.6820699667246479,-0.48914292541752974,-0.9465121333487332,0.17642794153653085,0.5334020177833736,Class2 --0.4366415106397092,-1.1431810262745474,1.04608544090457,-0.9617936033928077,0.551655066321603,-1.2624302284481825,-0.20958466484260005,0.31454121416208974,-1.5223222902312539,1.2080906971557126,1.039465492226977,-0.9409932016487138,0.7841863012872636,0.524410679936409,0.925088620511815,Class2 -2.584459815136767,3.5198053373433673,0.13380204265126536,-4.794112359458298e-4,-1.2388886773642607,-0.35866206064198697,-1.434194887228352,0.8211079768767088,1.6780725229563453,-1.2358669341458892,-1.3766049535077987,1.6377432469047306,0.738775372505188,0.22956006764434278,0.17504992545582354,Class2 
--2.25212525403058,-2.332960765228724,-1.2021602567765264,0.9140413201567532,0.49840376812355724,0.20781913054414358,-1.2915735502249883,0.9610132511669036,1.2483749139118754,-0.8836464404287614,-0.6921788149130449,0.38300957295617444,0.9145670048892498,0.4995788214728236,0.6403647772967815,Class2 -1.8423977850701863,1.2583351922245685,-1.0764716238906926,-2.0473341258488835,0.37798191484425625,0.14144519165493538,-2.762845995605748,1.7038908130545014,-0.0721858935029182,0.034363763979336395,-0.6494530776670934,1.046919422775968,0.17288701282814145,0.6933787672314793,0.8435977697372437,Class2 --0.41063630386489813,-1.6670281027792508,0.5325697588992734,0.9461961615168935,1.6828204925394514,-0.21777738785631257,-0.05933395218137007,0.7091423240126226,0.7124016811118344,-0.21042011934548732,-1.080574837861651,-1.2513376476735685,0.21792765194550157,0.22877964633516967,0.9864750935230404,Class2 -0.06103994895659891,0.01043987064118726,0.4953054823219005,-0.3789440763009633,0.1381885155426148,0.35942386001097465,0.2953745321487049,0.26826094434011777,-0.09615799684444183,-0.008454843543578497,0.1362232745624124,-0.7117547236152972,-0.4041914092376828,0.14467855310067534,0.14469100697897375,Class2 --1.5666638896378993,-1.4064050741992717,0.18078928540714936,-0.2640867184359121,-0.26540651692941125,0.011205075912451597,-1.5426160425993443,1.4783518407212952,0.25743411279321987,-0.7912262676750315,0.693382434191774,-0.8934978956227897,0.9744224534370005,0.0678654967341572,0.5209864331409335,Class2 -0.5215252039182829,0.6583245100387116,0.2607635825521253,-0.9855804033461922,0.9380627032132609,-0.5801125856484192,1.062342618381674,1.0535852124770155,-0.22399420884360183,-1.7702917440230646,0.8287701214451569,0.8172969905706069,0.1903666816651821,0.06946343230083585,0.18226338969543576,Class2 
--1.5750198040282721,-1.6249784155407414,0.11504663993642558,1.1153945619083512,1.6449417175953154,-0.04471455054486852,-0.9748279578667712,-0.7224717336774099,1.6560273821853058,0.39746177068147015,-1.5126974019192174,-0.27492200081823326,0.7362439599819481,0.7585125628393143,0.10206727986223996,Class2 --1.5173059037521315,-0.23303519525636807,-0.723951465944378,-0.9543290764908914,-1.3234547776514074,0.5347402619383259,1.0200389044577374,-0.6284232034670882,0.5502606377309078,0.6869219798114616,0.5440643802907777,-0.7831733060430902,-0.7460088254883885,0.1985855088569224,0.12350920517928898,Class2 --1.2045264628915058,-0.5035485469848184,-1.1195963429562106,-0.41072054535660385,-0.043402091659234074,0.9453195686878096,0.5047685534905622,-0.5501390707367942,-0.07780301262002984,0.5301520137195413,0.08387759663979143,1.6688116684227627,0.9952393239364028,0.204615973867476,0.32949666189961135,Class2 --1.5871857372851403,-1.5270548988273918,0.828627517094423,-0.3648929016597463,0.12437126908446305,-1.5201382222088908,0.6974390220751685,-0.49694259000991664,-1.4432026840178307,-2.381881788845348,-1.4229687872204333,1.0592529381566553,0.638855257537216,0.5009098001755774,0.8581959414295852,Class2 -1.2373821445703679,0.11884780093394254,-2.8149087341902264,0.032346924892153815,0.4348854476491003,-0.18419064208408328,1.4687247354951172,0.2698039017369256,1.7992323540606887,0.06246126591874244,0.13864244965353587,0.1050604644157687,0.21650510467588902,0.49990962049923837,0.9208426356781274,Class2 -0.8592055298278601,-0.27281793928371434,0.8318398517630846,-1.875975083870723,-0.026697904716690948,0.1917814558067025,-0.0025580100123486957,0.29657374076269516,1.5607534663143754,0.5788430905886915,-0.01487988653650433,1.5478541093165363,-0.0945271528325975,0.8855334115214646,0.8122296121437103,Class2 
--0.6410700596917126,-0.8743393265477061,1.005505575952024,-1.612654189372264,1.5892617432694458,-0.587700058818859,-0.651932116807544,0.6189925471197878,0.8405310943652109,-0.6929354397436086,1.1568511662445067,1.2553840368154598,0.5086388695053756,0.2228408232331276,0.7682244889438152,Class2 -1.656356962241813,0.24853876687135967,0.9155044596967149,1.6069547458631677,-0.7893531324171954,2.786669515074305,-0.2244685206237188,-1.8843764270250731,1.1489277601801329,-0.3201918058466644,-0.12579391357662403,1.0203595188077956,0.10798520594835281,0.1498238597996533,0.4780697929672897,Class1 -0.8059594370058034,-1.6411820061531588,0.3273756181637247,1.5790500483687087,-0.020364578779913354,-1.21691105164277,0.07427350060824695,-0.5917583518700937,-0.8461121958725701,1.4422777227684147,0.38150639781380474,-2.698961600048412,0.5187544710934162,0.9051984308753163,0.40272578643634915,Class1 -1.3437940838012878,-1.0609747395757834,0.5202244424441277,0.6141940221832267,-0.21619204310082493,-0.8148030077927869,-0.15445134473656125,0.6993868000409833,-0.7105837028842825,1.2992068299701292,0.6591253058278352,-1.12195862480021,0.017520849127322435,0.9884307326283306,0.18625919939950109,Class1 --0.9098787523668606,0.5627712474629558,2.161219749884137,-0.2880839148554961,-0.07081340997944797,0.7600692517904242,1.31348455227364,0.10679010183531075,-0.21198163317599925,-1.1460060546375088,-0.8868927622719541,2.036724899869837,-0.5165057713165879,0.6785287908278406,0.6145882285200059,Class2 -0.5435417278504779,0.77578494338015,1.241174133570591,-1.420774465535861,0.2632881734190898,1.5613201969677801,-0.22188212252272504,2.178299209055592e-4,-1.8416427315717074,-1.2991402019796265,0.0692391765500624,0.8675519252116247,-0.48054939694702625,0.33722147159278393,0.44095199136063457,Class2 
-1.845496164467242,1.574840084303007,0.25557207069015864,0.2419663650243178,-0.22704276063116124,-1.4586094188906509,0.8556004858210161,-0.4305744603140972,-0.354806957094126,-0.8760466661137359,-1.0876924177121514,0.6174625520571221,0.5832949201576412,0.3254483947530389,0.37689891806803644,Class2 -0.5768499475612119,1.2363274183359432,0.3059993982931419,1.125106839242367,0.2647495438703519,0.6755503928617215,0.4794209780734146,0.0829809672693997,1.5333625683439338,0.5035874511294403,0.16668104449462767,0.391878494856852,-0.43806058168411255,0.8033186618704349,0.9575558791402727,Class2 --1.1135256605494055,-1.4121688523959612,-0.36443275741899717,-2.3623196925653986,0.3491097862397281,-1.0040962087278416,-0.0772474517930096,1.2638836591461966,-0.1711205498651332,-0.07016467259022709,-0.024893174188656064,1.6613594657614483,0.2627607653848827,0.02850133436731994,0.23160590045154095,Class2 --2.0835329976646317,-3.027306988544775,0.14192171501684858,0.5784990619296058,-1.7481115258926692,-0.36935558529234713,0.26918104452956204,-0.3987745574233477,1.2125609721049417,0.99825603285058,-1.1622229598321008,0.5081652213375253,0.7526482865214348,0.5941802558954805,0.6559139767196029,Class2 --0.011917277835454598,-1.5392983552343684,0.5553785185938499,1.6448851216788878,0.5274142382074957,-1.144103240282346,-0.9439235185716667,-0.15937358375894237,-0.19800026879904836,-0.3162992011872906,-1.6104920945756427,1.261239625712114,-0.3032419988885522,0.402493626344949,0.31438234727829695,Class2 --0.4087321448285488,1.5016493341060606,1.2554080396750664,-1.120006743655466,1.0250165136808589,0.6159120723744536,-0.8746412920733581,0.24549113162142422,0.11067778152860974,0.6113608119357191,0.41246791093708224,1.1099613230095766,0.8321933723054826,0.8751983703114092,0.6479140857700258,Class2 
--0.09154343088257245,0.14574718920170573,-0.051645711271169105,0.09037389860240158,2.2182089511582816,0.014899903687491532,-1.577432869764295,0.9945061918295867,0.5753294635601505,-0.8013291219108961,-1.3040590067498659,0.6459388168498045,0.33523231372237206,0.8046278124675155,0.9255867735482752,Class2 -0.15194588483441585,1.756091594127342,-0.05402550466790469,-0.06999364601326753,-0.12484802841014696,1.1277657695286296,-0.16804481786048855,0.26119521782828947,0.7518955828895524,0.14934962041674343,0.030424868510978982,0.26429601590561236,0.8260904694907367,0.3631198771763593,0.461739057675004,Class2 -0.23508884235533495,0.5280843705095075,0.07104050392607786,0.3684622388441692,-1.0251633947126593,0.8428448881441474,-0.9474514734141022,1.0729528196565723,2.0896146816791803,-0.22915565830960927,-0.022887327929536273,0.6391142488623418,0.5845831129699945,0.775258291978389,0.10720067517831922,Class2 --0.22370115814778155,-0.5305799625470458,0.13967382347900228,-0.4951656441490076,0.0373293063751443,-0.18012986843495313,0.9901142727769349,-0.01051920823988175,0.7941262791223239,-0.5681124551425631,-1.320182355017288,0.2712633350084867,-0.1521313632838428,0.5500748956110328,0.08294324995949864,Class2 --0.464597780584566,-1.3043368073697965,-0.3400019526958123,1.4213604969153295,-0.04032185137938982,-2.267388974228444,-0.03480919762222683,-0.98339181201369,-1.9708969492632566,-0.7698371890793454,0.242302622458108,0.9721760765348768,-0.2739368909969926,0.6610930433962494,0.47199725708924234,Class2 --0.3948468124760851,0.24743056501642272,-0.9212029532280621,0.24010666221066496,-0.22875302398609093,-0.15604922378995228,-1.1567342031411074,0.9278024703779999,-1.479173808728032,0.5239005295696709,-0.30679000765420195,1.1033718906088488,-0.4473082646727562,0.653098619543016,0.652509413426742,Class2 
--0.870101029315208,1.3573781508700593,1.8861493701248542,-0.202662776036221,0.19453636987070078,0.46925279595809266,0.5051766308810003,-0.4479086777669213,1.0669008427232625,0.10581974114821685,-0.238701748814226,1.8301140371598488,-0.9027361068874598,0.32712887949310243,0.8324327815789729,Class2 --0.8061389430211221,-0.6569601935036782,1.0044042981663706,-1.733651331611804,0.6789613425687689,-0.44495710058725685,0.1731241431138076,-0.43128934443699535,0.2707385767542863,1.1086133694284537,0.3005935381132349,0.046441179806811554,-0.23827030742540956,0.9630939727649093,0.662824600469321,Class2 -1.443320192227152,1.0816757753856212,-1.2560566298448124,0.05483876795945177,-0.8207771878332571,0.3479539880637943,0.4034053967726729,-0.9784227822606224,-2.0224068905100023,-0.8178489815828474,0.7953331982611993,-0.37816495109219245,-0.7962342300452292,0.8048003891017288,0.006733006332069635,Class2 -2.510737538015116,2.5685272176840153,-0.51613107224619,-1.8004902311928983,1.1208525353146022,0.36377273917923125,-0.23546868501450513,-0.8124381224812809,0.5782448609510672,-2.035792099478943,-0.04850230479826508,1.4182982278744514,-0.3308712155558169,0.20179146784357727,0.34839836857281625,Class2 -2.7946861353363,2.003273427413027,-0.06624664456136821,-0.36223089960123794,-1.519534709143402,0.6515316252632619,-0.9750715836863415,0.18681416727491845,-0.49689535418617703,-0.935199677668348,1.27800904990293,-0.6548962820365409,0.009957043454051018,0.7456031013280153,0.3087874073535204,Class2 --2.105286300567741,-1.1097229745601451,1.2095310044693384,1.0162633821151728,0.19833337994942032,-0.37160493404466843,0.41236228480345816,0.03495180108111106,-1.958082615052044,-1.3273689346065525,-0.3876311650129071,-0.1379479496385232,-0.2176909619010985,0.704296492272988,0.6216678372584283,Class2 
--0.7090935831035646,1.2766953366337508,1.0166524669408223,0.32882766343442066,-0.5123947480984763,1.209461353127769,1.4549049251903763,-0.19478933678333185,1.1501220999015762,-1.5146594535641404,-1.2324439469477413,0.5704091600969665,0.1905339532531798,0.716790922684595,0.7508212719112635,Class2 -0.9903597216747431,2.121875405321492,0.462114295472479,-1.08608111207193,-0.5222050279835612,0.16170364862500256,-0.004377712350314496,-1.047989286147404,0.7929923556836724,1.3499601124801728,-0.15090143050193547,0.11020602640943997,0.9329827642068267,0.9879725889768451,0.8771580513566732,Class2 -2.337318961838985,2.3134130457616866,-0.6790016701074028,0.2265546343990492,1.0399599478481036,0.7597917303296428,-0.5498869608780351,-1.0575248935785642,-1.8151945789584496,0.07338309349827743,0.7985107122313397,0.530615626736808,0.2898766393773258,0.9708146178163588,0.12925310526043177,Class2 -1.0583568139057142,2.25348428622187,0.681333526397683,-0.9924810572484717,-1.4839383302760425,0.13940776132622867,1.1070652163652266,0.7083845343223298,0.8539663060442919,-0.1588362117402545,0.9387895386565823,-0.35139646754189136,-0.40407679649069905,0.8934659552760422,0.3658994890283793,Class2 --0.5735078803371985,-0.6332139081759995,1.5499305800947631,-0.6519723470099162,-0.8072274541022848,-1.8300788038030704,0.5254118847053297,-0.5731088323975593,0.6709001459381616,-1.208309132837913,-0.9107842511153257,0.6397732984113362,0.8935889899730682,0.6727014819625765,0.907728704623878,Class2 --0.5838483391415821,-1.2211490610371578,2.811955458902463,-0.1948589446542509,-1.8254621477494783,-1.8347534680257822,0.8372193706261013,-0.16559303239829737,0.7723609006258201,1.7447658168262652,-1.453805520615639,0.513707831096096,0.8410360836423934,0.8480501146987081,0.7638894934207201,Class2 
--1.1506051057225548,-0.9913961867490185,-0.33581503644268107,2.35602509355262,-0.052685902183401044,1.1138696261580727,0.6568087002850233,-0.41333376032880886,-1.361585491722705,0.9571611258183012,-1.0356208289202942,-0.1630415678144737,-0.4125698935240507,0.5229597196448594,0.026938461931422353,Class1 --2.3577459341686953,-2.0634815381381437,-0.1510138360503422,0.2511711902923085,-1.8419406363310755,-0.19619996241508222,0.13466681348696147,0.8464678115255637,-0.7843854175916197,1.6635680769784496,0.03569124903125832,0.17255254591408814,0.39121027244254947,0.7752417561132461,0.3156946178060025,Class2 -0.16480563064794665,-0.593346113496481,0.9752137910071936,0.7241164318636621,0.9278060612050157,0.27597038284359665,-0.0889313113505521,-0.00986483458496107,0.9453126829415586,0.5238797100387336,-0.35658084405006635,0.6363128366478109,0.4490683968178928,0.39242227375507355,0.5965038896538317,Class2 --1.7843330349194395,0.39305433630824815,1.9756076918553451,-1.8215695810890815,-0.6288144060038653,-0.8889302622037308,0.4426352206896783,-0.9354248989210476,1.0606478557224142,-1.2088669318761187,-1.8751153098152549,1.3608036685934701,0.15492609795182943,0.7710370514541864,0.2561604450456798,Class2 -0.9142301916277288,-1.2346663724353746,-0.15672754775314648,0.8934720540593413,-0.9316959486410581,-0.1302865153898195,-0.4075607341399074,1.5585977228413292,0.13134292914031523,0.2037846721228313,-1.3928186145183157,0.3076694382082562,0.5223867711611092,0.3730354616418481,0.9052833982277662,Class1 --1.2208054386028862,-1.4113741461503857,0.6306585007246959,0.13393509477878304,0.3304810382130402,-2.095674142881053,-1.7109005573294114,0.6352576291287082,0.8776025046600611,-0.8849517819558165,0.3946290814020234,-0.6440534524024563,0.08008711552247405,0.6767644535284489,0.5592932961881161,Class2 
-1.0754470103875018,0.9542790551031352,-0.5046121370250259,-0.2560503843893206,-0.48478088372741346,1.1469230330147644,-0.7609942358412345,-0.1876088414978387,1.82016437490033,1.7485602528504487,-1.310941542058003,0.5557404869623669,0.950996913947165,0.275515143526718,0.852462240960449,Class2 --1.2915415349584578,-0.5341082342042105,0.7215232052364793,-0.7954719936297654,-0.10239399919858064,0.7272371183228863,0.2083422238330808,-0.5769601062694952,-1.266823650953665,0.1674238130691552,-0.6762995053890423,0.1322203274915792,-0.8326487028971314,0.03625425463542342,0.5200618137605488,Class2 --0.011115290497257513,-0.8728594028437813,0.3416350986076544,1.0017006287039654,0.7479453862387185,0.13594119945864333,0.9392825020043655,-0.43337287509068856,-1.4590545868167946,-0.26607510024375264,-2.117286215241823,0.9721064598747221,0.2542510717175901,0.20539385243318975,0.4429065764416009,Class2 -0.2426195720341242,2.568558387284801,0.46762172591464035,0.10172824662218208,0.8001262038263953,0.24404798693809068,1.037723912877582,0.7309033712315438,0.6516308616123834,0.43298973984782124,-0.05303847199759524,-0.2085209893358963,0.810690266545862,0.5406435064505786,0.4955722682643682,Class2 -1.8097792761822922,1.6211945322266976,0.12429489599753742,1.014245683986288,-0.3483159819919981,1.283241713331696,-0.07875718390438019,-0.40337712601662246,0.38487674791750404,0.9977104432760652,-1.035853512955287,-1.1718809257603215,0.9559540618211031,0.11589086637832224,0.08745611668564379,Class2 -2.22712239799374,2.165191971759328,0.10765813515735224,0.4419270891585521,1.0510373299517144,-0.7266623401508769,0.42858184795921617,1.399794760722584,1.422521612263843,-1.0750163108286999,-0.8189436706052418,0.9264869739334198,-0.72132274415344,0.5495925780851394,0.2184434593655169,Class2 
-1.5842371525972858,1.698295543094504,-0.6366359824039397,-0.2475786299774965,-0.04261878800572974,-0.9317672793483742,-0.07678438319222508,-0.9562208527972763,1.671993739280674,0.5227049733989361,-0.37938922709075,-0.5388615667919946,0.7971502072177827,0.5005934967193753,0.4028846218716353,Class2 --1.877788917066179,-2.1823233377942617,-3.5000102579207275,1.2354561711669614,0.34026614493989993,2.045865444231526,-0.6888608133747169,-1.3495517559729275,-0.6749166032311968,-0.9604458467820014,0.33134442012873155,-0.7039208320259609,-0.7572390022687614,0.44558302545920014,0.49848291208036244,Class2 -0.43893750258144804,-0.5035606841321364,-0.6232733684037554,2.0312609891056517,-0.010157041054669724,0.575920046965569,-1.0118032763115188,-1.327401300874802,-0.8021554890330101,-0.6599077134486075,-0.18547061007544793,-0.44578624956460705,-0.7070622583851218,0.8833052651025355,0.6868243843782693,Class2 -0.3759088739807442,-1.2997443762272272,-1.5822154615010937,0.9345622630147286,-0.9704784669060388,1.3277215448368769,0.7395906800346703,0.15717302127166444,1.8488906908176275,0.3360115519617376,-0.14742978301535134,0.4893129875625286,0.9973336807452142,0.9914010516367853,0.8988283409271389,Class2 -1.2240605464204322,0.6569954038664724,0.31332852237829617,0.12802946161892118,0.3912192275228396,0.08267444724998887,2.1701222514289302,-0.6734036590477022,0.958697046323789,-1.646509788380101,-0.1139345634420013,-0.6364870777877999,0.8548048418015242,0.7018932800274342,0.39074977370910347,Class2 -0.7100106912769362,-0.0398126969862892,-0.6407859781658761,1.537194914435242,-0.7453215687783956,0.9286491153600388,-0.9287792621673854,-1.4533162150895056,1.7331790960541806,-0.47680254710503395,0.10126293056217,-0.5256714661034809,-0.4384274845942855,0.08279586979188025,0.059660721104592085,Class2 
-2.3577969720845244,-0.26240901718729615,-0.30207305877754387,1.436746767587071,0.9490292386427932,0.9251824620415464,0.2531716867912923,-0.40876599858855694,-0.058118378649626906,0.8996929139505957,0.2730780261412218,1.4929431809828182,0.6333960108458996,0.342045315541327,0.6099121540319175,Class1 -1.6671915766567067,0.5021131212573655,-0.2091097328593336,1.0850451172807691,-0.6994333444006416,-1.251222396818327,0.3238833604214311,-1.239028266131588,0.4548599121067475,-0.9991059266176688,0.2160628071228225,0.1319013066747336,-0.3561673308722675,0.45305101992562413,0.48171967058442533,Class2 -0.24105360833788014,0.900435263288551,-2.9280848434658515,0.741801011875779,-0.32258113895630264,0.3808139730116563,0.33081145763373737,1.1969587931413284,-0.24309788396693366,-0.48123350905741513,-0.5303398976519148,-0.07507385287064436,0.9793847044929862,0.4546821101102978,0.9787777292076498,Class2 -1.9350890625141437,0.3504659920688037,-1.5951552665614213,-0.6006031343845439,0.42884773503903323,-0.745101641262982,0.19172931844979307,1.648791118726899,-1.2636494979070731,0.12500841406743263,-0.290917632832458,1.693893251534326,0.9621853460557759,0.03230619593523443,0.7971787007991225,Class2 -0.8611426489686627,0.6300130885755573,0.009528221894483524,0.05259974211738105,2.0222930997423028,0.8647785808817915,0.6525453753158684,0.672010847692859,-0.2946998772962826,0.15060268359769302,-0.14175449675207075,-1.8146598772890263,0.07636347552761436,0.9910153697710484,0.4320264991838485,Class2 -1.4755800231698895,1.0270669928806644,-0.10860989595763444,-1.121000804609756,2.2244854080600067,0.1254788550217144,0.8897090981128868,-1.027576554022557,-0.9413256411026646,-0.396766725831266,0.5256900636051924,-1.189701086213852,0.1417604796588421,0.016490692971274257,0.4443464754149318,Class2 
--1.6935814238735312,0.5767784905112279,-0.40936177283470876,0.039799146802890754,-0.2579221160076284,0.4729611181618299,-0.06520651347583077,1.10355863821707,0.09654622462582349,-0.26223089013434453,0.3896021938129962,1.4339447638467788,-0.31261467607691884,0.8115143415052444,0.24068411253392696,Class2 -0.21371419656833912,-1.006704679542622,0.3960748405005857,0.14605531201792504,0.9418872008298971,-0.7529546237772993,0.40684887237195744,0.3357399438485655,0.07311861965954997,-0.6717022359635499,-0.9931474814194645,-0.7405449344659831,-0.8684263150207698,0.06320342887192965,0.608826847281307,Class2 -0.5999248496239774,1.1755311614322506,-0.38234275246706745,-0.367102893126839,-0.029659260526667036,-0.70049088956267,0.9068226548663785,-1.3721340835203764,-1.470766409554463,-0.3656186427489103,-1.2319864199796027,1.433425257098496,0.6264466624706984,0.1663526624906808,0.30589330988004804,Class2 -0.6099369028819052,-1.0125995943555166,0.7815504775436706,1.0202124529965504,2.4780263885361604,-1.4588493038649735,0.11717636037222426,-1.1714172006164398,-0.9164688429704587,-3.1629371530729027,-0.7592974851911879,1.0680844267628844,0.6446613501757383,0.8601524033583701,0.594653663225472,Class2 -1.1382878672305217,-0.8288032503358658,-0.32722390199987095,-0.13395198167303035,0.8664705668901961,-0.47404443703423915,-0.7444656651898921,1.066348308480682,0.9132499163078888,2.097119751338574,1.507859927642597,-0.8163151904776009,0.39718559803441167,0.01441468158736825,0.8348001900594682,Class2 --0.7133542705895919,-1.3187846536795105,2.3767239839261842,0.7087433853282659,-1.3329470819157205,0.47063415528721586,-0.644367708243397,0.15290879677454566,0.8826430248629121,1.4605595746406597,-1.1558282828159696,-1.976052837249058,-0.001109907403588295,0.550934003200382,0.9210544102825224,Class2 
--1.2818889328491152,-0.11402424954725654,-0.44551324631059275,-1.3303213690394882,0.34232713237354556,-0.10381360224883701,0.7518175159409921,-0.20941547815883868,0.0982209923207208,0.4952067566658022,0.4370365360437064,-0.3721528145797416,0.4262540643103421,0.12429887754842639,0.7891546997707337,Class2 -1.7714444312464979,1.2473655128979861,-0.5680371048921597,-0.06104592188503825,-2.284728973201847,-1.4180950756996857,-0.4138409465755103,0.6591097069509059,-0.4161040767896169,0.24367119162540657,0.6091293550597986,1.0483997022567713,0.5823961803689599,0.1803718153387308,0.5582800847478211,Class2 -0.11545197076995661,2.464793373548167,-0.1490170249463919,-0.4635361753244641,-3.0018513910424662,-0.6810479846396906,-0.043577795797321565,-0.5290563903883287,-0.797641578954285,-0.7060294671595394,-0.6559136394899473,0.7187814926597202,0.7401793203316629,0.8564385790377855,0.5604902803897858,Class2 --0.06878039000051728,-1.8119591999438251,-0.814891202660571,-0.23181951328865147,-0.1690557252335383,-0.5531915610261426,1.1710785245281674,-1.46067332968207,-0.3132648361803527,0.969144607338367,-0.3486650618428688,0.7082776649259469,-0.10210788669064641,0.04324973630718887,0.277717289282009,Class2 --1.7605469526019495,9.083572541935814e-4,0.010335884716582115,-2.5653803594992572,-0.20215855246408226,-0.19425457916497765,-2.147898155791724,-0.3516552501510019,-0.0052233461809339425,0.24727978219488184,1.1097445845651757,-0.011349863133837314,0.18599323695525527,0.8927788119763136,0.6869317817036062,Class2 -2.1248871777052156,1.836827469523045,1.3845776586346838,1.982238941825789,-1.434420261815213,-0.7883136323386027,0.18696082447317158,-0.5446130840524456,-0.15102439752058333,2.0457866073210367,1.8188409012360829,1.1402810363870937,0.1076449709944427,0.4327512413728982,0.6000257104169577,Class2 
--1.5115479590535443,0.25699322003061187,0.5052520548679689,0.06410450794326301,0.46632284259722906,-0.5551796354162216,-0.08875655709178404,0.5704505411863783,0.990056132990071,-0.34073277048699435,-0.36554274514966506,-3.3396462008576795,0.28376736445352435,0.9678792685735971,0.30923871067352593,Class2 -1.033952208595852,0.9328812971849418,-1.4030216737619956,1.6041638429498843,0.4342516237608669,-1.3868170288235884,2.645141313488325,-1.9273670136975243,-1.036661704101806,-0.4723048558752209,-0.8873833328847037,0.043399248821354874,0.3322051973082125,0.9708032591734082,0.9756286663468927,Class2 -0.7697043337954912,-0.23249787387271703,1.3744761122592413,-0.6069739458918346,0.6807227570292906,0.3886726045952407,-0.8562659782752501,-1.8047022921583984,1.236392551742927,2.3693129173427163,-0.34701634668554576,0.36874415663499366,0.22181864827871323,0.9887618543580174,0.20831005228683352,Class2 --0.2828715477611073,-0.2877148672940886,1.1100233487508908,0.3895451131873388,-0.14353065209136612,-0.6523554181051987,0.7380353865571938,-3.7835524200489585,-0.2632635760906543,-1.6854091422053374,-0.09499197586664673,1.7034059943039705,-0.5922905560582876,0.19867014652118087,0.7358025892172009,Class2 -1.1944845640402695,1.6007860943706225,0.9578293943210636,-0.312943521655367,0.7023273395570827,-0.07548062922515827,0.1160595050596701,-1.2427203906531337,-0.2749722093469866,0.669793348759057,1.0016398189925348,-1.049731102384508,-0.43141743866726756,0.2721675953362137,0.29382996680215,Class2 --0.8414659611316739,0.6008706501988492,0.21394443436463376,1.56398654048126,1.404054242411366,0.6163040261424234,0.8828338937931389,0.21667289737509002,-1.551965973129762,1.4647724427901716,-0.8697566864957192,-0.10042563905746048,-0.7351071862503886,0.3360220359172672,0.8810127405449748,Class2 
--0.7562959753001679,-0.5819112130753974,0.5705087037247383,-2.191750606622028,0.9503850732102677,-0.15277446338564857,1.2509813639065284,1.0613774741522275,-0.31789008561322557,-0.09882870153776328,-0.6095322358616044,0.22333462144000388,-0.020196550991386175,0.09214928024448454,0.2562819409649819,Class2 --0.3295702669712879,0.053428921378150473,-0.18583790688118715,0.5271707445090226,-1.248978091325699,-0.021651111214324278,-0.672415565234524,0.965887736989927,-0.39291291378726834,-0.23140478340536583,0.8056955090931691,-0.8790367206420149,-0.2173756454139948,0.6695997456554323,0.48327201581560075,Class2 --0.7981163649685087,-0.5109330045095769,0.9104433930727032,-0.900090676828093,-0.7240180147192233,-0.2669364625582154,0.5977283508740618,0.5595556932849469,0.2933205574728383,1.07822477938284,0.49845692362407507,-0.36390612247576287,0.5997219835408032,0.19662009761668742,0.23063291190192103,Class2 -1.1603991859345228,3.0240646709924883,0.6915109088884087,0.97549589104926,0.4289431588808069,0.9328799363645494,-0.45919251634515706,0.6937932156711141,0.17747384435678676,-0.8127097288686242,1.4672414313312188,-0.6442916738222603,-0.11760247591882944,0.35015185456722975,0.7170858785975724,Class2 -0.32225809651417325,-0.7929657904666756,-0.4899061222827853,-2.5880054261742154,0.44345850717598795,2.9759233222582284,-1.6481351746773998,-1.79307553255479,1.107296732301265,-1.40044061409147,0.2710939296389296,-0.3851026135561321,-0.9813901023007929,0.7463070056401193,0.5671932583209127,Class2 -2.0069295885886387,1.1570599560302544,-1.0104488632942752,-0.39466179292531467,-0.39506934256132287,-1.193944043419089,-0.9135675760424775,0.557342644695826,0.013376863282405192,-0.2357763811067087,0.38645366342985066,-0.22093747609458295,-0.7661533588543534,0.7173035610467196,0.2484715844038874,Class2 
--2.0257475974106396,-1.1911799740851614,0.8180420464722565,-0.7899395633064935,0.7565426460828477,0.3623761490099309,-0.9895566045309662,0.21992491566284167,-0.7163135267036924,-0.5712594317328614,-0.3626988236322577,0.0021531170615334333,-0.7093362412415445,0.46410794579423964,0.35823591938242316,Class2 --0.924831282497355,-1.4611683574999028,1.0120248128653206,-0.3893385049307952,0.9201466634160121,0.85354984626675,-0.26233987839002376,0.06080037203547916,-0.6268922974452409,-1.2862446030862251,1.1270022223529133,0.5545700463831308,0.830623300280422,0.17318932036869228,0.9399066446349025,Class2 -0.2338956327321986,0.33564243656848647,-0.8061788806120705,-0.0503364651073891,-1.0723766778028836,1.3388528593087963,-0.7390080175391136,1.6143889764553907,1.513125800716861,1.5595329267347093,0.5483503508165071,0.43132828571588044,-0.569349539000541,0.4936780515126884,0.8936509711202234,Class2 -0.014595365334965082,0.48265281482167266,0.7808455782390884,0.39128451326514774,0.03246891970923708,1.143622544582914,-0.4790611716383507,1.1280288044052778,-1.1258218853527318,0.270297731950885,-0.5511038818431057,-0.5621050749331642,-0.131575096398592,0.6137810405343771,0.8975191554054618,Class2 -0.5474933635468223,-0.29410949738486514,-0.09330673848132036,0.31657146322103286,-0.7009582854945012,0.16087657211402404,1.3860639615548336,0.2092845261907006,2.4283713245671867,0.5491800969718685,-1.2698953238520898,0.2890386988759903,0.375951976981014,0.40072401030920446,0.11623229715041816,Class2 -0.7773918156471242,0.16989441648595616,0.9570375988974683,-1.2895679236463204,-0.06476259542320305,-1.3004070548361426,0.9923943570949316,-0.3376169984457414,-0.2477407871567355,0.07570280153526215,-1.3201124370020751,-0.5818133350960212,-0.5448428462259471,0.6190530387684703,0.7968618264421821,Class2 
--0.8720774484234107,-2.209159479418383,1.0584507134424297,-0.3411335291787467,0.9200882521254942,1.3450660715216587,0.19064857239760477,0.4274440879438738,0.5087420744129393,2.303181492232073,-0.23106359091129583,1.4344449349495845,0.9535166341811419,0.1948372784536332,0.9744600052945316,Class2 -0.10940705369027072,0.2440273862637023,-0.861691939466451,1.3901949744565205,-0.9297022247401542,-1.2949902316772757,1.0144626107947674,0.7686204399288631,-1.2685212911991002,0.8519026873029932,0.011807183994871033,1.0714956770568473,-0.5455154669471085,0.08246214035898447,0.6834983485750854,Class2 -1.6779436640267364,1.880799787388552,1.5204104145099313,-0.639123568337924,-0.3505675303723447,-0.03233244574241792,2.097259949080212,0.4246036448838318,1.0885808273943964,0.01036354571300559,-0.7584519822360082,0.5218291577093911,-0.6052811164408922,0.16518363310024142,0.13070064689964056,Class2 -0.4938333744580041,-0.7008341966738021,-1.5056275596596962,1.29868120163798,-1.6102438384051592,0.45715818132815844,-1.178085648102229,-0.1513950999230687,2.1673270421562503,0.1975432812649893,-1.1664626946342205,0.9238361164536154,-0.13920008623972535,0.9532415631692857,0.9949746441561729,Class1 -3.3301340368729226,0.6134802787348081,0.11013833353720964,-0.8126356188705153,-1.2643045122345355,0.2777146050207128,0.35833021915101776,0.552730359813976,-1.6943104980123493,1.5693314913092786,-1.878954155220135,-0.6717275466444146,0.841799842659384,0.17290790961124003,0.3010888493154198,Class1 --1.4874950009627663,-1.0006583558308058,-0.5113365089075377,-1.3972734636561934,-0.3874926717404662,0.8687903675448958,0.5392772927144273,0.4139984421836043,-0.7890202766444806,-0.32648586148640524,-0.7021397298499431,-0.19421780268754546,0.8648272948339581,0.11049421248026192,0.0015585715882480145,Class2 
-0.09117820211377947,-1.117881056388193,0.6044905592799283,-1.0572587983674864,0.18266594609970427,-0.9111911333023858,-0.22993252615621834,-0.7105195171165576,-0.7164211763442924,0.26961045300900477,-0.9327425854270883,0.15395400847152657,0.9014754514209926,0.41861609276384115,0.7706281035207212,Class2 -0.17464682736904902,-1.4751518295949322,-0.1677869663725886,1.0618107792243867,-0.006374061077409496,0.4960399092619021,-1.473364450777728,0.8834067477204706,2.7938183144528033,-2.744402589982555,-1.2234018099176842,0.9034676230093962,-0.6973279574885964,0.6544805287849158,0.26766279293224216,Class2 -1.1393319056174818,-0.23749721574750604,0.016221386977397834,0.8747313132571156,-1.19512729364277,-2.0077761805674332,0.48850816726480095,1.6368732473393113,0.6487268515517419,0.6965326216617943,0.6249118320691255,0.813748924290225,-0.00474792905151844,0.21398359979502857,0.9184325013775378,Class2 --1.184414719694045,0.68189168285418,0.39029817919397186,-1.2599899319767562,-0.1769880797194769,-1.0939224278943078,-1.0542221117676973,-0.544486093346967,0.35361577404906164,1.5841360146470551,-0.9115281200746568,-2.326108721386683,0.032834847923368216,0.806791502982378,0.719368607737124,Class2 --3.0390119386032928,-2.847734704350192,-0.35745742459732277,0.27885692680582597,-0.036163175701983,0.913424780896116,1.2918673717124602,-1.5452377307911747,-0.8409250948163639,-0.8916931578039293,1.4843107905223796,1.0786963650074517,-0.17939824471250176,0.41030770167708397,0.20933884242549539,Class2 -0.126398422457067,0.34854940862985556,-0.8024704134974328,0.05383480276263303,0.9229759934975823,0.3038778588009273,-0.7231485486070164,0.07864336601828023,1.4556189109134663,0.6771022044904953,-1.184397238799768,0.6550324454083016,0.9488121280446649,0.04664732166565955,0.13153074448928237,Class2 
-2.019255366383197,-0.6354364440710188,1.6145919820095407,-0.2726885064300857,-0.06547021081760716,-1.36495757544397,0.03768098459598789,-0.7320368008239001,-0.3199056974784905,0.7209348248958845,-1.3327340533242762,0.39981330084026995,0.9267955445684493,0.29800670547410846,0.5153857958503067,Class2 --0.8427094030144798,-2.1321975892945617,0.8544411504799286,0.9761800430833987,-0.5210055331319482,0.921642049464246,-1.1853745707397838,0.32742877782510404,0.5826312374932042,0.41571869890177915,0.7482603761648998,1.8414362052087754,0.030755297280848026,0.2677662179339677,0.24466001777909696,Class2 -1.480768214399412,1.2666964857973364,-1.3751700035457846,-0.3013900807160579,-1.2267035638202706,1.2385559076231454,-0.07832344957864722,-0.5325415669480841,-1.1573251507073317,2.8557679734862425,1.2898396451175769,-0.6169419441055902,0.034686589147895575,0.18103653052821755,0.48233850160613656,Class2 -0.5082688811703688,1.0823737185816973,0.5166171433210012,-0.7310601365115369,0.5583843587202555,-0.5946130151859381,0.052477964288507886,0.5106487386380304,-1.3416161493758962,1.084998054782061,0.9013916659230412,1.508086150231608,-0.19438640540465713,0.4253503440413624,0.6168445413932204,Class2 -0.6998541543290471,-0.1755260458369438,-0.9885799321126137,1.7912607326654726,1.7109092755857436,-0.046241269749531674,-0.5513924999700247,1.9341295797664535,0.7252083271851854,1.3619994448130834,-1.5618654732480302,0.2364413559851967,0.6588873122818768,0.5415769009850919,0.027812961023300886,Class2 --1.095434310267111,-1.0385874054984627,1.881472295159209,0.6241030185490143,-1.0147596881641978,0.7637472179594208,0.399538031451646,0.7373532287927704,-0.3453981140323258,-0.11622826238791889,0.39212899716489397,0.49599234331000663,-0.06264378689229488,0.9349857603665441,0.008539164904505014,Class2 
--0.41723461642941895,0.47357434985056,0.44746431246470086,-0.2692659794718666,0.69391853830344,0.8723944174772161,1.2424264367897173,0.19040186336763007,0.0069424280809396565,0.3233030652811436,-1.638705720317958,0.03452163444887525,-0.7681652260944247,0.7097720531746745,0.8223485986236483,Class2 -0.8225660224343408,-1.5645469600329438,-1.8266157881329834,-0.9099779769501615,1.9862943017795036,0.7594949281014884,-0.6761095233508587,1.454023125351622,0.3372766946034141,-0.17148229929441522,-0.11757482396050228,1.057623486599479,0.1980727850459516,0.2625384994316846,0.5020458535291255,Class2 --0.09689592506567296,-0.2215487977021351,-0.21689131803998804,-1.2804368621573579,0.30927754144347647,-0.7108157180389113,0.08049294933452461,0.2206261083998055,0.3864586958947524,0.3995835774266125,0.07757661444407546,-0.9216465706800661,0.46435299376025796,0.7745161396451294,0.8999817159492522,Class2 --2.5991155417515,0.7360839031787576,-0.6386956702949736,-0.7340039363291657,1.2043199928407509,-0.08253718224748471,-0.6579572436134417,-0.02654971939252105,-1.264220780653683,0.892859129061068,1.0349762550878854,0.041982822809006996,-0.918132433667779,0.8007582589052618,0.7588812636677176,Class2 -0.09808487833318225,0.5469795610738414,-0.9112965835353585,0.856552673424558,0.11121875686779029,0.08112359277469418,-0.6228301786886029,-0.9341402013206609,-0.5787352376167655,0.09681835026990344,-0.40550350081755016,1.0740838664740302,-0.8832133542746305,0.6944401822984219,0.6556417630054057,Class2 -1.3604977806465488,0.7074017809995459,1.2725824559750725,0.46411988856030345,-0.6309264024894828,1.0499433336996502,0.4157362752622049,-1.2192914187116644,1.304191415048806,0.19396641117967992,0.07806689097353178,0.36743304532481197,-0.3822319698520005,0.09500403795391321,0.399614178808406,Class2 
--3.095428272554381,-1.6990800439936902,0.5306027550679643,-1.6458667179513962,-1.7981285223649326,-0.20224501253890514,0.5130982067432229,-0.5622261721972847,0.7901247627001531,0.7711656389328703,0.31807786494860774,-1.292983566830667,-0.057367528323084116,0.4538659246172756,0.06397854490205646,Class2 -0.08434991373485773,0.8503668103911179,0.6688478681977911,0.5516851436365647,-0.9313168059190403,0.9085746690595528,-0.515121720780239,0.9146327963234825,0.19815074051082585,-1.1114317053707665,-0.147492868040164,-0.9047592816944584,-0.41547991381958127,0.22578743775375187,0.0936333907302469,Class2 --1.6111233718903222,-1.44024670616309,-1.4405747713874346,0.6312646095933051,-0.8726547931237857,-0.9570410425256319,-0.3342839234259029,-0.2355842465179411,0.32641144508901543,-0.4761121509630666,0.10160712417483253,-0.575680641171654,0.7707022344693542,0.3366796125192195,0.7125510922633111,Class2 --1.289433009749717,0.3115786133612422,1.3883788597963247,-0.06330329736302731,0.49354562815030795,0.593140761066593,-1.3395292140035888,-0.7531091749121933,-0.10896727861754318,-0.5640332598922566,0.663886255290856,-0.3113379967296793,-0.37180795101448894,0.8200437403284013,0.8697297726757824,Class2 --0.12789715508136668,1.216228212743173,-0.20594754790177924,-0.6939204513531956,0.5421311588587181,0.5485622979093078,-0.7345972233438769,0.5930539919236715,1.2351919965361988,0.4947813505116787,-0.5276079917778979,0.19464364491782868,0.18534362921491265,0.2440303701441735,0.2614529188722372,Class2 -0.8680899433800909,0.8011241770173189,0.6294493302053065,-1.0231839821978346,0.37437604117057516,-0.8352287398084594,-0.11107086369217756,0.7806401453893734,-1.6757003856154187,0.8969318199178657,-0.8141212743460848,-1.2705365545941523,0.6712188399396837,0.5392935508862138,0.8870175327174366,Class2 
--3.2476661836832745,-2.7975467145420314,0.8019099240830501,1.7683082505450625,-1.1216203178479967,-1.5962968156348316,-1.063206917610347,0.7483682982694785,0.9217805894594131,-0.7494473431832323,0.9689152767986259,1.581232413489262,0.26931677758693695,0.6931116189807653,0.2578695872798562,Class2 -1.1006036824742313,-1.2438976144004243,-1.5916542231346036,-0.9429147407496064,0.5454174972220943,-0.17245067278505885,-1.7237754055221162,-0.2645565222131994,-0.37681016264475264,1.3023692743739517,-1.2136394168630813,1.2478641905000558,-0.6815658565610647,0.13308947626501322,0.9576302254572511,Class1 -0.06492894007571026,-1.1368436672276552,1.363669942346805,-0.12560374004347996,1.2514202930855627,1.4834381180758311,1.0923135326676274,-1.033013395911088,0.5474120212920948,-0.8498840019401699,0.11845665867103414,-1.9818104584408436,-0.019780609756708145,0.4300570418126881,0.31975622032769024,Class2 --1.6911215579878016,0.1692249446751236,0.27987747281404945,2.1200296206671054,1.6183787773462954,0.17450074055497317,-0.14361146894779506,-1.0557519701370048,0.09867758216523131,-2.0112355224589415,0.6335366050601451,0.890884994040107,-0.8461334076710045,0.07516371249221265,0.34141005482524633,Class2 -0.5710482776629926,-0.11812615869870294,-2.2515654704226704,-0.7200165314380969,-0.8270537525840717,0.24426804332174432,1.9593614352074977,0.13883165507374093,-1.6489236389868551,0.8899351297482978,-1.0400617066633004,-1.2848084502646124,-0.5887424070388079,0.07363066170364618,0.9245845125988126,Class2 --0.2821074991702139,-0.16077773594560135,-0.03748602080276107,0.04319499667818465,-0.14041637635493567,-0.3111943639300142,-0.8785477407680724,-1.1335850180144338,-1.3529913234864346,0.07673568133954452,-0.28503515221851833,-0.8914641295025901,-0.3758590701036155,0.28947957674972713,0.13462537387385964,Class2 
--0.7535572074964876,-1.721339295574827,1.10532319445672,-0.5244950985581185,-1.383335273195793,-1.297732382606809,1.399875882984046,1.137258255680765,-0.5893061401337238,-0.8359968947056239,1.0746533287953774,0.5618644867755643,-0.743406324647367,0.2403317883145064,0.29974141367711127,Class2 -1.959387765425843,-1.1581350139733038,-0.3995805766578545,1.4934205928059465,-1.8327900362715357,-0.38835372145889135,0.6290419878560701,0.5793146996168099,-0.29408537656756717,-1.3940090451527236,1.2066091429376344,0.982954701119501,0.6388404914177954,0.7992547827307135,0.5245513641275465,Class1 -0.48928791252156933,0.5371430733139647,-0.4363285697990484,-0.752404915299664,0.9020338365547255,-0.8531618358660644,1.2270562421029108,0.2682873555605734,-0.21431852446889185,0.8033801548445123,-1.413563434453448,0.21864570397006572,0.5659606768749654,0.6704334327951074,0.2423744765110314,Class2 -1.888348298258245,0.7061130830227728,0.33938558431793026,0.3657041865078498,-1.0230883826800266,-1.1813847884482136,-1.5368501853681602,-0.650989447797534,-0.10444429641081135,0.18760963616795054,0.7662819895742762,0.4222523302233684,0.2773844148032367,0.2873248609248549,0.961966798407957,Class2 -0.3016372580895735,1.1578904493713371,-0.7614421169571296,1.0194199568964502,1.0098502167919872,-0.5329351353336791,0.7750030560988916,1.0943216736399832,-0.34965102375231233,0.0496440757852631,0.7815526903926308,0.3858399270085403,-0.9219654020853341,0.7865092158317566,0.09440285665914416,Class2 -0.8128426104118429,-0.3085916903723959,-0.22362921432161814,-0.9248351675059381,0.7620537970531213,-0.14708546551182242,0.6071243128584185,1.440835655473309,-1.1893781908251908,-0.5281303860726634,-0.28279167757299095,0.592120202475751,-0.31334456987679005,0.8985423860140145,0.264654525090009,Class2 
--1.4933343357052817,-0.24616557437768316,1.3233878913405672,-0.9357191370288549,-0.27862636584467587,0.28417412755705923,0.32906154477775473,-1.25875551808109,0.838944692663353,-1.3083157265100873,-0.7697832004734423,-1.4462763392152973,0.8731005820445716,0.25921259028837085,0.9993454562500119,Class2 --0.3555592821253492,0.014140717630516386,-0.22407126434566216,-1.2098944739163344,-0.034548562757249886,-0.6821821410474014,0.999626316871889,-0.4726414568885838,0.6357496008151513,-0.39709095609449147,1.2468446633139656,-2.5413214480763555,0.6768334158696234,0.13006173563189805,0.0696621669922024,Class2 --1.859251735024934,-2.5422954944023664,2.2103786221550363,1.1767979178228292,-0.6629095840462614,-0.5827736902072409,0.599705772763201,0.8349401952483019,-0.27629982634429934,2.3359665118781807,0.14913141753341422,-0.749079697364272,-0.8026763997040689,0.610700432676822,0.826904997928068,Class2 -0.7711094960766228,2.3967043094685487,-0.47217083690373285,0.04855855736474654,-1.0595735696461395,0.48805715340159506,1.0534775836137704,-1.5200277203750887,0.3074458580332651,-1.9954085598685414,-2.0572950009690474,-1.0531635333854237,0.012135877273976803,0.9118862519972026,0.5698403662536293,Class2 --0.8392185817996459,-0.3739094038012992,-0.3718636688755847,-0.40469473024765423,-1.5467509956276801,-0.058524024511348534,-0.3522630355350769,0.5767827579946322,0.7620314307663026,-1.3535501814406519,0.8610264419841968,0.27309012945203487,0.8461509505286813,0.7005063542164862,0.21119967056438327,Class2 -1.0829807976520693,0.5784050582547151,0.04950550856163559,-0.6662515531468727,-0.2423531216367455,0.8940581188011196,0.29084392381174184,1.2353730786862802,-0.24074553501612003,1.816352716475315,2.2021914678003993,0.6517050288749897,-0.30985594168305397,0.6414011623710394,0.2295034967828542,Class2 
-1.261648055259626,0.9198427979686054,1.6085976621349176,0.10050232899379687,0.4665273628406434,1.9369573166938625,-0.29626542181604104,-1.8667039046896385,-0.3030564910098261,-1.4502937448994373,-1.9014896956624094,-0.894845663283303,0.8189587639644742,0.6190797011367977,0.8734656744636595,Class2 -0.3811115271478121,-0.6278897430475997,-0.8298295884309468,0.9826758349264612,1.916710731064909,-2.2843606469628455,1.0682599438777605,-0.5604083006269931,0.7914915632645947,0.443127179004767,-0.08365415980858237,-1.5212306759607481,-0.23640506714582443,0.5490541043691337,0.15249080141074955,Class2 --0.8384873778868652,1.9502176462621814,-0.22927863121513384,0.14056919551036604,0.43840738222743975,0.598544673206158,-0.0879701566769304,-1.4559523869221644,0.6888129504405491,1.741547469279251,1.2360236395340076,0.6778626217995491,-0.7259071371518075,0.01466889656148851,0.7698835532646626,Class2 --0.5987399654506124,-1.0595013580034738,-0.40349570945689767,0.39800071070388265,0.4862728175968152,-2.9974930121986434,-0.4297006746284626,-0.4846770971405347,0.8254796751043616,0.3138346307551538,0.49927913863744317,-1.8791125741631498,-0.03288379870355129,0.5517823903355747,0.2493961090222001,Class2 -2.8011272801978406,1.296187520292237,1.24080053843463,1.164906525619434,-0.6227815434497219,0.3124250536027738,-1.5126474869528554,-0.5058038303398453,1.5524351175411106,-0.65523989045427955,0.664115132316647,0.09120696756729293,0.43735848693177104,0.47749555739574134,0.36867220769636333,Class2 -1.394175634942916,1.879942352520698,-0.9420431645919962,-1.3660001966239175,0.3212056893993314,-0.4793108670856517,-0.05770104598615946,-1.6230072003602982,1.2231567971190467,0.20899984557471227,0.5810888683507704,-0.380356660823801,0.6395287690684199,0.8582562122028321,0.7197878889273852,Class2 
-0.40826517740557516,1.1101500478623267,-0.4298934417715945,0.016235084541114605,0.26208095686013505,0.39378034694148123,1.1340854259870887,0.15092118681343045,-0.024224687135369564,-0.5996369549657207,1.6240264004643692,-0.05858343597024636,0.10767775308340788,0.9785811754409224,0.9420401088427752,Class2 -1.3937918919432624,1.1817603472189842,0.09401193730885025,0.882631209700629,-1.876030835132518,-0.5924918039804116,0.6538815628677926,-0.2640692363254098,0.1754260253413089,1.2131454357573563,1.5811122018176937,0.7246005330694562,-0.8364726915024221,0.2719128606840968,0.22372815595008433,Class2 -1.7458671084222046,2.35114260343974,-2.2660418544623386,-1.4633588003415143,0.7147645214701739,-1.1944000501014727,2.601871722676162,-0.48544209949560924,-0.09264080148081594,-0.22700008782471065,1.1967024959209434,0.30049258321237543,0.45176961459219456,0.6877703829668462,0.44865825749002397,Class2 --1.339008377548138,-1.1492123797855445,-0.462518095573695,0.5853374385307312,0.8394134012039212,-0.8768224903671038,-0.7423791704241162,-0.21094299269334793,-1.8874701125529918,0.872781493816141,-1.281861232721372,0.45796967398396793,-0.639517336152494,0.4217387412209064,0.33850614028051496,Class2 -0.25201841762733845,-0.7948467524393324,-1.065763369820225,1.9036988774042012,-0.5706449368736288,0.7579129076077472,-1.1287504602828904,-1.5448617266750717,-0.910239235147842,0.9648251746735053,1.2431728473157675,1.276676260495425,0.48707864759489894,0.22550283162854612,0.5172691494226456,Class2 -0.1500546559344345,1.0729136628674483,-1.346972294177022,-0.15461013793735748,-0.37988243711016384,1.0852528625058095,0.30405055549235915,1.6241464694999626,0.3057765292390513,-0.028519490530794556,-1.0153980670485663,-0.2625632927010443,-0.1991919158026576,0.6556327124126256,0.6134018863085657,Class2 
--3.007680927141962,-2.1182772514922865,0.4691234465753607,-0.11088790230437114,-0.7863541454350711,0.06236062464035526,-0.6066874159566547,1.3272784617577766,-1.0251810916757211,-0.1523303847488521,1.1311090503796184,-0.7810429486361269,-0.13376467255875468,0.08491205214522779,0.1676695963833481,Class2 -2.995134830532502,3.1391153008404094,-0.9557423872051607,3.0080504173942626,1.6291797974329394,1.189260868850588,-0.3706527609812224,-0.8911079076804692,-0.41313620206813206,-0.4668181164922639,-0.5763749859816794,-0.21012709038797125,-0.10060620633885264,0.2663822746835649,0.7376735911238939,Class2 -0.7551425836757153,0.44659396243428046,-0.04537875205661128,2.2007798138323262,0.8992624627441765,2.1495796082210497,0.22716926659603667,-0.2171860930746129,0.5901172610127718,-0.23270678159246258,0.361757753246937,-0.8511497015095885,0.9926285645924509,0.3725008887704462,0.5090815254952759,Class2 --0.7763645620930382,-1.7243541275754548,-0.6738753999500952,0.095922495408917,-1.0104285460445754,1.216945813797776,-0.12946682264228812,1.520285729394769,0.283141199499483,0.14087377906679252,-0.41830037717535956,0.5099416067382305,-0.36647544940933585,0.329908364219591,0.7945279169362038,Class2 -1.5411718613888428,0.4962582556091847,-1.1102164106698296,-0.2811338080942961,0.4532029840402212,-0.6391277773586479,0.5018550496769407,1.5297992904070976,0.6668360046233172,0.7197413632054581,-0.8420689994133682,-0.40540128345677234,-0.6676776059903204,0.6235791442450136,0.9583233420271426,Class2 --2.5792428452368092,-1.5557271670774018,-0.5540181980409857,0.5760682665601589,0.7372612641400151,0.1350459366733723,0.9399610802717738,-0.1289696717656669,0.19668208853839478,-1.2502704714247679,0.20891723918878263,0.5898856803950485,-0.3815101608633995,0.6765703002456576,0.8932633243966848,Class2 
-0.9165854327905085,0.3965157476888304,1.59604778793905,0.10448817046032247,-2.1597067309425015,-1.2085217132053336,1.2306232808567361,-0.03723656904412712,2.1621608257113345,-0.47918215512490575,-0.06728707468312738,-0.6307899495383108,-0.7548449616879225,0.31574422726407647,0.06215440551750362,Class2 -1.924074958382994,2.1879516560228933,1.7039989462619134,-0.423336298448791,0.3999804328938057,-0.7729667223052973,-0.24929974850814743,0.17580712233046383,-1.8458331286306697,-0.6197244251739489,-0.7587507400145889,0.8869045853208114,-0.4819421377032995,0.47674252022989094,0.3315102106425911,Class2 --2.387078310911282,-1.3455870826212002,1.7143604843598406,0.3689018057620339,0.1889832653798478,-1.219242391922078,-0.573493625192601,-1.60657022251309,0.4924337510041778,0.049007499261991515,2.497756971117939,0.3138410020516604,0.6927703483961523,0.1996137301903218,0.25034563196823,Class2 -0.6540444708705963,-0.3693928276292687,-0.8075271496378246,0.19082124278559912,-0.06604399131658466,0.24562357226759807,-0.8755537440029307,1.2157520640153174,0.01165767775875408,1.9739165106839078,0.6148653702412629,-0.3314076823554798,0.3773447903804481,0.3028455078601837,0.8417212681379169,Class2 -1.479480875482844,1.8291452120960716,-0.42924260417149285,0.39145488283155727,1.1930248202068392,0.8964917352675564,0.42957718921566923,1.0542862021771453,-0.4499113490762092,-0.04766636373314316,0.2605427451263393,-1.3723333045890234,-0.3903999733738601,0.9200080062728375,0.296731517650187,Class2 --1.7142885775724754,-2.9703221691735,0.7186499314078981,0.5352981691018086,0.9311886679275788,-2.0214010635593653,-0.11736019678370417,-1.1869777778517443,-0.022467624347445602,-1.041997430967388,0.5176695657658854,-1.6264491589994994,0.7989456080831587,0.3862670089583844,0.5181120906490833,Class2 
--0.04460881277437101,-1.8771692531100177,-0.1759230454370643,-0.8647226058678374,-0.18920664007472432,0.33292327862361704,-0.7081717536118399,-0.2022830557202698,1.1041170342803652,0.8605198346039813,-0.5978391279807708,0.8111870115978903,0.17316450737416744,0.6457009464502335,0.3704010855872184,Class2 --0.9281893540321867,-0.20489831345478782,-0.5140295972790968,1.2112431116697402,-1.4092036525118765,1.0386167256512204,0.2871056282423409,0.7065639843832743,-1.1647241882803039,0.2476932590638391,0.6371367113691194,-0.1757310666784823,0.9343434851616621,0.8495516376569867,0.935125581221655,Class2 -0.34613667804712545,1.1854936652470345,0.5355053057101441,0.4112652351338425,1.6202289691279164,-1.181003366049436,-1.0069855557513496,0.2998488029300506,-1.655603021466593,-0.20655048834738776,-0.37534208757047527,0.4658592203630541,-0.2372529087588191,0.7373468265868723,0.946149867027998,Class2 -1.19287830843688,0.8266174147662064,-0.9992982852032349,-1.1653154832173565,-0.7176189287835616,-1.670883295055081,-0.25584837565679613,1.134419920629079,-0.6518664458534258,0.6178512982834525,0.32303316030543927,1.8399303802651203,0.15413537016138434,0.3673723351676017,0.023675992852076888,Class2 --2.88535949089754,-1.8613752298575847,1.1872414054828366,0.2544903682272028,0.04133874495984773,1.236280691461127,-0.9386070330570618,-1.5486926355643125,0.9035305276214088,2.8806771831722875,0.9783900345298662,0.026248873021877353,-0.3094399878755212,0.47060736548155546,0.13799658743664622,Class2 -0.22892830687792398,2.510161979849391,-1.2691212255116822,-0.1366586994146435,-0.45712044429721044,-0.2626786247292245,-0.21116805921938994,0.08053606579650471,0.16664954890454856,-0.777350228792387,-0.8834535011263239,1.9850210112127045,0.6113662235438824,0.09913728851824999,0.41804422507993877,Class2 
--0.909534296376911,0.3042334918504751,-0.024350748545438183,-1.4609506851459968,-0.9732729670232957,1.2648842270403833,0.13030654418060242,-0.9581895717746532,0.406649984435291,-0.4819065728885069,1.3496149829145756,-2.4491250686910315,0.9129457273520529,0.2282875960227102,0.7535724667832255,Class2 --0.4636546760689414,-1.6122809296051437,1.3195391109029888,0.8521587229627585,1.3448539982662713,-0.2047889787448859,-1.2473837394639613,0.6152999380371443,-1.1724213567320576,1.9153963116112875,0.6632188829463725,-1.5574484556992452,0.8538573090918362,0.36915046186186373,0.017901836428791285,Class2 -2.520072179932133,0.8163871408055332,-0.05823623017800018,0.9567731173855086,0.1468997193788415,0.9296933994430445,-1.9805933822647506,-1.1338776700923223,-2.4162596458498835,-0.6748512463044399,0.7527756378416751,-0.15086094115240858,-0.8645896180532873,0.1871925292070955,0.9855184846092016,Class1 -1.1372965509901602,-0.3074738586819779,1.179659136383687,-0.924876016160083,0.7397256195217327,0.40002418512495613,-0.25677998835853494,-0.29856871624374703,-0.7898412624355384,-0.9303682027881571,-0.18756145677356595,0.8917415742738516,-0.7765244687907398,0.9425151185132563,0.6435877503827214,Class2 -1.600744605671999,1.3145245857650072,-0.6986165226518528,0.7880386291311289,0.1328990454776007,-1.0355115747900492,-0.4974381498464177,2.09970587483608,-1.0505810495032812,-0.5690683572128357,0.9633617661327057,-0.650126200168838,0.16101583186537027,0.8240150983911008,0.050568426260724664,Class2 -0.7885293075857875,0.8014147367049487,-2.2545554139561714,-1.2320529138638394,-0.4745083278163085,-0.5821469556694693,-1.1004370593579376,-1.0420107867870532,0.6111365190092471,0.32429884940849074,-1.9508928938517383,0.5028979940243019,-0.35964091308414936,0.24926127633079886,0.0036124265752732754,Class2 
-1.703821170005293,-0.13333801631351483,0.267795083314022,-0.17428172189460178,0.22766166984112635,-0.19803775780309177,-0.6218107684705555,-0.8163540962711475,0.6863104558498238,-1.461636954850202,1.9966426009391138,-0.3733051669143706,0.6114137866534293,0.6068972253706306,0.7072538409847766,Class2 --0.28456707228517264,0.51665730254961,-0.5464749368327533,0.47610463304697975,-0.5230961956755547,0.13137053385270073,-1.0909459976189417,-0.9109683527295975,1.1554638649046232,0.9969728088900088,0.01584256514759891,-0.9857657810812712,0.746659382712096,0.42052079387940466,0.7587800293695182,Class2 -0.46453998392423934,-0.13938238259290078,-1.9098872496440849,0.2277144501843294,-0.161575738089434,0.8884486413525582,-1.0959488908188748,0.6617042308432599,-0.12444615587021476,0.6130455630577024,-0.055619038511711655,-1.8239574103597056,-0.2600037381052971,0.008846573764458299,0.36588318971917033,Class2 --1.9356590199261825,-1.758757581095096,0.1724009095234245,-0.19202530295611905,0.11359379731055627,0.5878935673609197,-0.5474912198902655,-1.2014123626993063,0.9092824666871862,-0.4664761531014023,0.17903929934303392,1.2070443400305673,0.2853454411961138,0.6790230278857052,0.6149554222356528,Class2 --3.023593924542292,-2.3423532649042644,-0.9241805320703614,0.17913936105120845,-1.3744857674796207,1.04004130904341,1.8548059481390717,0.5044348501796523,-0.3751459392233201,-0.5317146242468814,0.25187095712251995,-0.6833890520504401,0.8027068437077105,0.9984008849132806,0.5310306660830975,Class2 --1.3775342836881106,-0.8388416820937015,-0.5389495431318484,2.704305448241171,-0.8416995552081491,-0.29993707720658547,0.7303779671515606,-0.35841225864008924,-0.33588889634937225,1.0920266993224714,-0.9168848375974683,-0.27089432543342495,-0.3853367967531085,0.5396553527098149,0.4239288039971143,Class2 
--3.11078495689761,-2.842691128671566,0.05254942626319507,-0.34885864122876303,-1.5057749626030734,0.7620068263662869,0.8941478600991717,-0.361067495981168,1.4003415852162955,0.6327984369798643,0.7594970803895699,0.21059314979133445,-0.38973634876310825,0.24111055210232735,0.2439523001667112,Class2 -0.051727530149850764,-1.3740709261474604,1.088449210729998,0.6833874328485038,-0.3433440801277598,1.2538741751270763,-0.10840351195341202,-0.5084770922672077,1.0334827632028016,1.9986042080333881,0.3409333581440639,0.5311783202379617,0.6772673521190882,0.6240170185919851,0.14756600186228752,Class1 -1.461640712725136,2.836188285436795,1.0896452666301293,0.6213055710072409,-0.9308726284279001,0.4907071322563713,-0.07558666575828753,0.7070634417684868,-0.6140681287095965,-0.257377217659149,0.7212277272923848,1.0724699266100737,0.7016322333365679,0.22793083428405225,0.006647894391790032,Class2 -1.2284741040632956,-0.285176334389895,-0.2724172928165197,-1.6455341872330531,-0.3875047208569029,-1.2953564121047358,-0.8277559455497756,-0.2972951769863404,-0.4704935308336626,0.7280509373061337,-1.4783577154992378,0.3644313649648723,0.15787449153140187,0.20570955821312964,0.29104063496924937,Class2 -0.22127457864260563,0.47953853456456014,-0.33029341876094054,-0.5471378607390265,-1.2888567151046328,-0.5920697941856831,1.8673405214647576,1.2550375343262452,-0.5283154942543873,0.1279720190077603,-0.21295345148085604,0.8321431064342425,0.8632336244918406,0.652803091565147,0.907916204771027,Class2 -0.2177532096247693,1.3134344452587365,0.5572674585898425,0.009833120032373278,-1.0382449502180384,-0.7466042790097995,0.1486604154394957,0.4387406278955141,0.8897374498466192,0.07784475757316917,0.13801590668003919,0.7486519647180748,0.43962670071050525,0.2974882717244327,0.05652219685725868,Class2 
--1.134919345722507,-0.13742270766295522,-0.36378160695294465,1.038024359901001,1.0024713522055428,-1.028145408691236,0.2803626497853103,-0.8469477388064585,-1.6071358093248507,0.6469032740565837,0.06320167537770163,-0.34059091259102364,0.295511360745877,0.28775316569954157,0.17534724087454379,Class2 -1.5373743537291022,-0.14300408124427577,0.7854275126182126,0.5576987642781186,-0.5575966575603053,1.6332665649007174,-0.2966196024294295,0.4098481619616168,0.08627197846881478,0.6350225436082466,1.0813676012469327,-0.11201816423742991,0.21638715220615268,0.8479305920191109,0.8018233766779304,Class1 -1.2073006861804754,1.518036314020638,1.2598276744070303,-1.2348946070874838,-0.19229317630234655,0.6409412585655575,-1.2856220694950014,0.04745280796759979,-0.2853038439277522,-0.11392621532927852,-0.5565641122721012,0.5990351959279064,-0.9924412951804698,0.5906498711556196,0.5796274768654257,Class2 -0.4886364238272469,0.39145302740280874,-0.13212749730629636,0.7138136170352181,-0.18406820330064755,-0.991957788221204,0.3463959359143711,-0.5035787194084653,-2.012117510801601,-0.5142184966224859,-1.1570710960900477,0.7307538630148845,-0.4238840281032026,0.025492386892437935,0.3298569917678833,Class2 --0.16102471705437427,0.5386439526703835,-1.4276398482060828,-1.663638523413546,-1.3137595917283718,0.6814307542903795,-0.00424726128674445,-0.799634180319008,-0.4925621874419887,1.8371293464319176,-0.7645663172868515,-1.2458802992718343,-0.09204849926754832,0.309558876324445,0.6147326256614178,Class2 -0.47401582425965155,1.2010519930627308,0.482707403826344,0.1602439849872446,0.037116578654907355,1.5315419389140332,-1.3141863731774701,0.0603970890490958,0.9931375538944314,-0.785694476768981,1.3136537346709207,0.4000552961997571,0.07321321172639728,0.27534111333079636,0.3367095263674855,Class2 
--0.117058345644114,0.351489791376063,-1.8088160566415077,-0.6202040336860083,-1.773032569132811,0.750110650404125,-1.3737265632151225,0.24841757901788564,0.17972768688752844,0.923442762182171,0.9957663390627021,1.261988811238937,-0.8510403372347355,0.8759079019073397,0.4060077981557697,Class2 --0.41605710086301306,0.2511947817801802,-1.329920329031069,0.5844716623934365,-1.1730170746533946,-0.5616134748230321,-0.8059486031732538,-0.41578394412971553,0.5620034741726917,-1.7277660977933882,1.9973994786827147,-0.9946827756575541,-0.04597825929522514,0.8873530491255224,0.8140319222584367,Class2 --0.16173272989364557,0.9708961752302916,1.2398187675610892,0.9706479110043946,1.1840481738819664,-0.7534530333540239,1.4070485156467352,-0.17369322478914864,0.13141742851957375,1.3989163750856772,-1.2133848240102094,-0.1000676354237985,0.8439424657262862,0.7862748834304512,0.9268013858236372,Class2 --1.1217088276921732,0.6704115310215879,-0.645853370580619,-1.3310120672176997,-0.23673469381723128,0.4375889607989972,1.162630904593611,0.8003239744308743,0.3821304219671022,0.5725801004592236,0.10114199010508476,-0.5693733028852233,-0.8713306835852563,0.6104276436381042,0.8490001687314361,Class2 -2.097488046677572,1.8166484827125537,-0.5784859247007688,-0.2832257650698195,-1.4073105109064172,0.9643688248520651,-1.373657839050931,-0.9559290605918614,1.363054137142165,-0.33024029574962177,1.6814497355003102,-1.3690491022987201,-0.04328893031924963,0.6075932753738016,0.9884021254256368,Class2 --1.485094697694485,-1.1270125114623095,-1.0850418896636942,0.5500489834159522,1.0962233581412795,1.4830283055554705,0.3329088375299195,0.04012994761072386,-2.2093970771564773,0.5277431152861348,-0.04903599139791147,-0.6294136146404766,0.09858569456264377,0.20920007210224867,0.022409719415009022,Class2 
--0.34798996381730496,-1.8443533331686102,0.5533136570263368,-0.8838322572053908,0.19671112233842472,0.008381480374244615,-0.9725354905276152,0.0347809219848428,0.7072538944909841,0.5976912116506788,-0.5285248514604547,1.847206642951911,0.6457532113417983,8.993502706289291e-4,0.1351538256276399,Class2 --0.43372550502941787,0.9817894275003881,-1.855030278738211,-0.02904939237377189,0.49012051386391253,-0.41738500025776765,-0.9625011078584821,1.4768476180826087,0.8901720587166421,0.7193340820931526,-0.6161853933597299,-0.056697352769062875,0.3598514813929796,0.933630179380998,0.30146874487400055,Class2 --0.8540346975370738,-1.5092335933517758,0.6546266602214613,1.8860212900021953,-1.570800641279812,-0.5168338137944837,-1.44132096267242,-0.1588969669135282,0.03865380675648789,0.2385102554960604,0.6278740893043997,1.026180624305068,-0.94772376306355,0.3152677861507982,0.2504021590575576,Class1 --0.6119141834759444,-0.3832142768959427,-1.5248374514841558,-0.6992664077583117,0.9860813621796046,-2.9795644315141603,0.27494432912619765,0.1813100735935199,1.292709952637955,0.24439479640644224,0.5077421545979816,0.657898507833395,-0.7554263710044324,0.6483246434945613,0.9870194806717336,Class2 -0.863404467348387,1.2864711707450325,-0.8386353723767528,0.20012477610597834,-1.6898732524209563,0.0310655525725295,-0.5830600567589076,-0.9872503839748825,-0.9646013474106238,0.5251931230207323,1.5193358609016798,-0.15884639212637558,-0.00568727683275938,0.19103143084794283,0.6851819332223386,Class2 -1.371145828295448,-0.2460349604549713,-1.3258320165512112,1.3206204631893306,-0.13799077188580666,-0.43491804398112266,-1.1113690585436322,-0.14366988830652583,0.3949360782167348,2.0294050488032225,1.7748066599012515,-0.673291731427045,-0.0030174837447702885,0.018044648924842477,0.04549590381793678,Class1 
-0.5806296307185104,2.2579869340073846,-0.33850971796650026,0.04465823739317974,-0.24198794241749322,-1.7599820519564824,-1.9272930810310778,-1.627800076022093,0.705851351436257,-0.41148557343526526,-1.3967828365027768,0.8069121025421218,-0.7601564265787601,0.07441894407384098,0.7670428201090544,Class2 -1.1884498760891387,2.893903013211972,0.39754849639088813,-0.4804006178615854,-1.1496669921647593,1.8370350817691687,0.9963172433215673,0.6896592872356918,0.5527186890135546,1.4717657368198744,-1.7514244700587833,-1.173153543635102,-0.5434627062641084,0.022549782413989305,0.12131268181838095,Class2 -0.9214094905629128,0.9298719386817648,-0.4365257419206708,1.2475021021434645,-1.4527283712551167,1.5592802931233367,0.14169741458233398,-0.3179671080220299,-1.1703306659097,0.26209731993926366,0.17036762591608887,-0.6003971971518482,-0.5098984655924141,0.8972452455200255,0.18857973022386432,Class1 --0.4226593244242776,-0.6546749880205636,-0.4049910749433853,-0.04450275440796963,-0.6848170392474134,0.10737208091601753,0.3971104822988042,-0.9791130977778931,-1.1762661532776244,-0.6294244039736027,0.474360353581511,0.3117233795907404,-0.7376258289441466,0.24513457622379065,0.7639195190276951,Class2 --1.461230781526457,0.851745297090515,0.6050023656053332,0.656635167535182,-0.8982297999787374,-1.3989257016518923,-0.4604628782480214,1.0088423611877504,-0.2657489201549954,-0.615576864653913,1.3437417433452243,0.04898586499075976,0.700097506865859,0.6495635197497904,0.4610231015831232,Class2 -3.1839159958991345,2.866913205939511,-0.14602097628948735,-0.5561549958802872,2.353414589777768,-1.0851826355267213,-0.5728455602653093,-1.0252072417002873,1.1204379897695027,0.28014250233423743,0.35120052278489255,-1.207348657465378,0.6192568154074252,0.7893622149713337,0.6104860280174762,Class2 
--0.8405176074356909,0.044612234997185585,0.15612652414969055,1.721936597222115,1.10241485102708,0.33026170684133227,-0.286617815318154,-0.002673819312452735,-0.20082897054552798,0.7078612253043782,-0.5637477917196069,-0.9446101732939236,-0.41594299487769604,0.7675450018141419,0.8359191010240465,Class2 --2.3649481442962754,-0.9763061939225601,1.4127259295318912,0.5663304506212199,-0.3171634991887972,-1.1687163587605767,-0.8745851244623482,-0.07027476165466542,2.0399553237983263,-1.011327716014137,-0.2589140740867586,-0.34519640992956613,0.35253065126016736,0.6243738909251988,0.5418955257628113,Class2 -0.08287078600946096,-1.8355629217901894,1.0366234606526943,-0.5435340045552872,0.0672082827469414,0.43049035002155145,-0.7812046431937989,0.5000057166459192,0.7848933916295217,1.2511478418547566,-0.5718069173103032,1.811298285719852,-0.42985136806964874,0.23815409815870225,0.35260737291537225,Class2 -1.293244337078643,2.202260485586776,1.3092779603894888,-1.358585347216338,-0.49460380908093643,-0.47572191196126135,-0.4987401345905045,0.056509711137112166,0.3041185823843476,0.021728681408596875,-0.4120586442708536,0.7430772837659113,0.8857726994901896,0.9642662166152149,0.6048035977873951,Class2 -0.6842228645525072,0.09202235951372517,-0.031344801880213734,1.4323902966822282,0.043750676454997355,-0.07032982880077994,-0.699322323034624,-0.10666535154358893,-0.8830363865627207,0.8755583760417405,1.5471102332265234,-1.3654657091212161,-0.5535645522177219,0.3780389733146876,0.2217163334134966,Class2 --0.11866392337966458,0.0884963656006337,0.40944794010418084,1.1900667954946191,0.410963280660339,0.4990486150032716,-0.6614334642099626,0.8107583341747957,0.6123206294705786,-0.15149831790374782,0.9923363276805645,-1.359269468360839,0.863900616299361,0.6812727591022849,0.4293688256293535,Class2 
--0.16723290478034897,0.6005077364660161,-0.6781931897866808,0.33356593773684673,-1.6877413314934249,0.6346613195183255,-0.9007574012679588,-0.809187767167643,1.5278164103630698,-0.21424633295453802,0.5049397558122798,0.12792257251477568,0.37202692637220025,0.0943426568992436,0.18454944505356252,Class2 -1.4294745451824127,0.831721393177576,-2.4673754477776684,-0.5342755275090847,-0.41447321492902334,-1.0631365682336111,-0.032439294059661644,-0.37583020495118696,1.2742248375178125,-1.8093221826344144,3.0352510859082296,-0.08841098346024398,0.18937817588448524,0.22710726503282785,0.8785998625680804,Class2 --2.6545899210581667,-2.3670361821875177,0.26366153303946344,0.7502746849542984,-1.074469431978526,-0.8441817553263238,0.4567124984698954,-0.8056769166994473,-2.0947314076707926,0.23209190234526683,0.10172173508977744,0.5616136727992267,0.7155450121499598,0.5307079458143562,0.7784276439342648,Class2 -2.8211258734851747,0.6987583786342806,0.853865961912677,2.2930842233970106,1.893688715081478,0.8551342587302219,0.8951789873939793,-0.3865549589469178,0.44271016324693124,0.1650739933567936,-0.32584182612343576,0.9847796275914884,-0.1314642303623259,0.025771674467250705,0.037781599909067154,Class2 -1.128473113614312,0.18226616989359018,-0.2292121612756854,-0.6634624364105183,0.8510197544547842,2.0020387515102667,1.070837125837899,2.055637534286974,0.06412851206292111,-1.1519275943985416,0.627068649910609,1.065747462448149,-0.7822792488150299,0.6037648348137736,0.5287889488972723,Class2 --1.3898259892263112,-0.2130389123111206,1.3025934434530606,0.6339762961497799,1.2864810578481902,-0.9667521355998627,-0.17238956658262877,0.13297917716268165,0.20707271544477687,0.8712904497761689,1.2526195498838433,-0.7450801181028169,-0.8055407237261534,0.4263644127640873,0.4045515905600041,Class2 
-0.7717578367963368,0.8504549967666045,-0.3079790681591325,-0.5354203246602411,-1.597049715299388,-0.8489900622006537,-2.096707337254421,-0.32578797811557897,0.6298249237766185,0.051097639110587664,1.2442224115106644,0.4341276667547485,0.6738047883845866,0.8709556465037167,0.6766971356701106,Class2 --1.1307454246956201,0.2317784910290145,-0.9396918063969365,0.6405822663105174,-0.7199571199219348,1.4044067900567019,0.49214786857152604,0.7332019036044318,-1.2468878832025847,-0.21284277452254785,0.22427277361633724,-0.4069196703533122,0.0967324124649167,0.9194884363096207,0.7985730187501758,Class2 -0.9194249225087353,0.6998020848082726,-0.04498926491155066,-1.5581858088548732,-0.6374558983845334,0.3407805698598384,-1.1972613838166732,1.6825725171711545,-0.5980125300298589,1.9147620811191792,0.5323492918689102,-0.47808472354354903,0.13483369257301092,0.9313603814225644,0.8004707195796072,Class2 -0.8562789353518299,0.3758228851973917,-0.9704102448046218,0.2827808342470235,-0.2765054156300642,1.528829504313124,-1.065444257314231,2.7031901111203887,-0.142496416777699,-1.2313321950442093,0.4740611625555108,0.08070752518430216,0.8033615150488913,0.3301027831621468,0.9193095767404884,Class2 --1.6742714272060892,-1.749613468615427,0.12900308993020015,0.467410003103536,-0.6141953042473666,-1.845588360182228,0.27781654840859016,0.13079290060836896,1.7009244034071098,0.4061265033822903,-1.8081252981364218,0.3131943315462502,0.9425377757288516,0.6962020986247808,0.18868066417053342,Class2 -0.5898265179267999,3.481300458936264,0.885935481596954,0.3702021967604859,0.2642816458606955,0.5274226887814287,-1.3472074006234112,0.7498824064281738,1.0038165101054517,-1.4364713557915165,0.35677009432016443,3.08780836244923,0.33712099585682154,0.7478544469922781,0.9413222393486649,Class2 
-0.9932132238606357,0.8027041736560525,-1.948143998489946,1.6161493745539648,-0.7887714603815364,0.008231341108318904,0.8779673281462217,2.0459106535913967,0.02799026635879687,-0.22595586267388917,0.492880531122886,1.8709940618452536,-0.599955749232322,0.2856680385302752,0.9032681726384908,Class2 -0.20324583784042013,0.6255978534210712,-0.06453388673408993,-2.9200040018500357,0.991350372352362,1.9503002296310759,1.1223611941925908,0.8623929397487416,-2.2364310108601733,0.4493868254798911,0.190449315923075,-1.3794344916300847,-0.5688167200423777,0.84146194672212,0.19872371712699533,Class2 --1.2824972870150695,-0.35746457134391285,0.6719882359157856,-1.1088787092659633,0.34437545032183364,0.4529725822371297,0.8162523804291003,0.832288370419624,-0.34869429507263155,-1.0183007688257444,-0.18643557666438076,-1.968971572183152,0.6062377253547311,0.5782882827334106,0.7140770866535604,Class2 --0.549354350701063,-0.3521430227919495,-1.4585105788571147,-0.29618040524723394,1.2565939529175016,1.7766223636201832,-0.7914361245521038,2.246965813892608,-0.17793402526867658,-0.9073454053158567,-0.34879518080039046,-0.9817683645595663,0.49339356273412704,0.9205076145008206,0.5937626734375954,Class2 -0.06385653317667295,1.0657019255749955,-1.3134697186080686,-0.8518454851677242,-0.8066611251179593,0.8375242134496642,-0.9500801529360938,-0.16416071914552993,-0.5610237225789363,0.23020652284619417,-0.47799153328922617,-0.31277293790335353,0.8709046789444983,0.8462132322601974,0.5307705206796527,Class2 --1.5665997035217267,-0.7305876397200879,0.6456484935143026,-0.704819902118906,0.02694275056388552,1.607584891587356,1.2846893059786593,-1.0464030213027682,-0.46380832255984455,0.6827401672257036,-0.232650326872209,-0.9304893175752699,0.8525503175333142,0.7515283874236047,0.6719401241280138,Class2 
-0.29016964835256087,-1.315630224696942,0.6881088391098776,0.802690182430939,-0.16014206684399243,-0.21707786031163795,1.1590804905215875,-0.6838803091926114,-0.5308327058183686,-1.5469415401885551,-0.7585541225160368,-0.6670605546042774,-0.14289501309394836,0.549192005302757,0.7410502685233951,Class2 --0.9578555557625601,-1.227765815606037,0.40680842899535385,0.9641269226310951,-0.5230771551204312,-0.4719216903257049,-0.8156452002577478,0.28548129095816543,0.32763900908024957,-0.00937800425278977,1.9114419743238733,1.3204348168179338,-0.042291194666177034,0.9176070664543658,0.6377582279965281,Class2 -0.7244644968986063,-0.6922528623940625,-1.0183932861276161,-0.33119454409416343,1.376789912499504,0.947715973609672,-1.2382021129933003,0.7695443839881284,-0.5888639881224349,-1.1031331735100742,-0.12702094131277566,1.015847818136949,-0.8240005294792354,0.737206490477547,0.5115868030115962,Class2 -3.160559903579564,2.762968121095657,-1.0410403450668018,0.5883504514682776,-1.3132941339707223,0.14569233739003376,0.9455509615765822,-0.5439145525418709,-0.011809829341735798,0.15543384629017407,-0.6885745990999685,-0.1288058874447416,0.038164922036230564,0.7772197024896741,0.9755900073796511,Class2 -2.3554267024934084,-1.0481562420870665,0.4792204881352178,0.3241741523223021,1.0422925489960044,2.8004450303556063,-1.011148268133171,0.5451218228159415,0.27001314421470257,0.8605446643076965,0.1863311023231385,0.15432140496671554,0.28095200564712286,0.28070750017650425,0.06466015707701445,Class1 --1.3686513064278276,-1.9199805277345028,-0.9526334643605927,0.2549336486899774,-0.42294714542109285,-0.33680111975593024,-0.9494776785388676,0.4037816595850667,1.538560102511869,0.3221397550492673,-0.06887988186147291,-0.4985610678914709,0.03605218278244138,0.16127677843905985,0.7259680738206953,Class2 
-1.8843746902192855,1.6641020483318316,-1.5190277971729782,1.2367698573854888,-0.13467535452347162,0.11267537060313253,0.5126014897978929,-0.5208153136794733,0.6099754369394167,-0.68350997247267,1.4925044038155497,-0.7101527752182768,0.2396276816725731,0.6504093294497579,0.7667790022678673,Class2 --2.05591021491753,0.16174044495626516,1.1047105522387402,2.1483082739623804,-0.493591626448445,0.22121817427788834,0.5997811019663475,0.049953129345705526,0.3463914629478533,1.0980588103739117,-1.2863628796619573,0.278504418244919,-0.5632350691594183,0.3995240933727473,0.8674560457002372,Class2 --0.5983226938738837,-1.9687142056647815,0.452824100666685,2.469887288959244,2.213316132392086,-0.5728271548441748,0.5541553040528627,-1.8691758850515647,-0.21507461824680177,-0.6281650814887568,0.4556282996595504,-0.1560765025671912,0.28098320960998535,0.9466745138633996,0.013781569432467222,Class2 --0.6088080684381318,-0.14258072215958978,-0.2471589158493158,0.376455578682956,1.1337460311425105,-2.0634349664008647,-0.14095719328368003,0.12063634306905684,0.24442045301746435,0.036809526272257524,0.17971299491810086,0.24124786234231999,0.5028346250765026,0.6619574797805399,0.5003537682350725,Class2 --0.8841536312708222,1.6827476005722803,-0.08080124813413003,-1.8719354245459543,-1.165648086120638,0.9072640612640497,-0.014588718938878949,-0.9523200843195548,1.1216736814532131,0.38201217396106874,0.5716627377483225,0.007959903242219407,0.35616292525082827,0.6472973627969623,0.2710936691146344,Class2 -1.8367937524765754,1.1305244343827812,1.7590748070720577,-1.1095943133543538,1.5901600201139026,0.9642699954583714,0.5553946506023875,0.06423114413307876,-0.15358003902173273,-0.680336832836049,0.7604106741926903,-0.0974616268628302,-0.3174692327156663,0.09602873702533543,0.6518949966412038,Class2 
-4.13937796205562,2.81988512634737,-1.0814174710353106,0.8094997014299665,-1.422957646315557,0.24460006879885662,-0.15124080425250652,0.4417407247214615,0.1564958553561863,-1.5368687303115176,-0.40728867029878973,-1.225809769229111,0.1073217699304223,0.7163123735226691,0.5230120290070772,Class2 --2.3182858938756854,-0.9996315674051361,-0.4949622776941344,-0.5443246585005993,0.2827528313337582,0.47689704144719935,0.918472178797798,0.6294199264439267,1.0583520662002739,-0.9317194688311027,-1.0837737091612578,0.975491039827066,-0.27158820629119873,0.9348802538588643,0.9199973561335355,Class2 -0.5983231521280403,3.2143948588120406,-0.6820887411013025,-1.5455883770870682,0.4252527460775519,1.1559948069174892,0.7353962390022871,-0.9838785601264869,0.5945893258581813,0.02983554707237267,-0.839452527146444,0.8526050800270655,0.5645878640934825,0.5232489067129791,0.38607917982153594,Class2 --0.19440830116216845,-0.6032977402654393,1.121473197818153,-0.15960704409864393,-2.4268600094200896,0.9786358406196026,-1.4636126515189942,0.3137667203213872,0.8440532529047929,-0.42426637503611625,0.7530500385182968,0.09604096670653448,0.8005502205342054,0.8622612578328699,0.541708379983902,Class2 --0.9192487522244301,0.5953290028742568,0.3409798739069116,0.551027740333281,0.7637536297416697,-2.0030320614559707,0.004811029056051961,0.5387000639030527,0.7259559301847666,-1.5177778960681316,2.0320068421614357,1.1034685711077725,0.7702316837385297,0.6863417294807732,0.2962699963245541,Class2 --1.144788310926202,1.1414779135451603,-0.14593125926093115,0.3109119221273432,0.36205646784322176,-0.18847093319270955,-0.8471922726312778,1.3645555280458774,-0.8391400696093076,-1.1910780035282307,-1.3965440510757847,0.5583873634399559,0.9891335610300303,0.7883672036696225,0.1114681619219482,Class2 
-0.5746972440853666,0.776125732775397,0.8361063651761311,-1.3263942322761988,0.19792511694033493,1.431230160424534,1.7227509907948617,-1.147010604300465,0.7822110783283871,-0.3093016862328251,0.35724303927504575,-0.04197745458868923,-0.4957021498121321,0.31839611614122987,0.366629408672452,Class2 --1.323375653555186,-0.5632841329163646,-0.2242397916354349,-0.8885162983629231,-0.1637590387769152,0.5302274390206193,0.4421972165996318,0.1737085323887904,-1.0985142008699043,-0.18933350088731696,-0.26221658885733934,-1.1319269913596677,0.9124924698844552,0.8477946671191603,0.14842246123589575,Class2 --0.6594846779298268,2.036532992875679,0.4228690530373286,-0.7593522422029917,0.434382663437157,-0.4402725023392082,-0.23986219573452647,-1.1778970916982392,0.5957918083619884,-1.3343099498487452,0.15253320228775336,-0.8173229970419699,-0.05730753904208541,0.14132982259616256,0.6217505398672074,Class2 -0.42956376873587154,0.40912385964239806,-0.1459058005590327,1.2562963095540416,0.9378071527598371,0.9973984903038916,-0.6988440330842868,-0.3453318150905414,-0.13944817395142756,-0.29603460431540796,0.27442971274027034,-0.03266401659791087,0.018532559275627136,0.09326960844919086,0.48865738045424223,Class2 -2.3845222206033734,1.2234759288350727,1.1051535319561892,-0.5880860487728764,0.6792937602690984,0.038204113097163525,-0.0786742062415126,-1.759568876845153,-0.5007783257623075,-0.7910612570323533,1.5515671977070715,3.3855354516420952,-0.7673504515551031,0.729742118390277,0.7827844151761383,Class2 --0.05211370987579478,0.0683401892808838,0.33620496884740364,0.37868240539394277,-1.1132435533999407,0.718983871311997,1.3551765328857757,-1.6146532109305232,2.220344017056206,1.6302324198679266,-0.4599439893182724,0.8881606075380469,-0.34858462726697326,0.09196231374517083,0.8073192944284528,Class2 
-0.053276083674550626,0.12958719116099188,-0.17485833489621092,0.35434243418880274,0.20076359450649464,-0.8993353123868835,-0.9709458864223461,0.2756862513956336,-1.3584202472010625,0.7051158203905966,-1.9587952612503519,0.6637128621579934,-0.500233668833971,0.41656527761369944,0.838717334670946,Class2 --0.4852823132260621,-0.5407761694136913,0.3182697372934572,1.7620878981015868,-0.4655224108291931,-1.3276117304638881,0.15348181266350405,0.6223466612608268,-0.3299154309214354,1.1559797788455184,0.6939125129707601,0.15419370581580033,0.15632635029032826,0.19145602639764547,0.8311028170865029,Class2 --0.14396565206472378,-0.2638698746408644,0.6245852770822163,0.7171708497013952,0.9362553140873476,1.6453631719937154,-0.6525407764964998,-0.6911739248924484,-1.1696387202748084,1.030218434992226,-1.6795571274798224,1.1219104696858089,-0.15408224193379283,0.10995417111553252,0.7590049786958843,Class2 -0.5324258147862904,1.4578190268993199,0.08733911904411833,0.8124154748844357,-0.624167991928936,-1.0158024925313287,-0.4257162743756832,0.17533468802501687,-1.148582023071949,-0.6060925117157762,-0.7179732191844826,0.5963536019627353,-0.04962463537231088,0.13517223508097231,0.02888230886310339,Class2 -2.1958581170793314,1.885752802944392,-0.1549130754026568,-1.126328387778004,-1.0528390628503586,-0.5626305650021426,-0.563332183269894,0.19487890290141066,0.33771158865036344,-0.3918723732673181,0.783332864059641,0.49845032520762117,0.4862009845674038,0.899923033779487,0.8587279678322375,Class2 --0.42766608705594755,-1.5822834469417657,1.240567019894699,-0.6421134405733366,-0.24502651610548487,-1.8783172291271384,2.2181398021928445,0.5830224620262318,0.7937680910672328,-1.8962832564799061,0.1913454271861447,-0.22061998992203194,-0.4089716007001698,0.53777133859694,0.9155036401934922,Class2 
--2.9464827498346304,-1.9682693095465609,1.0434034914637753,0.04173381206185673,0.7305826257512436,1.5425680012839433,0.9109917349218078,-1.7250653455239535,0.16851276528271275,0.18747375532805188,0.03562085727810108,0.23455492407432937,-0.2804936822503805,0.07833928032778203,0.9329936651047319,Class2 -1.1613344651082746,-0.018534971268292733,-0.5670905892529976,1.6029346747408986,0.3871151632033668,1.0098357738126345,-1.7636312975127189,-0.5927658308642215,-0.4866306604404619,-0.6205760163366645,-0.8956220300893182,-1.1784879390663796,0.44081939570605755,0.34649421530775726,0.9533622970338911,Class2 --1.2795023637138598,-0.746814420601717,0.0924116311155459,-0.39593089026098144,1.4216262550184626,-0.5067627570050786,-0.4607481372635546,-2.037873132348939,-0.9151446069770323,-0.7827081449691674,1.1476782583108645,0.15944675204633202,0.7370856804773211,0.6821855320595205,0.8485781750641763,Class2 --1.6057015826808647,0.6517032826586009,1.5118738542420902,-0.41424010155234714,-1.3315614647094602,0.6288612436241917,1.0447745283771428,0.2823454468039889,1.142425041484824,-0.5608223533744049,0.6426388549691123,-0.06938382361600437,-0.700656761880964,0.7561244941316545,0.9194769011810422,Class2 -1.8684210116399194,0.41517678646863676,1.5793294339514328,1.3520161661831192,-0.5747394183200358,0.30594772537088594,-1.3417136109533276,0.3655022173688411,-0.4088363948432239,1.0822296080547729,1.3235568053722069,-1.0996720693576183,-0.2187293004244566,0.8478550019208342,0.42656461405567825,Class2 -0.7451661953752391,-1.044627938392036,2.147618537790058,-0.4636728281694742,0.27579922778359806,1.2872480942153723,0.617095315261169,0.6624243890120224,0.9424884220921788,1.2398059783263322,1.5389035738359185,0.8741466209580596,-0.4949814537540078,0.9993806348647922,0.34985760576091707,Class1 
--0.812088228332471,2.306579199435907,0.3719209071881556,1.2341077552150586,-0.034726655251190626,-0.9893720720023114,-0.4590136909396361,0.07179521708122445,1.8524071498837051,1.9083360487895877,-0.44228893098421335,-1.130347012701946,-0.22966689756140113,0.03163836966268718,0.005074945744127035,Class2 -0.40255170638054577,-0.5411094752854282,-0.3011395611322164,0.7548036915551737,0.38910826239867197,1.422393045320467,0.40587664539049234,1.0739612551727231,-2.09558012132419,-0.40951457851656725,-0.03603807552958929,-0.0031686971081691896,-0.8517310973256826,0.6963139940053225,0.3552886643446982,Class2 -1.001527448238151,-0.5871421327010391,0.7361233647606854,0.11575627085331266,0.3072939504133268,-0.06104714080530985,-0.47252189798420735,0.8044142284122028,0.01059157671476336,0.0649592852279908,-0.22842142093229467,0.08002518182945978,0.37596630910411477,0.9597704666666687,0.21260478836484253,Class2 -1.5340325360207132,1.1099472291424075,0.37152428509795876,-1.234448195366819,0.9946348984616914,1.0899938236290438,0.12406292780396046,-0.24503738150556903,0.493643589196552,0.4086535214109849,1.0614453885903565,0.7589307938132303,-0.20970032084733248,0.8113808687776327,0.25159568921662867,Class2 --0.4829723389308195,0.9368008114789775,0.30428497614097394,0.05978878543169554,-0.058875689016234106,-1.506436884913659,-0.5691042160679707,-0.3056338310271463,0.35481062174173716,0.2129405685103631,1.349855037004524,-0.2523899720617782,0.21201931545510888,0.4941742036025971,0.40727419033646584,Class2 -0.25416988874354374,1.126407992310174,1.4361048681556015,-0.47916511873168105,0.8763237585675371,0.6152435277720899,0.937447443443214,-0.1941163087608145,0.8621013208289336,-0.24337704022464846,-0.30787445994300905,-0.8083524880671208,-0.9308091588318348,0.07699156343005598,0.10246456298045814,Class2 
--3.1636402114274866,-1.6840123144776578,0.011018809509640395,-1.3370236624976826,-0.8679521806406643,1.3615243779620287,-0.7814968187340842,0.0548958279911922,-0.6508691416063156,0.06956176085093561,0.5996342710001007,0.09231132713667264,0.7672256296500564,0.598036544630304,0.9528287074062973,Class2 --1.4138473886699041,-3.6553504225648155,0.8011872924309319,-0.7106530147763452,-1.987493697897529,0.0323953039659615,-1.8905838930201593,-0.5304089936464147,-1.3784201078427687,-1.211040579449877,0.10388983095072618,0.6058563751178634,-0.8376459274441004,0.8532007150352001,0.7817199286073446,Class2 -0.004495928356654591,0.11026870760398573,0.8017736887626586,-2.1258601088150417,-0.442161339697442,-0.6818170692618001,-0.14334393868430523,0.9375176666209111,-0.6637579937304425,1.2109436088064498,0.2747758033701797,-0.23457513911573571,0.5853237146511674,0.7270907261408865,0.6828216628637165,Class2 -0.004569487329909465,0.0737575993148632,1.207350263208532,-1.227271444157788,0.3547854563604433,-0.11763207376424299,-0.010129044756361446,0.851286943532229,0.9069771333103078,0.4232979519741328,1.781502974074895,1.342060669122777,0.26731182262301445,0.5916475909762084,0.31623677373863757,Class2 --1.5449771719432748,-1.6406746718763052,-1.118426361352921,0.2686865710020185,-0.3027796258042448,-0.5707600431923558,0.7710549509715294,0.018622604023705507,0.34371863051708207,0.012014568902102106,0.59428833625426,-0.04572677392598422,-0.4361626789905131,0.5253282631747425,0.31923021632246673,Class2 --0.4339821939532035,0.25098264582167595,-2.6987313348151227,0.7082026274874054,-0.5273934727331331,0.057972290959251445,1.2537773100012402,-0.6853478403642133,0.9418200366181126,1.1289869282498755,0.5807155559049272,1.9651597142640918,-0.5875519877299666,0.05663997493684292,0.8918784742709249,Class2 
-0.18442115219803618,0.37070512310271747,-0.0920483912509693,-0.45826402123495785,2.598656127601382,0.8321213139585295,0.4073739226288319,-0.879019823013582,1.8197058213140662,-0.16074258338814168,-0.4115216281387358,-0.9540253576323308,-0.9402708522975445,0.7374773288611323,0.2047640907112509,Class2 -0.1527721849877186,-3.4201105257744664,-0.8168766820022372,0.8332956104178005,-0.4112792806235934,-0.621523627481532,-0.2907634875435568,-0.30714709472570784,0.054142349297215765,0.8193183277539265,0.5290461351187842,-0.25759271832164155,-0.26744937477633357,0.6694544558413327,0.7616723186802119,Class1 --3.1243602098861807,-2.1783511479925988,0.9833429252675697,-1.2161017878621578,-1.0724783123674408,0.08839859091178369,0.1482034134631059,-0.2887758619011152,0.9871874542737976,-0.7152232136069565,1.9197801696743224,-0.08435405591811969,0.02732840459793806,0.8596496221143752,0.6545074612367898,Class2 -0.45606633581343214,0.5085712068481404,-0.455285734931381,-1.111266077376272,0.4633643202224363,-0.6164946454968339,-1.00415347483681,-1.2310756226496367,-0.7303143649568188,-0.7387873146776137,-1.924032890282426,0.9419318342084079,-0.6669106050394475,0.0921169517096132,0.18233159626834095,Class2 --0.3573847547552076,-2.009264758811609,1.7346486621456025,0.9755371229250299,-1.1851362992812373,-1.3770101342223902,-1.43886986887599,-0.7654766663934692,-3.2175761932947275,-1.2225627215314272,0.38393972113204783,-0.28386761527572574,-0.8665327611379325,0.08518936112523079,0.4517408574465662,Class1 --1.6253022028109254,-0.1671422713345203,0.6079788280955642,1.5493093364579225,0.784944854160892,0.35251139321261776,-1.2476238272008942,1.3313174248797546,-0.12619051595886768,0.37917345842261646,-0.7951605428164076,2.20213789421085,-0.19234761921688914,0.007707660319283605,0.5508938345592469,Class2 
--1.1674959503970888,-0.6775681337081204,0.5274465257589517,1.7468119948493832,-1.207666653463924,-0.9344133297808225,0.09208432290113816,-1.3789274047186229,1.6252166964568056,-1.5022880159413228,-0.9820098379294014,-0.7625485636799705,-0.6223830129019916,0.6776013777125627,0.8879733870271593,Class2 --0.4897745985014124,-1.1641202467506064,-0.15032323796680763,-1.021018362951766,0.5387262416242695,0.35454307987399114,0.6074007810943035,-1.1281971487899887,0.3446966710699836,-0.2385177814266565,1.0992189822708165,2.084046633468637,-0.28437460446730256,0.9963070775847882,0.06780681107193232,Class2 --0.7567974279943249,-0.565549430203646,0.3145570172684985,0.4125336621689053,1.3386381124975484,-1.756812459984965,-2.501504461046595,0.4657949355664656,0.5169431096688804,-0.021667813357596516,0.7448027123691399,-0.46812335133516775,0.6008093999698758,0.2912717037834227,0.8216193113476038,Class2 -0.7482068629490478,1.0719665096896134,0.9823139538074664,0.919669398413333,0.44006193495695956,-2.1003864046696186,-1.0191517886980237,0.44803899298184935,0.5818857285727932,1.3851927904108599,-1.3587396185137586,0.300112238452014,0.08570608869194984,0.23015519534237683,0.8741147750988603,Class2 -1.327001070561551,1.4570012624642703,0.933919674281423,-2.4425785995752096,1.2723313346531104,0.8118135914348475,0.20609441917499727,-1.0439935199844024,2.201015065256916,-0.5356731200978241,-1.5031404607116585,1.17423293852154,-0.10474088275805116,0.4902721527032554,0.44518273370340466,Class2 -2.157673226051335,3.338773394016533,-2.8992207042423,0.4700397420979905,1.0426621078021308,-0.7290188267235514,0.7342270690132211,1.3557521808814639,-1.867383146970368,-0.05172516835080512,-1.2905695432090374,-0.4209099436398561,-0.9453203422017395,0.39932344015687704,0.7380742388777435,Class2 
-0.9106266282826789,1.343067497887393,0.4879603436688567,0.9095411558478227,0.15416489576874146,-0.16548483692262658,0.7449102744576261,-0.7938048686006218,-1.071521015444106,-0.4813678692865114,1.2713157732618938,0.5925256558804751,0.7612790735438466,0.4161036675795913,0.4697798127308488,Class2 --1.1214941379706405,-0.22099616258320143,1.158239433881981,-0.9433238244174601,1.6354802700376547,1.7227781277378251,0.1723402430075664,1.0616467145124528,-1.16027667471417,0.8353937868143421,-0.9579575964192446,-0.951732159002375,-0.0029884804971516132,0.041444466449320316,0.9083397802896798,Class2 --2.083124769899343,-1.8202249642354478,-0.503815098633265,-0.16071817958989842,1.1701484665552955,0.08335344523505513,-1.187473560984416,1.0315071984997939,2.1032147785959876,0.5542369066344268,-1.6988786349287184,0.5537735378337042,-0.21211720537394285,0.7849767170846462,0.06286792550235987,Class2 --2.200459449200899,-0.4910257195060541,0.20493075600998642,-1.0359873497444354,-0.0550174454973381,-1.4131553663295344,-1.3286886709340935,-0.859667072824217,-0.2524126359878581,0.6209253003732388,2.927170602848353,-0.30042571001035434,0.8410697733052075,0.7225585340056568,0.24565404606983066,Class2 --0.892078539314252,-0.7784425661548074,1.9358593490736622,1.5734288383058914,0.18583777501566076,0.9830685079239553,2.34677735734849,0.9601264978246334,-0.055641040966834136,1.7827721314169014,-0.02085060495705877,1.7128641543580374,0.30326252710074186,0.6893562055192888,0.7856341025326401,Class2 --2.219525907071909,-0.8146625921235949,0.7102779762400974,0.8839593221016586,-2.207128626805534,1.8496499481047748,0.8648861784168674,0.19489662695123056,2.06698967703238,0.405674120580388,1.2366248799907482,0.7024025544197822,0.27982966788113117,0.7898354511708021,0.7067501766141504,Class2 
--0.5353274423768066,-3.731012884129445,-0.5158464386022105,-0.7659929607189278,1.0017349423889472,-0.5850865947817013,-0.1804292369517074,-0.7479042895996602,0.6714148383758003,-0.5210868202074895,-0.2139093833061785,0.37941313900426527,-0.009122304618358612,0.3516147108748555,0.9206082746386528,Class2 --1.095023155576663,-1.9675709099185728,2.056136979877679,1.2514210243360688,1.3653380362511285,0.2490286903102468,0.34944806794952576,-1.0825292802491684,-0.6664174089782822,0.040215438328846886,1.365142050492793,0.818601506204284,-0.8911797613836825,0.7326610656455159,0.11113745463080704,Class2 --3.0380874558629136,-1.8304682334393174,0.9623139267413822,-0.692761241902667,-0.9395765273810766,0.62097674400517,-1.266040482150409,-0.01872403457988219,-1.168934547668809,-0.38892052566834706,-0.1992901597588184,-0.9774082351039464,-0.2611840288154781,0.3634063918143511,0.21148818987421691,Class2 -2.515670627308411,3.6075401770814017,1.881811613978287,-0.744900452819399,-1.2032059898192875,1.154122595600569,-1.617578691900675,0.8857291018505566,1.3627686178695548,0.14168348552590826,0.5823740772876151,-0.8870129282146363,0.5258220625109971,0.27915793750435114,0.30889240838587284,Class2 -1.8617582501055376,2.1007300152733825,2.189308700586417,-1.0098573495862078,0.4710865818899476,0.4223683949051558,-0.3157005833396033,1.2046733989341352,0.6821857390139092,1.0017035820174456,-1.5974004875348375,-0.6082844091411832,0.24841906782239676,0.8871237884741277,0.6662138069514185,Class2 -1.472131800294581,1.5831737749455899,1.0467490000658204,1.1822574307034033,-0.403075244995331,-0.885794371392961,-0.9329076555444004,0.1978059301712735,-0.655141027388112,1.0185331456340703,-1.0153098504143663,-0.36711552275752596,-0.5932887434028089,0.41482604504562914,0.6406665723770857,Class2 
-1.6946510851327248,0.30322394834639643,-0.5747733504670623,0.3013926457381316,-0.8115450203197128,0.5960045777884931,-0.5699934882140621,-1.0554771284327378,-1.4425579610941115,-1.4852218839242268,0.3348514053693863,0.4204094023850344,0.21204788517206907,0.3011096359696239,0.7019954044371843,Class2 -0.5199871951289311,0.41596864610415973,-0.28147373793216773,0.7748919182092555,0.32228613920237065,-0.059028956670002596,0.6815714761838536,0.8776203574344501,0.2864360936524159,-0.45071174927878455,0.6683156991367625,-1.0996178662599037,-0.30718786967918277,0.008778085932135582,0.13064812100492418,Class2 -0.7488207770005457,-0.8921559099882277,0.502896585316946,0.3527884758847615,0.628042539648205,-0.9685932349943477,-0.7721910940092909,-0.5935064979631065,0.26087313768386644,1.8081621391252627,-0.31162423729062055,-0.2569474234530053,-0.42645951779559255,0.12386105349287391,0.46846285625360906,Class2 --0.4320763972742534,-1.4325566417142872,0.47308694217611014,0.6359086548075046,1.1283095569606905,-1.2132517382574255,-0.020609843922948384,0.7500053505693846,1.6442018098224767,-1.1441693370782704,-0.1374842969305766,0.675454756554721,-0.8207358219660819,0.5457771650981158,0.31474803062155843,Class2 --0.9650003217543182,-0.8790378313175731,-0.996518575514587,0.2891567388688704,0.8711756800889916,-0.5795383230320326,0.44607672586759756,-0.5360652184197873,-0.3039585777336844,-0.08876286127577655,0.7185136988159284,0.2386334370689555,-0.7021291898563504,0.46542866877280176,0.7329977420158684,Class2 -0.9065596312862716,1.0576931706834418,1.432874682788882,0.9780486664099282,-0.60408808796389,1.0279926314200658,-0.2317927036133373,-2.5963716681450415,1.5226404732064798,-0.06695129603028607,0.014194051107462581,-0.2836631992907258,0.8277198947034776,0.7573331585153937,0.9560166306328028,Class2 
--0.6471241821582285,-0.1764120920901952,-0.07641218603918543,1.007617429472091,-0.6910675648525587,-0.1265647077248688,0.46333094818187254,0.7242485568114643,0.37430111129843807,1.4161795027887614,1.6636977092607168,-0.10276434707905162,-0.15644056955352426,0.6670893209520727,0.35886809532530606,Class2 --1.324294741325407,0.5595067454103378,0.38760911979279944,0.40899528308384114,-0.09267078810896696,-0.42060374074219375,-0.6482439616144146,-1.535573069508683,0.5291695941542603,-1.4621451495593325,-0.27371680779996577,0.4739742515929233,-0.6861946862190962,0.8998114843852818,0.7669600120279938,Class2 -1.7352812927383123,0.8437225731364125,1.1834185175647678,1.265015458582357,-1.1235574239104447,-0.991767692869815,-1.839245332098297,0.6978508931890447,1.1892415871277608,-1.2692154463896979,-0.019018641107854263,-0.04332943274334894,-0.57306521339342,0.7861379706300795,0.32828978495672345,Class2 -0.4065149465148302,0.8759465298375523,-0.8493322269955192,0.7515289625707607,-0.4439405256140762,-0.7147623129043168,0.5726691744141077,-1.6669037545880634,-1.2433345181821882,-1.3101866223873968,-2.1782974633168215,-0.0644041017316041,0.6468838113360107,0.78570825769566,0.8591523969080299,Class2 --0.46634382952386627,-0.33092810417385626,-2.0182110170125216,1.5508075124597591,-0.3743925003313693,-0.0782309117380584,-0.015779919655861663,-1.746290261042623,2.241405978493983,0.33316805331306454,2.0604756511630877,2.767743569365758,-0.1494617834687233,0.9920147475786507,0.7027562486473471,Class2 --1.6276437369385424,-1.5968237237283114,0.40417206846483544,-0.787295220533879,-0.2542697450252014,0.9823924113644243,-1.6386210248930064,-1.2967480843984052,1.1186357556997386,0.3200391439269476,-0.7242199720121835,1.506987962100903,-0.1671468517743051,0.8790190070867538,0.2624721461907029,Class2 
--3.9092484769165567,-2.150745158534681,1.1873012757666357,0.48686367190979707,-0.7045340021421445,0.9609705646504637,-0.06652871257854744,0.5751477166098511,-0.6727614893902092,1.1968527456984945,-0.4315135819564212,-0.542540818166839,0.4093896094709635,0.2760503152385354,0.41116597480140626,Class2 -0.8701905290800898,1.2475060010871084,0.007031998410175376,-0.7097328670841442,-0.9384589546131878,0.016963455972111314,0.6068929428131926,0.7043855732956528,0.5374108896607133,-0.18391116085644144,1.1726183135083637,0.31950586631596933,-0.39962111599743366,0.5707078045234084,0.48248236859217286,Class2 --0.9333919159025977,-1.3381604754652234,0.9682645427690064,1.1423969653916681,-0.31004321377943117,-0.09762895097031002,0.5023080223565748,1.156389279528015,-0.162651451476402,0.3456312269017898,-0.5825172208202345,0.9388098780502101,0.2665976812131703,0.923099948791787,0.35381473135203123,Class2 -0.7508993663415128,-0.2687461241273187,0.36149003628204945,0.9256562229399591,-0.09615341328253146,0.35637157541371295,-0.18169775773236896,1.3816365479097121,-1.033409909800996,-0.5506859399509626,-0.7133025746011782,1.369113584585147,0.06002038810402155,0.36654901481233537,0.16026092413812876,Class1 -0.7849478849422469,0.20762829797505772,-1.25664188126335,0.5549002812362467,0.5958969179864897,-1.8926491225902693,0.25974775889251023,0.7449332810696292,1.2202955634005574,0.20795495554920493,0.785498129489602,-0.718435756201861,-0.04151416756212711,0.3684883357491344,0.08679624670185149,Class2 -0.6153091243684395,0.8994046530582432,-0.715263714441727,-2.609649978084208,-0.7964242051571155,0.7230737758953866,-0.01224154334044164,-1.9705818549217016,-1.942419126925711,-1.024106002973682,0.19500409762851692,0.1941894419221603,0.37863827124238014,0.3325443190988153,0.5786092209164053,Class2 
-2.784487800061511,1.1905893243198293,0.020687456443684464,1.0641910763600892,0.5447195542170759,2.209350607196068,-0.05028757995413096,0.5911729561961997,0.6145741727573438,0.8464794795337623,0.6196772106555551,0.4776520910505126,0.9029648499563336,0.9435502921696752,0.5100184471812099,Class2 -0.5073861943503611,-1.693038869503074,1.169806214073564,-0.008065660619815332,0.20696356944123298,-1.1275895603391795,0.2875417035290026,-1.4660756417952383,-1.724869937739888,-0.7039895104142411,1.0369750790534844,-0.3979677772873403,0.07670979853719473,0.7433138189371675,0.31329415808431804,Class2 --1.625024941554882,-2.1989899808166355,-0.056458586103580684,2.0563726427930646,0.36580240383668877,1.2340080969633298,0.5956775261124732,-0.9889044385113938,-0.15771334419014882,-0.6237119383345755,-0.07246318136054772,-0.17873205257523214,-0.4794601798057556,0.7406028255354613,0.6556872487999499,Class2 --0.3319249702436203,0.4977140073125584,0.09163709295241294,-0.47278496820279836,-0.36000983178351414,-0.8336014182557642,-0.5765767068831165,-3.017059551914307,0.3216362017586496,-0.8450209983963349,1.745595280130064,0.13595177411330012,-0.4253604570403695,0.3138745797332376,0.7439082765486091,Class2 -0.12035682750710497,-0.6848855244742005,2.1284268090725678,0.8409178057167056,-0.3715721316732598,-1.2468406175450908,1.6948314045219202,0.13808327120192956,-1.2873706952805875,-1.4412196339690528,0.901646308473608,-0.6120470671483309,-0.9664377914741635,0.08713738108053803,0.4784704374615103,Class2 --1.3240645292333841,-2.1206549790789437,1.2069595776580562,0.06519575894731754,-0.6869486231262363,-0.8671824122465517,0.11308882527619067,0.47356413122916646,-0.09578501350643066,-0.73320168031091,-3.0906739197172572,-0.27240525658418047,-0.7461532582528889,0.5552875618450344,0.8711232447531074,Class2 
-0.09742884210042124,2.4908736129312,0.8115794499175084,1.0202997621320997,1.5725442773825398,-0.9977165531737809,1.8348357862062579,0.44158381159531224,0.19688400894949562,-0.5841909564374691,-2.0282844677471408,1.465909003697259,-0.7907551401294768,0.07046411046758294,0.3549668053165078,Class2 --1.1910276214178288,-0.21438113500649192,-0.08670148458553989,1.8302112384780627,0.28233121581219284,-0.7401723716963483,-1.0387107472282924,0.31203020988703595,-0.9564506963933397,-1.030093943437664,-0.9938946505114842,0.2117784653134262,-0.30406307661905885,0.2885769666172564,0.7516640131361783,Class2 --1.0928042774706552,-2.28340948747005,0.9043779496445907,1.5032139530336392,-0.8451364609832909,0.940326845234286,-0.36852356050803187,-1.5893228127689367,0.9175189773041912,1.8411272292233116,-2.6558648075626268,1.106536095930789,-0.9029944106005132,0.7655651117675006,0.29794586705975235,Class2 --2.775353731566096,0.15425031414868906,-2.3806678969659556,0.9673093115261547,-0.190828418087814,0.5539317558229586,1.2932398076107259,-0.1450424867069617,-0.9252398491444815,-0.32504672740910456,-1.3532494566906241,-0.4877006369298748,-0.25809214916080236,0.4311350400093943,0.41580685577355325,Class2 --1.1329923512084037,-1.2585467773465788,1.2460220081686082,1.0133042589382595,2.07322942388871,-0.10504117653050862,-0.1492221906645565,-1.0531580794096163,0.6873032027373319,-1.3217971239034343,-1.0308752729059136,0.7535982574072626,0.3084249566309154,0.8900801094714552,0.5246040171477944,Class2 -0.6664935597919068,0.40511314155351746,-1.7774244997161817,1.6138695817625814,-1.4288120438881793,0.44159576425738667,-0.6164301059588472,0.3824516627412471,-0.5082923052088625,-0.9724641082920353,0.18313110690084927,-0.764220616466033,-0.13739845156669617,0.5167306936345994,0.5089601054787636,Class2 
--1.1302601241930001,-1.7153293139157815,-0.4706330612519237,0.5447888773139204,-0.42380249773157636,0.6658191638694638,0.1055636367055079,-0.059214240422194045,0.28569416620322274,-0.8266576382206473,0.6034145615751284,-0.1359883540160726,-0.5248923883773386,0.5539619228802621,0.3976905101444572,Class2 -0.6954729081597092,-0.0016395136330777937,0.003523170424939973,-0.11219964793881333,0.9309853844650894,0.18858468063629616,-0.7932759735225249,-1.1612097484170936,-0.1427038917431821,0.2460611679717631,1.4831740599575791,0.6843574246685725,-0.44121828116476536,0.21680187433958054,0.4917402360588312,Class2 --1.8585631323056808,-0.7094705840661009,-0.4163449666848352,-0.9867028380778308,0.9306207070660986,0.750106177208018,0.17909737980522528,-0.9586505690960224,2.0553386217204066,0.13522575068207346,0.0992494481635028,-0.32754577200788104,0.23993566446006298,0.29298463510349393,0.13447862025350332,Class2 -0.25575991521855357,0.42488939252904856,-0.9768290892857864,-0.9532158620449076,-1.4459455991374144,1.2427488473534127,0.18748912291211442,-0.613734095410743,0.6987320486911086,1.0891069791296366,-1.3635919943163382,1.0156764001665155,-0.06432025041431189,0.7362717103678733,0.30145410750992596,Class2 --1.1833172936094734,0.9052186661879014,0.779445129499673,-0.10010326213416744,-0.24098201121157253,0.10844751149529486,-1.0637636483426451,1.1375511186231837,1.8670668655867424,-0.8357773856701748,0.13604704451099675,-3.0940953903794015,0.7835827404633164,0.7994293603114784,0.347275652224198,Class2 -1.330464211547017,0.5125811973750303,-0.1178619665143232,-0.17143632256219138,-1.055211062190814,-0.17576532160041866,-0.4260661313170476,1.522172727883415,0.7523464995195207,0.8693209725684405,1.641872376218064,-0.12108973075662638,0.6342682121321559,0.39393315953202546,0.16462195408530533,Class2 
--1.4889091108154433,-0.8732895744495794,0.4276616548302541,-2.47922803009021,0.36579155563385485,-1.2422715174478143,0.27010623032308073,-1.1955760717624102,-2.037455523619197,0.49046717251011446,-0.4538193722862825,-1.9367076172763649,0.2048906059935689,0.6294078163336962,0.06371247000060976,Class2 --0.3507752285103134,-0.28572242794931946,-1.2927486413216018,-0.636790573822923,0.5401805631423138,0.6315105532619869,-0.41275778092556087,-0.050840344044004254,1.460528687324303,-0.6073437044187148,-0.44443954378414546,0.34027340395260264,0.9706773404031992,0.6804795148782432,0.8510817580390722,Class2 --1.2207182016855236,-0.044544546823913844,2.216618727670605,-1.703629243089298,-0.38233413172847214,-0.2373062452710242,0.3813488541381978,-0.14313255006190953,-2.6352383476714323,0.8069170176225046,-1.357438295154354,1.2899307671174995,-0.9104562667198479,0.5195725236553699,0.24892105371691287,Class2 -0.7005213674458669,0.3050016298684174,-0.1891630175401049,0.02669782735150223,0.07751617750057109,0.9271305622337032,-0.6219601733198264,0.313732051407056,0.4553396599664341,-2.674371436918312,0.6684695201935831,-0.8494323283411078,-0.5813374663703144,0.5702152433805168,0.12990673980675638,Class2 -2.7779060839241208,2.246155906662233,0.5740171963946289,1.0066308462984175,-0.5530256078774186,0.5311455221958421,1.5090413970965295,-0.40468590277686795,-0.8942777327268259,1.1128900758088138,-0.005885542881988457,-0.8530796352629979,-0.16860040556639433,0.9808971064630896,0.38977788877673447,Class2 --0.7879884335777414,-1.9932541658286806,-0.5784877731752629,0.2539642785293255,0.004859337448564493,0.08525083781435398,0.2243225399071778,0.2956782499958853,1.8670031034788632,-0.369778448018767,0.9616600950645212,0.3904688348492565,-0.938039876986295,0.6172430855222046,0.08459521597251296,Class2 
--1.376630637573132,-1.1762428969878926,-0.1770305187707167,-1.0261013883838745,-0.3394021314175925,-0.31626071578240417,0.44112236937541727,1.2194835292087347,1.0249641343390896,0.21918428213397267,-0.3552328656441407,0.2811314089795665,-0.703721437137574,0.7251312611624599,0.3618157668970525,Class2 -0.31027905505200115,-0.2987157047920852,-0.10741784904477653,0.09718981513669597,-0.07637937789746016,-0.6484949115876675,0.31746563777596015,1.0563191815400221,-0.406746613733551,0.1552439040727002,-2.4755933151346565,0.7023339788886644,0.9851709450595081,0.00427358690649271,0.4669258608482778,Class2 --1.9242160867662967,-0.7603817449531759,0.34497361965970447,0.8005454909703521,-1.5454278416596818,-0.9072031264961419,0.5659422248076843,-0.4444125907809768,-0.5036916537919875,0.805866910203154,0.19105976757458074,-1.2268369797354817,-0.46869808761402965,0.6819956726394594,0.20338932028971612,Class2 -0.46888697114324185,-0.49109847220567726,0.8931399050199686,-1.985300394680645,-0.7479740978616988,1.4061811269227515,1.2128237607212395,0.4099866977435109,-0.8455837556073458,-1.176504069834144,-2.5809744792345843,-0.5618655235001873,0.14772805478423834,0.5742659156676382,0.3480044063180685,Class2 -0.1986463313659872,2.7744549539817562,-2.17392883341993,-1.0202856735128552,-0.5645483592934704,-1.5358691368783888,-1.344226745508882,1.3947393928236456,1.9785168747999056,0.11722173630622403,-1.0113167528998048,0.5272822645901244,-0.16967317275702953,0.2939615936484188,0.14993080752901733,Class2 -1.4070309448598137,-0.2762636860874642,-1.0143454993158125,-0.04237991745523108,1.0761893147739539,-0.7562672665745969,-0.4470490521910285,-0.2382214683282217,-1.2509245545431638,0.7429002856950636,0.02432273808547231,-1.4406590849648333,-0.06578394072130322,0.6106849140487611,0.14025260880589485,Class2 
--0.8141383857608383,-0.8364228355477016,-0.6244675633244691,-1.2701241641529535,0.26262577280272575,0.25188104175056336,-1.8852040117473496,0.23904070178003367,-0.5401221634251149,0.2501824768932904,-0.3368082537003253,0.5815198600509999,0.2900302647612989,0.6516861864365637,0.5031043265480548,Class2 -0.23104015094051195,-0.5965244122430524,-0.7699210429605041,-0.45415002731198467,0.6808076902042095,-0.39500861039856766,-0.4066441981701056,0.16978922408474634,-0.592569725857509,-2.3202538421302186,0.8005150945147851,0.3154963058394283,0.5416127261705697,0.21976187103427947,0.46546324086375535,Class2 -0.29019647495790524,0.82035070103541,-0.8755489819590478,0.12840192935245884,0.5410315963371808,0.6745523758069414,1.3125815012211608,-1.836851358139218,-0.96212006714167575,0.8260626649566424,-0.3710038619964809,-0.6872314751299077,-0.596862870734185,0.5240733218379319,0.9955115236807615,Class2 --1.8088129274203064,-0.7236230330891676,-2.0019847529797374,1.0410316294754272,-0.5624173468697887,-2.966286928188315,-0.5452600056976257,0.6408239193630465,0.7093530322797609,0.6320376861299339,-0.25480528267203256,0.05628596444687426,-0.8073891354724765,0.4681490615475923,0.7704791168216616,Class2 --1.5842592312354955,-0.4306690072832122,-0.012830107535755772,0.8533010577342731,1.3546273536289903,0.8029392966048933,-0.2650204589913463,0.1693688467936856,-0.9952931573007499,-0.04414013803287955,0.3511936559914578,0.8875134170899175,0.6714692902751267,0.388971395092085,0.06028518686071038,Class2 --1.2385556780611566,-1.234517451329897,0.18281139514328676,-0.6621293182889614,-0.4460566419308639,0.9520452283485695,-1.5282365762846772,-0.024398236369345416,-0.2013090076928616,-2.1684210596248623,-0.47233400543224147,-0.6933210335480637,-0.1137855458073318,0.7159566774498671,0.41065939562395215,Class2 
-3.0188131099876556,2.382007601261677,0.4379124243568081,-1.8332712439484684,-0.550220564572816,0.050828730477325085,-1.4250423059503396,-1.5972020866816963,0.3382892075307388,0.08585745582235108,0.5551193664128504,0.4576637836667117,0.472260728944093,0.1506310231052339,0.7359269422013313,Class2 --0.5637251149171127,-1.7567515862916683,0.47201771672302706,-0.16411490536437814,-0.4252221671871581,-0.5550585221314724,-0.621885206282897,0.7474016017721317,0.6386836271251025,1.4122200443355595,0.24428578303394644,-1.3624191763871567,-0.5938614597544074,0.025789510225877166,0.3634784109890461,Class2 -1.991900319562314,1.84463609278501,-1.2868072219423998,-0.9531948392806223,1.6266191580236726,1.3970621486025865,-0.38350960631794473,-0.9933493014725966,-0.7115315408589102,-0.2543236948509189,-2.4890526375652655,0.15236970840829525,-0.5736349849030375,0.4938412131741643,0.9922965033911169,Class2 --1.5034606086130178,-1.7797857644068302,1.155229247608011,0.5981807769294918,-0.005214643861067807,0.6534136989876816,0.8616730135405686,0.409159645571537,-1.536143144282265,-0.38352421916166884,0.6908723170485233,0.2031068269677685,-0.7493770392611623,0.5738847125321627,0.36629988555796444,Class2 -0.49255487791897823,1.0445689656715649,-0.48551286422970363,0.6787022578532131,-0.6145381909970146,1.146103739114915,-1.6470291689739445,-0.7506218717359617,-0.8508467390736403,-0.5541765572138222,-0.15761542299205217,0.523483259476466,0.23096510395407677,0.588555698748678,0.22522637550719082,Class2 -0.6870579604788851,-0.41560261831355777,1.6571110879067856,1.191226451080043,0.15211123455947387,0.5006702921957277,-1.5036421286894508,-1.861035430610525,0.8991637604969239,1.0761896963468405,0.8336172404492039,0.9931290448989126,-5.123624578118324e-4,0.46626852941699326,0.9143224891740829,Class2 
-0.7492330185339875,-1.3794532420574486,1.4471304874343593,-2.0957348999225553,-0.7263346655309921,0.47111750954802,0.9977303707846756,0.5316714076982463,-0.17457603887325043,0.8417018949995065,1.3167242886689825,0.2351094619976824,0.7528381566517055,0.9364397826138884,0.9301723588723689,Class2 -1.3038042952744844,-0.6208760062501595,1.172594311180715,-1.4769317230816208,0.9271933049427569,-0.6984241932536198,0.5587522380612251,0.9113072025269036,1.0374103587400283,0.6388545296827557,0.11418298993099095,0.32128270546285015,-0.6285409610718489,0.6585558555088937,0.13268338912166655,Class2 --0.6888839254771635,-2.7530482971122074,-0.3570497854388791,0.8269030822748147,-3.0307593618850177,1.1574750843541992,-0.6143639167479011,0.17476104174687224,-0.1758740071683668,1.4436351940327623,0.5170153477711842,-1.0684997801318152,-0.6180584072135389,0.30423349351622164,0.8326047877781093,Class1 --2.0040747381299573,-2.9313095182812887,-0.7213874654728144,1.4774597296409968,1.848211954253608,1.2207565717742896,0.06687038090956107,-0.6336365304355585,0.6192969898517384,-0.0704868873271451,0.009246618795427989,0.19151183424989698,0.6526887128129601,0.9795374965760857,0.6676206234842539,Class2 --1.8992545710885367,-1.6671734978663528,0.12881105388931952,0.0877647827993747,0.28505308105165245,0.11593456209731413,-0.5983900982343311,1.3803571364664917,-0.7325193199966354,1.5108123646113918,1.7163882240067014,-0.9480530802965111,-0.4045262415893376,0.6824342159088701,0.7722692773677409,Class2 --0.4175417586649467,0.2959641138234229,-0.6850447059174112,-0.06987256519276688,0.11810905540808594,-0.9552825222411848,-0.9502484329752011,-0.4740271778820887,-0.6821465556648934,-0.5997302681605599,1.0038045616472242,1.1109190289685802,0.7463003401644528,0.2636404966469854,0.2566429781727493,Class2 
-0.8946076765506354,-1.1880183911659457,0.3125549377225489,-1.1196057611259713,0.10831508547016619,0.4186252953541615,-1.5863564991567334,-0.3708232443464108,-1.61043877066443,-0.7600983925527458,0.6937206108200751,0.14128748150353837,-0.6668503852561116,0.45768065052106977,0.4842012783046812,Class1 --1.6563971058561564,-0.9468087111408887,0.6073054273640662,-0.2652587425634941,-0.15615962495513877,0.024462767152280653,-2.077751775618865,1.0706542161522823,2.368201653238779,0.7694789741014227,-0.3276055820263642,-0.910841941541745,0.9134720331057906,0.052412914810702205,0.8883347157388926,Class2 -0.9019061101272441,1.281295612666181,-0.09295946236548676,0.008974577197608828,-0.8446739866381949,-1.1644599533039537,-0.15307244832315214,0.7040264094710483,0.6847743340035034,-0.7261992567178552,-0.0621399377419202,-0.8717775344830468,-0.8393606618046761,0.4759911100845784,0.6888685384765267,Class2 --0.30613414091029484,0.845815798795986,0.8564919860612596,1.0781228931575184,0.4122889782079047,0.3970664087343362,0.6397032007033256,0.7204855720465754,1.5865833944480319,0.5381225514864464,-0.08986879374649076,1.0877816595432384,-0.5758502664975822,0.3876722720451653,0.3579922476783395,Class2 -1.9515259383296435,1.993865964489951,0.011918201816081294,1.233793403432657,1.4993016455230712,2.160062406791041,-0.5297301170762805,-0.5540325420616188,-0.20060975685214694,0.7599870559190405,-0.5645981106342208,2.3391355557778,0.48333178414031863,0.306519664125517,0.842361957533285,Class2 -0.7180685700512022,1.8543205401888743,-0.5014050168135242,-0.29460817920031573,-1.3474788678447152,-1.898362997474063,0.3030379001708368,-1.5586778426323256,0.258349982956171,1.2097528314483743,0.723553874399716,1.6671242935324893,-0.17044871021062136,0.3470036822836846,0.8525779724586755,Class2 
--0.6260507400383668,-0.08361615067292444,2.103848238130802,0.22826045603800504,0.04687322589548275,0.8063119353029784,-0.19737584927467422,-0.8750198854534059,-0.9261760833267001,1.6057103134267277,1.2700311715650574,-0.5096869381731732,0.0431854953058064,0.8829302401281893,0.15051146992482245,Class2 -0.709821594818967,-0.27292256004969573,0.053568129508804824,-0.6517360511878507,0.7213378504437097,-1.3443440747583701,0.33256003785782917,-0.9823170355422366,0.028559171305538693,1.6486695117807573,0.28308695164644243,0.709943095385641,-0.2618942172266543,0.040879354579374194,0.043550431495532393,Class2 --0.46301284945364307,0.8267267174543239,-0.2950571071042069,2.2205808468210093,0.7041373219978878,-0.7319249446933703,-0.07806290803777878,-1.2237597509644838,-0.32123171218640223,-0.8849040754200395,-0.3325090629452929,0.4885365908799105,0.8689524889923632,0.8999352033715695,0.89601129014045,Class2 --2.467300676954714,-0.2693811137379254,0.5571604720188337,0.499684937034419,1.3677496224465089,-0.7338426185632826,0.6552235516477094,-1.6413533577107084,2.2133890575312773,-0.1456018241672576,-1.2385903722532627,0.43109601965811445,-0.525406735483557,0.23291765223257244,0.07524224440567195,Class2 --0.3256541247755911,-0.8649781575944597,1.4533667246039987,-0.21288521821748557,0.06870099145274886,-0.35277893923614956,-0.7739799947697978,-0.48973768876496443,-2.8533644844071735,0.9309072360875483,0.17541639294495168,-0.4708835257048175,0.2415103567764163,0.32742459839209914,0.345556273823604,Class2 --1.6266055264728656,-1.1002761992857562,-0.5973473432755648,-1.8017102662131002,0.8823636203810468,0.7917164656428847,1.565045706726633,-0.5311214962132923,-0.2649704718667013,-0.964158771598212,1.6613451582846128,-1.1831810387693338,-0.763045693282038,0.41208698716945946,0.14272273937240243,Class2 
-2.675427157047532,0.8856142878168961,-0.7016968244907263,0.35229256766266825,-0.011572834783165089,-0.8671162369157395,-1.0620230080675699,-1.0084783099372614,1.122214083751558,1.1505477107357343,1.0419943423653775,0.45944761572778026,0.7869056407362223,0.8598001336213201,0.07163893291726708,Class2 -2.686547980104744,1.0490818542451321,0.6826790958978396,1.8419067384313708,0.28291838290951704,-1.5122096639056428,-0.7756190897470449,0.39736299797854496,0.04740803503019258,0.1626273762366398,-0.2628534638881121,-0.9449005278878293,0.4423741800710559,0.7708608352113515,0.6699002920649946,Class2 --0.6189286052741434,-2.2213343537028685,-1.4713304430187832,-0.7528972401463506,-0.2205708601663678,0.2524505054715428,0.1828468647397195,0.3019688772862664,-1.465480176956199,0.9529450163223443,3.3264855588058753,-0.7620713878093379,-0.5496340258978307,0.8224416174925864,0.8288005760405213,Class2 --0.7345398866274093,1.0894820584018896,0.7358048226440708,-0.050267081769334664,-0.5603618417698675,-0.8919035117260211,-0.914816617857852,-1.7985157706164177,0.7496661266032999,-1.7718917458872696,-0.7928265598013072,-1.2847849666226538,-0.10930057987570763,0.008790081134065986,0.5517622698098421,Class2 --0.30035717268057516,1.6218735286656936,0.9216520777286225,-2.977997821163691,-0.19751065388594283,-0.33907134855621934,1.904574955538912,0.24185645448201434,-0.3063515483006793,-0.2768005293670862,-0.8066788426688223,0.988826754527543,0.026320424862205982,0.5552985037211329,0.7041919135954231,Class2 --3.4078522656774073,-3.5477439384389715,0.9506713581642429,0.7122064470128678,-0.353812879867092,1.9396929041237034,-1.2141352378965133,0.22117453321300903,0.7602261878534823,-0.07113060471697266,0.28947689071204064,-1.6188253224391809,0.04248404689133167,0.5485773056279868,0.7369767662603408,Class2 
-0.3417056116965068,-1.9602481824668838,-0.2474640373529065,0.0768867795056941,-0.0948719428115308,1.190363745852998,0.15186788497026357,-0.20194669458553768,0.050162177057001996,-2.358136004796907,0.1893997088941708,-2.1337662536284148,-0.8456973126158118,0.034716673428192735,0.332866643788293,Class2 --1.103974186196442,-1.4864928842484468,-0.21958255869742152,-1.7961650116692338,-1.1491769477554348,-0.305826007197367,0.9271526774827731,-0.0752445402419761,1.8624269492692762,-1.2147112236648192,-0.46111987869722476,-0.45185446662325385,-0.7065018266439438,0.7096326258033514,0.43006868241354823,Class2 --1.7692894122614329,-0.9019238173083621,-2.290077357828919,-0.7601071613058042,-0.4156847136604528,0.18476613405003103,-0.9805043878388651,-1.7631844879241032,2.038103581504632,0.5053197521348721,1.6639970809498954,-0.8057444839224337,0.4010252091102302,0.9205288125667721,0.2649916564114392,Class2 -1.3349414273932387,2.2059674187403626,-0.36604033252182716,0.3733043447688266,0.8956907044377861,-0.13129364921494566,-0.5079681850503971,0.21556589531363718,-0.45269080883083057,0.507539796410213,-1.9102093281305246,-0.12269625560877002,0.35160998720675707,0.35902506252750754,0.0484623359516263,Class2 --0.19794729027524396,-0.1848888887532172,-0.7672664910877312,-0.2994940779880347,1.4148385460679946,-0.19206209060799176,-0.3813480722125064,-0.8143164013159495,-0.2171332604192086,0.6002120174484331,-0.5820701314952519,-0.20634248150881968,-0.5784069551154971,0.5136471050791442,0.9349573636427522,Class2 --0.6083865766772739,0.3061174003188095,0.5870307386456212,-1.2027743825603652,0.49151044942831285,0.5403520201529516,0.38575610968495433,0.5862606904758247,0.3256837870138304,0.6069564674161537,0.08538714630526155,-0.8261060205337267,-0.4034487931057811,0.02109346049837768,0.3559353491291404,Class2 
--1.3586103882638887,-0.5541876754059913,1.430837098955733,1.5053873756332248,-2.0206065442568595,0.4694874403368285,1.1524690199859375,0.458083481223146,0.7129232346598163,-0.4008791089269332,2.1688379740199,0.410103439547301,-0.12869265163317323,0.12749416707083583,0.4580433217342943,Class2 --0.20319385536363974,0.13561932782704358,-1.398834300550021,-0.6669878450809524,-1.1134739075459765,-0.19728977007783352,-0.16273703427103772,0.1837627679995499,-0.5486560713275064,1.3010977877578196,-0.2965203391708487,-2.31344236312156,0.9926352873444557,0.275497272843495,0.22417566692456603,Class2 --1.8093553699313205,-1.9687236273769588,-0.908894177832287,0.3326103859954327,-1.0976760113083641,0.41783372771182137,-1.7199460718953115,-1.8720928094134903,1.1393009096855657,-0.3233800042968578,-0.29758507429397396,-0.1597558691357988,-0.28341218922287226,0.33118099090643227,0.7034507158678025,Class2 -0.008696329089796784,-2.287685786103582,1.3323995646900915,-0.5268044087746584,-0.2503969064467201,0.009453444648029968,0.061182541172529616,-0.12244618700368559,0.7008815521959483,-0.9866510863351042,-0.5875378124071428,-0.2649696928069479,-0.3078634114935994,0.5326781014446169,0.3747581902425736,Class2 -0.5530859198899855,1.5042112114117838,-0.8963342699170883,0.7602641062001246,-1.0710163586944583,-3.136020530966457,-0.36177640593266275,-1.857576665630616,0.9568474903111147,0.8749006428067897,1.578023178768701,0.28215932801762195,0.3918367326259613,0.4650272305589169,0.7902564068790525,Class2 -0.9710532304110993,-0.030818263301770565,-0.579451579837662,0.5604730590067999,0.8919815179409216,-1.6549051665679124,-0.02645186719853775,0.16150392168962893,-0.2524458752298508,0.7030434974807059,-0.7978548057802601,1.2287690615020732,0.18342057755216956,0.5486723626963794,0.8722346820868552,Class2 
-0.9942344343823475,0.5979699665424469,-1.9288552912056363,-2.585935514695302,0.8679643324198525,0.826520812158412,-0.6288231700485131,-1.697884219166425,-0.3000819308387121,2.078799448022975,-2.2112823722008934,2.6987382130931556,-0.45552126644179225,0.03049136814661324,0.7144577256403863,Class2 --0.11179482736736401,-0.8856321788325263,0.5288523020588295,-1.9186240173802422,-2.135629366384549,-1.48702368733709,0.14511082036425432,-1.2926121852675705,-0.6260728367440085,0.8897711603130495,1.2565323481064825,1.0650656025788177,0.33998224046081305,0.8104552722070366,0.07966664410196245,Class2 -1.2839217554851383,1.3180526914991568,0.6835510881749475,0.4514670001408783,1.2695129899665647,-0.017726075459467517,-1.2303170354408621,-0.7043154894710997,-1.8453317432701744,-1.5563952330360031,-0.15620153276360135,-0.25200301167079725,-0.5870809117332101,0.9654929945245385,0.9676805273629725,Class2 --0.49352335113037826,0.5130471523833444,-0.7190265215321889,-0.8895791823235724,0.5153356557764253,-1.1313623807367927,0.3511286131213403,0.16279844357821394,-1.0877367434614333,-0.720377009034829,0.7881415514163277,1.3215193620569778,-0.2282318864017725,0.7217197632417083,0.3574344434309751,Class2 -1.1720852523861711,0.34361644948602793,0.09237419996658956,0.4787428504922092,-1.9864485031689354,0.3121295670543621,-0.25745732801777116,0.1944177697380039,-0.5059891559609682,0.5528645647606923,0.18096350332252023,-1.4210964564855255,0.9445780185051262,0.4006571227218956,0.13941847276873887,Class2 --1.7038965709234626,1.0935856441479364,1.1719594255230807,-0.249017399674132,0.7810794237873497,-0.3594405041309719,0.14008444767971873,0.6399662434157796,-0.46955182277741103,1.5120645606836534,0.9235597103634559,-0.03808362315615229,0.033093353267759085,0.43263031844981015,0.8079455632250756,Class2 
--0.421738549803928,-1.2935939068414317,0.15071403044845227,-0.689387803771365,-1.2914856875942027,0.4803826271948432,1.2242388382606448,0.47333628406291156,0.32252829672313477,-0.6436580557732073,-0.027098176896840975,-0.038314001849227314,-0.2384586576372385,0.3984131303150207,0.825046403799206,Class2 -0.6575121995963971,1.0167710799689607,-0.16999646124717227,1.457039330706618,0.20995911210310192,0.05709011132836871,0.6671924109445471,-1.7321936689295516,-0.017929251188513434,0.7742537495845984,-1.0947541973045878,-0.036456585837016886,0.44680779427289963,0.9854927982669324,0.17466807109303772,Class2 -1.4554470955148602,1.062949267069942,0.13109750462511646,0.17120925123146552,-1.1358310327094538,-0.29164289987012837,1.3259677301776642,0.19982833308868211,-2.038182085387854,0.4602901486689296,0.8975303510791293,-1.4089015725971163,-0.08097040234133601,0.91026018303819,0.488152330275625,Class2 --0.8249607843960225,-1.0397000649306243,-1.085758678225388,3.485810463425064,1.2955038530283758,-0.722913550987846,-0.13097384058887415,-2.3071484669463143,-0.3068801280150057,-0.07935984441021242,-0.3119323279971813,0.0434742405612899,-0.40075345104560256,0.5032950204331428,0.8619473627768457,Class2 -1.7165725364653677,1.4438004586795417,0.6671275559625529,-0.16559665761445466,0.05573779634654682,-0.7315303427582771,0.017850111296775134,-1.4218248904163329,-1.626376207240327,-0.3570521824030322,0.5899182291068689,1.4852424354636455,0.6787725421600044,0.7743836534209549,0.37566780485212803,Class2 -0.30220904767306017,-0.18602295228389948,1.3261336152254206,0.27697856309595176,-1.4325104830981636,-2.0553068741882328,-1.2061872223789245,-1.1295175509544038,2.266584308427677,-0.31184810832550436,-1.5353326372326934,-1.131942075311055,-0.2949138800613582,0.7532314984127879,0.22485404857434332,Class2 
--0.07785471596693094,-0.5806003240685028,0.18298011532770414,-0.32450482978827994,-1.5073183663599339,1.3018848615821363,0.4688140739308223,0.3037785493749634,1.367096902572223,-0.07468202945458834,-1.2389438194152596,-0.9485412781957117,-0.5230062282644212,0.8616271722130477,0.6167665820103139,Class2 -0.3241913757702074,-0.06885084581465048,0.302854045273877,0.4764134966133371,-1.4594461771329699,-0.5508300660564601,-0.11955319537549296,0.20325353942993063,0.5219761631911499,-0.15418600505148347,0.4983313388985201,-0.851649176172728,-0.7990502584725618,0.37958886241540313,0.13520530657842755,Class2 --1.5105962534348292,0.07956915611560811,-0.8198508871268999,-0.21225286373877714,-0.2601524762033363,-1.9241252430612759,1.3435836946400679,1.4963440784099804,0.5490476378889868,-1.2590548611641772,-0.07242564148774704,1.3823059316950372,-0.29488795809447765,0.14265254279598594,0.5267606924753636,Class2 --1.7770219798037423,-1.5326584362094808,-0.4253140754172494,0.8333170849622273,1.062971948488387,-1.374699131280797,0.7979685958759942,0.5556607974717431,-1.044982720932805,-0.40271709583117865,0.6371639471597007,0.845175436574183,0.026961387135088444,0.003924176096916199,0.20789953926578164,Class2 -1.86587668458303,0.4167693944606877,-0.26114726701267393,0.11447875196205162,-0.8400660552766965,-0.49297526009313014,-2.705503077022873,0.21054563252947522,-1.483342792552928,1.5698292697050706,0.5099640502063236,-0.6491509804222704,-0.5779351214878261,0.8566171613056213,0.299116249429062,Class1 -0.9741257066453671,2.9414774898389258,-0.7121109658305201,-0.8853205707629529,-0.6775458119454575,-1.0093730409811013,-0.890604478287972,-1.0734944972863114,0.6472250203891242,0.5227414595261222,0.7612867924876958,0.5527696620415739,-0.46318491781130433,0.25372059224173427,0.29118175408802927,Class2 
--0.05786222765135651,-1.6138336030766025,-1.5678986482073567,-1.1670627607324364,0.7202639155360172,0.1738731931935172,-0.015322750815379711,-0.020659251388909703,2.2798146960394168,0.23429649824612023,1.4863751330599022,1.2659400820190845,-0.3198758992366493,0.8258694054093212,0.5538890489842743,Class2 -0.469699977835875,0.548662171350421,0.2880353218846719,0.2556260374398731,-0.7544090816802514,-0.16203969820904054,-1.6522005634033725,-0.4098284984465941,0.3087538886170457,-0.157042215537615,-0.14698734613068462,-0.9192680093438242,-0.13630429096519947,0.15758107230067253,0.03748442302457988,Class2 --1.3094608056074608,-0.310919229716475,-1.288648310224672,0.3184575589108993,0.6480452242166942,-2.284968830126093,0.4908266845972315,0.5183169717926923,-1.7704491014647035,1.1532026681372047,0.3457127418921042,-0.14434188388536154,-0.8121304488740861,0.9963356826920062,0.011485775467008352,Class2 --2.190560019392211,-1.893519240069242,0.24988826686569623,1.5602398121958396,0.16787572619098326,0.21027068710383875,0.762236648573897,0.3412951398964679,0.33000834529284756,-0.9454103963916866,-1.8661327497884879,0.05793413787354472,-0.27701335679739714,0.6442877128720284,0.5749984327703714,Class2 --0.43175387845332125,-1.5951734025011026,-0.7686490319528146,0.34241606390481566,0.37997432684493904,0.45250411682749864,-0.14055496831745953,0.9214090093936819,0.8082609292667269,0.8747556497022632,1.8404444996713993,0.20355341193024706,-0.2216422948986292,0.30367673141881824,0.5678119340445846,Class2 -1.235005565715137,2.0168675882064955,-1.1208793624878648,-1.531533356020609,-0.606181260590622,-1.0985310869332174,0.8676997925982587,0.20281913627883502,0.26100900357963486,-0.5610171709714786,-0.33934291906578207,-1.0144059264737506,-0.5052404459565878,0.9592500359285623,0.2569797101896256,Class2 
--0.8865914961030699,-0.35912324069721,-0.49637191796373503,-0.043318357614649965,-0.5010480688471718,2.1482759044908994,1.7803084728938092,1.7318712823900169,1.1352586951856745,0.17570584398946587,-0.24596616873598318,-0.8657731959971816,-0.3834751038812101,0.40849489462561905,0.35677480208687484,Class2 --1.0047784867765333,-0.5652204158488477,-1.0209321757415994,0.52671138105481,0.06173831468956854,0.7544192752050217,-1.1110592810022972,0.6696358702461239,0.524044285844247,0.9817645842333846,-0.3974399025748687,-0.3688302275559181,-0.06332769338041544,0.38577986136078835,0.22240345599129796,Class2 --0.7663400097448919,-0.7478001139229723,0.4807983243271258,0.6600167894741662,0.548083589922173,0.015447971461250166,1.2347602743307229,0.6755328986652908,-0.3003620066788008,-0.9331270655086783,0.7193028333198654,-0.5119592340070571,0.03377848444506526,0.06591892265714705,0.1660266756080091,Class2 -1.3979279789815746,-1.3028703652348668,-0.5611176445275975,-0.36754586822074625,1.6130603736499158,-1.1164693929406138,0.698480929960992,-0.7758558629912163,-1.2433379452644318,-0.5742076953002375,0.4651552225189126,-1.8923953063020933,-0.4203096106648445,0.4228545387741178,0.9209827217273414,Class2 -0.8251138012710504,1.7923777869120867,-1.314950565193269,-0.860074034268165,1.300246814376754,-0.8571394173892836,0.7399325553827532,-0.22378863913931002,0.9473992479147239,1.6392839196674154,0.47804565401120447,-0.8180882965686812,-0.7585657062008977,0.4782596519216895,0.5675622499547899,Class2 --0.1495481174452869,-1.0688422931084782,-0.06402569079686019,1.0975651770762131,0.5596596689343937,0.62526749918399,-1.2048150413911198,1.2257683336907113,-1.0569265908976107,1.2027753435610224,-0.5806539837170231,-0.031509573931073974,-0.8134023286402225,0.8432740014977753,0.5755994631908834,Class1 
-0.10314353556675473,-0.03154492445087979,-0.5170187745472846,-0.007713514193765211,1.7475185496703198,0.563282170663103,0.22045136065094698,-0.9483258984171087,-1.3566525752340273,-0.4132775890884546,-1.119705959540656,1.0946313817351794,-0.18775824457406998,0.20863664150238037,0.06331498245708644,Class2 --0.2712726699285345,-0.8460622389689639,-0.33530566678939755,1.3705553102574772,-0.037401824607466286,-1.3996051880012108,0.021922025451511434,-1.8575846189444072,-1.9297247588480468,1.3840880057966765,-1.5895743192598388,1.6856835964673103,0.41198671888560057,0.5502724207472056,0.945949399843812,Class2 -1.327532123118052,1.5464219558339423,1.3169302963111391,0.2162752423445198,-0.5131868909525704,1.6053786531424075,-1.168829658580636,2.2410514246570505,-0.13024199490543079,-1.2605817144336449,-1.1396633829997498,0.5131691305278742,0.7650741543620825,0.17197119118645787,0.3089838877785951,Class2 --0.689374151666958,-0.7761109643356587,-0.008840870082815684,-0.8473233674176861,0.008892407109035013,1.0825481378954493,-0.7521966086325439,-0.8775895910311771,-0.5449297822397424,-1.3616186655739049,-0.00687608738800748,-0.42054820850823926,0.8276024837978184,0.7022611340507865,0.9120832590851933,Class2 -1.4925565500754105,1.9063616931010285,-0.5924943469877754,-0.40026653185592154,0.09628424007653671,-0.5042808097609799,0.8375692376632181,1.1725621519164728,-0.9250924382638612,-1.281521587206508,-1.4773222290507588,0.06459568332819363,0.9990215660072863,0.5776052004657686,0.7361674325075,Class2 -0.0720443426364617,-0.4983156216941314,0.6817144994033281,-1.750801136993669,-0.37422657495938705,-0.209644310539443,-1.8816425704245254,1.548007456509727,0.6919612594540594,1.9177355598821018,-1.2382554209321333,-1.1690540947046482,0.6147481258958578,0.4225368588231504,0.11423109052702785,Class2 
--1.3234675448912252,-0.6013673455663817,-0.596860489540674,-1.370852120298113,-0.3714891442394466,0.8553746594276939,0.599633053189549,-0.6806444973845895,0.38340689015006824,1.713341156453676,-0.5861247663257154,-0.8821703962839149,-0.27006219467148185,0.3361987480893731,0.7099114223383367,Class2 --0.21493660957568037,-0.20834104455891553,0.15218076658566598,1.1696885619572004,-0.6193843527563184,-1.2171143739809809,0.8417106238955872,-0.7071869496465337,-0.13394267201781648,1.9364465755356621,0.16400750651742804,-0.022774989585895102,-0.16936510056257248,0.18995284638367593,0.9981107714120299,Class2 --2.3521191956095846,-1.0346956386173782,-1.1830245749998236,-0.33368180431350863,-1.671283722404659,0.7301756826442004,-0.9602215997008652,-0.4420022133814573,-0.5045882421603314,-0.5438924867824575,0.999009473134152,-0.9708922337862183,0.48731897165998816,0.36580390110611916,0.28683201549574733,Class2 -0.8867097084857006,0.2890962601287791,0.059320025890889166,1.4553237095913447,0.24837865854584026,-0.2605622614717369,-0.6308808365965755,0.8482984879853691,0.23796280666113073,-1.332312637501091,0.1333967131056408,0.6669743315896374,0.7854482582770288,0.08973037940450013,0.9132607069332153,Class2 -0.14827438523266817,-1.0985817226927064,1.5673490542721553,0.5248619871080444,1.7764417082484811,0.5495893620022589,-0.06518138692504594,-0.23338393832535656,-0.3248971468588225,0.9915171143567011,1.4663273570542625,-0.9591343977246646,-0.9429143504239619,0.49109330237843096,0.41119147720746696,Class2 --2.1246643942423735,-1.3339568555723598,-0.63582326567376,-1.377538633891959,-1.1633692063544927,0.33763814811211923,0.9620223944805822,0.9859697774390962,0.2912488062425164,-1.7858901964950082,-0.0576407209474386,-0.4708823223352297,-0.29836251260712743,0.7588838529773057,0.23753852467052639,Class2 
-1.568508708942128,2.104616621488371,-1.1861142899905777,2.306930499453207,2.160769223664854,-1.0063416326895755,-0.13228539573860965,0.5424197266297317,-0.3994956282974946,-1.8678952222550809,-0.43150929702005875,1.2802164217044942,-0.21613634284585714,0.6839803538750857,0.7526434562169015,Class2 -0.25465803291948946,0.29223895849426534,1.0242709923306372,0.8553364995762712,0.7972356871536345,2.4799338244668365,-0.23476053522395682,-1.6609549317961256,1.2105430881140065,0.08840864108451545,-0.4625632565824084,0.1213427237458081,0.24068515561521053,0.6797323673963547,0.6967696105130017,Class2 --3.380637104321714,-2.1620439115259886,0.3983210410175087,-1.3305399627728254,-2.1843074185194187,1.3422068800040887,1.3632125237002723,0.7428892518667244,1.426393742736392,-2.734334715960441,-0.48497230838283817,1.4277930362184748,0.6882107998244464,0.7531721906270832,0.41271955613046885,Class2 -1.6949936964120345,-0.08617849106663622,0.5282698707208721,0.23397028034037726,0.1049702698619165,-0.13825661737979889,1.1947412622925735,1.4120623439390942,0.6269072284280747,-0.82533646513268,1.3237729505990992,-1.79068003162541,0.16179565014317632,0.8454406321980059,0.4659678670577705,Class2 -1.0465702316403616,1.4523511414582355,-1.022278803310361,-1.602861794035326,-0.11317999558239593,-0.8636233424639153,0.7579953894562874,0.05592550547522237,-1.194823564386697,-1.7405669623653695,1.1007921047063223,1.6485234356745566,-0.25007929652929306,0.8579994216561317,0.7357317276764661,Class2 -1.2467604202632594,-0.28470436627041434,-0.4916527728092454,-1.9117670412958836,1.1093674207519097,1.7413760843557111,0.08346073872175332,-0.42298995605802486,0.6141419566884663,0.8811779399751691,-0.8958927548055583,-0.7260123696944203,0.01616752240806818,0.4196206892374903,0.4504641415551305,Class2 
--1.6751722095096093,-1.4303540425922543,-0.6010211257315118,2.1135524811597075,-0.34280622834191976,0.7141065814823915,0.2963419859766024,-0.09804059464319419,0.9350900737639606,-1.3865410867598231,0.4841785075595271,-0.8450816947403467,0.5382765238173306,0.9089243249036372,0.23222591658122838,Class2 --1.241222812645986,0.3969677008960531,-0.11212308038960445,-0.4235474496851011,-1.3496611583757365,0.9533118329872353,-0.8903071966299804,-0.002538833102172223,-0.8729020797567613,0.8935940519332496,-0.6349109642791214,0.030390643268254584,0.43213779106736183,0.4641409399919212,0.1395928526762873,Class2 --0.9579453994076372,-1.6778860689209,-0.2891479430325024,0.3808280363363748,-1.7652008911377528,0.6284953402216373,-2.634276602289288,1.8981580811475343,-0.2902793324653091,0.26885532602118456,1.9362482565534025,2.212676967285044,0.7736273477785289,0.433842281345278,0.2584523765835911,Class1 -0.4493323714148233,0.564588190876652,0.6383375392330444,-0.7816020655540462,1.18930399657861,0.8390144021045725,-0.5767850253715154,1.8677266178871994,0.17208898783273255,-0.6435460124720459,-0.697880199778943,0.23101905951307655,0.19366817828267813,0.5472140733618289,0.3790118438191712,Class2 --0.8187561505103591,-0.43750796694601535,0.9825796036789676,-2.4145449322609984,-0.6365893795391024,1.618414036386478,-0.6142675811816515,-0.6599984482206678,2.5182822239712137,-0.0019121026240437357,0.4147540975604016,1.3631790868842004,0.5068667368032038,0.4514288811478764,0.273453360889107,Class2 --0.338587193615434,-0.7080328472035532,1.728425882894804,0.5125744692799523,1.2679917363902156,0.6252452794492981,0.038130241907039564,0.27140770880632814,-0.6353316406154149,0.10759553904746516,0.3644156016251761,1.4672525339418825,0.5879426021128893,0.007481646491214633,0.8581497888080776,Class2 
-1.9894481278790888,3.0572460811977535,-0.824042535239876,0.1970958190422588,1.9012275756069374,0.20840352750656382,-0.5587057549606269,0.4988280379885671,-0.47905361428362603,-1.0723024272419512,-1.3946035884940555,0.478718499551192,0.7798445387743413,0.09319475176744163,0.0109417790081352,Class2 -2.1263630647554104,0.6415935385639089,1.237948988542605,1.02741775981593,1.2709174398745058,-0.448772071403849,2.7035686529228813,-0.15931080278285312,0.11627389378049507,1.041110946553493,-0.7360970172515654,0.9126104799787216,0.9529573917388916,0.18265874194912612,0.3582937337923795,Class2 -0.3847960292861783,-0.7842186169540553,-0.32568938445245743,-0.023094343067644723,0.8721830806032963,1.2581778359449043,-2.466185039024165,-1.379894495702708,2.0167355361216037,-1.0868524592058704,-0.6531768885176136,-0.4301705885803353,-0.3869251743890345,0.8652134668081999,0.9701771079562604,Class2 -2.496377699846889,2.0896615741427955,0.8246754702899627,-0.17994823309405739,0.8652814567435785,-0.2267403167435631,0.04385142849590574,-0.4507028204387627,1.7981430393715643,-1.1909660249241885,1.688323474584718,-1.5388608638801158,0.8400564095936716,0.48193632322363555,0.4219637925270945,Class2 -1.9005879132179362,1.1995277589160245,-1.0058463848944241,-0.807704036718404,1.303822278734318,1.586403006580313,1.1148734700395149,1.0224144025914692,-0.842926904875189,0.23663848612389926,-0.403094155806053,0.8105999616328224,0.4813991957344115,0.00301791587844491,0.6645299610681832,Class2 --1.296347722968471,-2.187621533598035,-0.225020604724152,0.020811046403485856,1.3902420894527427,-0.2874099470823754,-0.5475739004781535,3.152337635659612,0.04550719905239447,2.1024040750325574,-1.8490208927538432,-0.038230414247057684,0.38435840979218483,0.5591981785837561,0.9241070803254843,Class2 
--2.7502426486427316,-3.1389818955706854,-0.8916559451930037,1.1496109983313418,-0.3239171248791954,0.738414299691167,-0.06234617857961546,-1.0913761968289344,-0.3200427396085751,0.12959110124417272,0.8572513388178189,0.6767351448378276,0.12593151163309813,0.5949217604938895,0.9109439803287387,Class2 --0.6819614118782038,0.6657741717106362,0.08963915919911937,0.6428026933540555,0.01741854402572401,-1.0076302240292399,-0.914427768963068,-0.1970804984446655,0.8334528962835035,0.27743060634972794,0.4157425498286777,0.5614361343978068,-0.8784984811209142,0.8377354254480451,0.4056391669437289,Class2 -1.957109346541735,-0.1752057440779058,-0.39661320557129015,0.06721999925110463,0.5158315244604865,-0.9495332470345538,-2.2153170201872685,-0.15521136147230333,-0.4118077031982503,0.14418831520777245,0.1455467488782177,-1.646181431792005,0.793359930627048,0.3332768369000405,0.1324134268797934,Class2 -0.4963634027771982,0.5543050166450543,0.6282877718058819,1.2426179204353118,-0.3929780118077299,-1.7658394541657962,-0.21126680712767104,-1.6341276666349074,-1.2167145560274253,-0.006020763970686024,-2.2399292034580536,-0.6181020004659711,0.035410371609032154,0.20553146093152463,0.743948029819876,Class2 --0.22520239623227417,0.621689024012188,-1.202112793195517,-1.8266881067801424,0.11663694214220106,1.3540384729204356,0.6902048218769894,-0.6677018053745261,0.5651074337586662,0.2882306879667382,-0.38463143320370297,0.3566683088242033,0.12319311406463385,0.3571358856279403,0.0029023410752415657,Class2 --2.3623752129833724,-2.2396505937669193,-0.12419491166055195,0.6314945536652353,-1.6242274313710374,1.0425148824850874,0.9795761551611505,0.5328191652829439,0.0019400488930092507,-0.217132577962906,0.5004277679533102,-0.08070263893490667,0.7678852169774473,0.47388184955343604,0.8000026361551136,Class2 
--2.10411056783944,-2.8609015614385243,0.25985814368619414,-0.3872181218048703,-0.06761020625133525,-1.4524645820202067,1.0137139844169782,-1.4531874969754288,-0.38702756280159856,0.3859872014749421,-0.42544363108294087,-0.4806506887752892,-0.06706062890589237,0.8690663825254887,0.7308087449055165,Class2 --1.0205402203591836,-1.7439063010200855,1.1949735360371565,1.0281267778860825,1.1273077261773512,-1.142207045893258,-0.545931132239561,0.8483614262023484,-0.1939341231135413,-0.7077589272118282,-0.46778926559017053,-0.8731320572823236,0.6885364460758865,0.7365992579143494,0.993258026894182,Class2 --0.9248817484931451,0.07506784584427001,0.8197648525916783,-1.2206170703196026,-0.17737750822763265,-0.08633670752856258,1.491294664895681,2.105505642944951,-0.9320839693417311,-0.17541219139241102,0.1793713558150975,0.33884100923850896,-0.9716184064745903,0.37373858550563455,0.669591641286388,Class2 --2.9615727972706436,-1.0201441776571902,-0.48772433238629676,-1.2642858820432175,0.4995317646040901,-0.27386977848213306,0.15810870265975355,-0.8285607882110715,-0.33106612977316574,0.1484879162865758,1.0071777352253137,-0.5977306734670216,-3.053918480873108e-4,0.5165492207743227,0.06230680737644434,Class2 --2.19271718529809,-1.3041028599135285,-1.1351610065874873,0.8342745390511873,1.6750945387611613,0.3182138515261168,0.7632019922716405,-1.3386409194083115,0.9364778763454936,-1.4538111828465283,1.298631812490788,0.9445513709387167,-0.0881728520616889,0.8854457756970078,0.689393017673865,Class2 -0.5268055663616994,0.29471960889028237,1.5420815283260896,0.08539416964733824,-0.5470561321452718,1.2328051914389717,0.4887009881988532,0.8090504867505894,-0.12188662094121477,0.40669737606540507,0.039941711564876956,-1.0495218924070333,0.7696081213653088,0.599612849066034,0.9955176103394479,Class2 
--1.2503324255930757,-0.6503382036940941,-1.2925122538567102,-1.3939784927660908,1.2727366191726022,1.8196962162372292,-0.4905621614182435,-1.663846073304992,0.6417634653876985,-0.9533308219521517,-0.13039382915289247,-1.5093294797873487,-0.7273949584923685,0.32218596432358027,0.34517307905480266,Class2 -0.6591118519995222,0.32946185166461306,-1.2393982493798044,-0.022533683897081793,-2.185066247194288,0.9264958049262287,-1.3521940949102473,0.16846207082928494,-0.005957592101759353,1.7998643280774864,-1.0632600028975006,-0.8654653422263946,-0.9854266913607717,0.622382253408432,0.7968060367275029,Class1 -0.7729152438483202,2.906907006867912,-0.7652791155465681,1.9875991293032265,-0.4914740833313778,0.8787996957207499,0.5987041162209169,0.2350100590306114,0.13226382396368605,0.45598604380987323,2.0846302830898797,1.1938144188831752,-0.6685373419895768,0.28794664260931313,0.0771182794123888,Class2 -2.934537327877445,2.1603023810879387,-0.1587598012160849,0.8359315709115692,0.7288425639540896,-0.12286867455230321,1.1597513272397735,-0.7536290083719652,0.6625829314846212,0.8435348845783817,-1.190540862867839,-0.9595723642566667,0.5135515495203435,0.3072600462473929,0.014359915629029274,Class2 --0.7631394939767557,-0.25800201431873826,1.8992310293149504,-0.21139229042791266,0.8536806398087553,-0.19323458474504648,-0.8165677889195718,0.022274238554169313,-0.771117039095817,0.8434801839914055,-0.7393301617643658,1.4023402820326818,-0.010690821334719658,0.37848843494430184,0.37249246845021844,Class2 -1.6859907205763873,1.1586876238280595,0.018985211234106757,-0.5689490182091057,0.44592802327916636,0.14608138793130437,-0.47829315902084774,-1.1638073909295037,-0.7095998363386631,-2.6156838005420986,0.1943180460132945,0.1634822709579275,-0.024727676529437304,0.7766606984660029,0.6207579446490854,Class2 
--0.9970211542344144,0.18831110834143405,0.8941874072524155,2.264708374218083,-0.1642697897194784,0.7360427216645739,-0.6479703885800956,0.09380404129516161,-0.9252564546121432,0.07049427552872903,-0.7947252378910022,0.76881967833014,0.7789757926948369,0.43298577168025076,0.974387762369588,Class2 --0.0671308568596766,-0.08743265822859755,0.03632959190888873,0.4201094462467161,-0.18976254223830674,2.4972628181593626,1.4980693731251062,0.42303448167585816,0.7939711620480266,-0.14094016987756333,-0.3929462842095402,-0.07199778303356666,0.10478367004543543,0.5722510500345379,0.6738638626411557,Class2 --1.1485010166536744,-1.0806253719046333,-0.1935250045316091,-0.705117071921037,1.0660498191056995,-0.9242122057805435,0.43154076753363285,0.7005332244737553,1.6078751069059207,-1.625994921652992,-0.2222400310147076,-0.7136508805450689,0.9659477020613849,0.03717105765827,0.1167387596797198,Class2 -2.499522003730851,2.1662788151355894,-2.235772443999396,0.17077477040925376,0.08831906697185954,-0.40532686678991187,0.449243993080836,-0.07528092419378585,0.3337121541793131,-0.15655415707875942,0.21992895448014252,1.0824659230046458,-0.9051277623511851,0.7599313755054027,0.9425698004197329,Class2 -0.15168729160573546,-2.1585557161675593,1.510930867698964,2.2684422694627395,0.9486004453827279,0.2455031068708171,-1.4024877411002394,0.47307083478250445,-0.6566791211680625,1.3084617553953928,0.4893345436328556,-2.249634487776141,-0.40304568596184254,0.2970238735433668,0.10755770816467702,Class1 --0.9407654572714043,-2.150756014564728,0.1522094284572129,0.18495120591146855,0.28846952395208336,0.4909344944794647,-0.8315724819757121,-0.44601267363845337,0.8197073841729575,-1.142738369946939,-0.4168645684643014,-0.08364733771985967,-0.9780447958037257,0.8400236263405532,0.19073431892320514,Class2 
--1.1236824520009971,-2.2965909941313836,-0.12400731552823091,0.22003959953420898,0.25338404042004875,1.0136136132599478,-2.0209665655232207,-0.803412510684674,0.8840707196746566,0.9958722782560689,-1.6332585268283581,-0.7532053763486494,-0.6268405904993415,0.01912170951254666,0.22141460911370814,Class2 --1.3201091883449037,-0.7471792261174782,-0.6751635308411621,0.37002854618633835,-1.1708805575459251,1.0110901904687801,0.8160247520351542,-1.1677316199434296,0.3367436449417508,0.6549145973161499,0.9472836967296343,-1.3118310258562507,-0.3681171159259975,0.5403146927710623,0.9063917882740498,Class2 -2.990041134926631,0.8072888649657832,-0.08622372017624168,-0.5129727254966132,-1.3073241957506267,-1.799490849540848,-0.9782446103422188,-0.028491594595192,0.5970352806223359,-0.3715617903554505,1.4408958879155969,0.16433647444399185,0.558485921472311,0.22161341039463878,0.16443269653245807,Class2 -1.4836142356178408,-0.4388369525004592,-0.7830781954856614,-0.49714495197718006,0.8108576395205267,-1.289739661027667,-0.5968947720986105,1.0763176950104805,0.20620320331419692,-1.2171002268724604,-0.1727553785718382,0.33055872788756113,-0.5238479031249881,0.9354643535334617,0.7133792170789093,Class2 -0.09380561004135393,-0.5851471796054519,-0.31648891408972807,0.5631596058373143,0.1709753367208048,1.8200962625356865,1.2753551372704373,0.7496895446338482,0.6027521563406624,-1.1541732385665866,0.30827181959075894,-0.3675641546551728,0.47853327821940184,0.9790039444342256,0.9633375271223485,Class2 -1.300663597357345,1.7532740887721925,-0.47277347831787314,1.7366656205817377,1.2324463899004634,-0.10008146288826958,-3.607032531966527,-0.5553790556944243,0.20767143135641639,1.9213101790464984,-0.034117316488805054,1.0775590045893169,0.8378189350478351,0.9810526992660016,0.25114259496331215,Class2 
--0.8367300059221607,-1.0378199024570405,-0.5089953619270139,-1.1721227804518304,0.3234588819020731,-1.7565674761307777,-0.3987531452969088,0.4111214517912565,0.41341431375643783,0.5289429576755337,1.1186138612507412,0.5326869091611671,-0.7583715268410742,0.8497349082026631,0.07487071142531931,Class2 --3.2638207045471885,-3.1480724295199582,1.620800402014831,1.0266808109443033,-1.0704252697925813,-0.13853796032149213,-1.43731217138705,0.2784326612273455,0.8609118969442622,-1.5784469193195532,-1.1227979817009155,0.14856378495687544,-0.1693770089186728,0.5527089748065919,0.414685589261353,Class2 --1.1786253812411838,0.02111311244698222,-0.48477680305018206,-0.22204866341013177,-0.4880095270499392,-1.2430835566936402,1.563460310567388,0.6259720940680591,-0.8326523434111088,0.39818223629018806,0.1115994558883041,-0.7482004306094366,0.8852445776574314,0.6127658071927726,0.5063708105590194,Class2 -0.2187466705706259,1.7349526794317742,0.05378245742802711,-0.8975592921486192,-1.154841347609971,0.1266017190530091,-0.24747853077582294,-0.5920248787105712,-0.020847585919925252,0.3024940531602705,0.28954942211323775,-0.6628813011892867,-0.014057749416679144,0.8628047292586416,0.8901520371437073,Class2 -0.8601116231166668,-1.2340328168799621,0.5378649105201089,-0.28696887275450295,-2.056075226327574,-0.008325965672456205,-0.5139579426319463,-0.649992082802706,-0.43116019351334256,0.42072068083358527,-0.41472934538518,1.3449741072804475,0.5551049276255071,0.7018174084369093,0.7994339542929083,Class1 -0.11431917351636824,-0.07420478567038453,-0.4684403737907476,-1.6072152041348338,0.2723146122024616,0.16827760828871294,-0.0291307048307033,-0.038213678656891355,-1.1738821884761674,-0.3638167906621564,1.5824877454821666,0.11868058218432358,-0.3431469714269042,0.9945645746774971,0.26218489999882877,Class2 
-0.5281698778516378,0.6962831675926792,0.5209088203663615,0.44172026370535955,-0.13154843618405518,0.020953418037300584,0.5714119819871215,-1.5678808014913046,-1.3734773285167359,-0.9672128003674316,-0.005893496325971415,0.6395671854574951,0.3154463949613273,0.712082261685282,0.9646636531688273,Class2 -1.6413936846356358,1.48021682744368,1.4570203404214603,-0.5260617347563215,0.12302390109201689,-1.0984196852980366,-0.743495183855465,-0.5717963871078072,0.3288294750783291,0.280638936280957,-1.256192225084495,-0.22994779564963747,0.05813137302175164,0.19467157684266567,0.8446184920612723,Class2 --0.5447833411445637,0.19311975522227148,0.16981449062016496,0.03475086885867601,0.7089877382329225,1.0999082923300283,1.2326397703049612,0.20036212014611582,-0.7274773211599717,-0.43762995657429277,0.9767339933222979,-0.08205822704594808,-0.40117967315018177,0.6850616547744721,0.5310787507332861,Class2 -1.8873835107755608,0.2743979213430089,-0.8885842703949062,2.2011389717889394,-1.255813869225217,1.0659187992511028,-0.5933889218297045,-0.24122317044085545,-0.025313128279490796,-0.292283507358233,-1.5345459655338283,-1.898632449909411,0.40450056781992316,0.9339124097023159,0.13234976748935878,Class1 -0.9746145436513018,0.3494125852284011,-0.3511345977583761,-0.15214019048317814,-0.7986846123955802,0.16599827065468536,-0.5290110626460998,-0.4191545011756019,0.6935692281885487,-1.191862145537904,-0.12563093836615002,-0.7530346017300951,-0.7861762545071542,0.8579256117809564,0.557872423203662,Class2 --0.861021842431474,-0.32530464246608815,-0.3046783316624689,0.17418473251468095,-0.5241928362808161,0.7584615618219103,-1.0617968588054758,0.3158577377032462,-1.5321425004989215,0.5006965347891865,-0.1350262217199157,0.4692087559425751,-0.13114455668255687,0.390286612091586,0.31291091395542026,Class2 
-0.9696729044298258,0.2643586905840913,-0.017834416001327052,-0.05575587616135283,-0.6374848607566467,0.7223353154058106,-0.3675937059337954,0.004887780895202375,-1.5136534880761676,-0.22784432214238667,0.30131712802170846,-0.7820030104789326,-0.4256886993534863,0.5250641682650894,0.5940439731348306,Class2 -0.11287148493860164,0.5712267897541771,0.5297665895417885,-1.8351284185732784,0.2173075503695074,0.8786388443668107,-1.9950535793113144,1.1337729347203576,-0.030689271875230635,-0.18727246053185836,0.013821530151253791,-0.18467000092689456,0.28487085551023483,0.336646543815732,0.6072708254214376,Class2 --0.5476206717328627,-0.95405133909841,-0.7569600753041628,-0.08903126011617352,-1.2131055125644106,-0.7911860335116356,-1.2294445837207615,1.2053834763120055,1.361810265916855,-1.4456757679073466,-1.356812352186908,-0.17700140091629252,0.04401885299012065,0.32839197595603764,0.45277525018900633,Class2 -0.49642659645707626,1.2507823726934713,-0.8096071645440723,-0.7167967862607745,-1.7528744985276774,-1.745609090869823,0.9287476947031713,-0.4223520021024352,-1.3392076543484037,0.8554550841936207,0.6360743837367879,-0.9132156302904453,-0.713268730789423,0.1698981940280646,0.4283409286290407,Class2 -1.4927508107241723,1.4864897087341458,1.1846006046486899,-0.43363531390463406,-0.1685418376347608,-1.0395897524768618,-0.21604168547081853,-0.15899538099781632,-0.5847598324543448,0.4819339196767815,-1.0564886082451084,-0.9037156612802627,-0.41380851389840245,0.10794056230224669,0.32955118943937123,Class2 -0.7942323346092712,0.2340178283332896,-0.5399819314054349,0.06463846882318992,-1.3253439640307008,0.3355143645927172,-1.4455061088136807,0.45859812369021485,-1.9884795270444473,0.8977264497120292,-0.2826779103763885,-0.1368926602671914,-0.20925340615212917,0.7038779917638749,0.32530783652327955,Class1 
--3.6828403665060025,-4.081205697704208,0.11021813257672369,-1.0160318645210795,-0.6533554789664516,0.4389632334502576,0.32324312832002716,-0.8094143558107906,-1.8455088933265642,-0.21029517522355712,1.010066534605342,-0.6094685145642352,0.25458393013104796,0.19862694898620248,0.31139333778992295,Class2 -0.07682319616552746,-0.8727478641276317,-0.23315722046173676,-0.5515182083965154,-0.13360809667290732,-0.3446864319741636,0.3940584441666848,-1.4813884232931416,1.4796013267951873,-0.6398806398795364,0.6579749038928335,-0.5572503568789797,-0.6958851348608732,0.16995435883291066,0.8047110962215811,Class2 --2.6641193634726537,-0.47398309619914647,0.9847928125654024,-0.157501522062883,0.08530144072614353,0.27061824160223774,-0.4859670717397313,0.8472706680491935,-0.02445114537154412,0.6658038855406975,-0.9989196298980836,0.7476738320838905,-0.0842022504657507,0.5284625147469342,0.6209400063380599,Class2 -1.3110713753648535,0.593924256566271,-0.8752996360431233,-1.792425978558658,0.6962144725731564,0.4000251828273844,-0.4399177819019982,-1.0722880191820696,0.40249006535003623,0.20971207925793403,-0.7826265990909486,1.272345841864909,0.25072653172537684,0.7380847840104252,0.6274045424070209,Class2 -1.3056927433510355,0.5289751705158134,-0.7176430563605561,-0.3125819375591701,-0.3194979232319178,-0.2995425887413458,-0.21868148675486032,0.3134827241336526,-0.4309360025320731,0.33551627331526196,-1.8078200688214094,0.5662098750264455,-0.7745352401398122,0.8914645293261856,0.8474558428861201,Class2 -1.111830161593043,-0.1384233488006245,-1.771441646721291,0.4987005419845995,1.972750997967282,0.9847840112237104,-1.933081467594997,-0.548094661117819,-0.25395781272539114,-1.243408995486446,-0.9325235414793535,0.43064863710542806,0.8281229860149324,0.32095532258972526,0.06378979003056884,Class2 
--0.9917035537675828,-0.9609884156313663,0.3444830872776224,-0.7004863830039848,1.1485802615956004,-0.17395030191289573,0.30370134095922463,-0.38298039661936417,-0.9424949409121484,-0.20134978409682375,-0.3888307374531429,0.8300777511422203,-0.38040934782475233,0.03453934518620372,0.6401119267102331,Class2 -3.182706447993293,2.162310546532481,-1.0054789757113347,-0.2517332463575945,0.227279323491317,-1.4089549620340018,1.4274784468940682,0.19509777486123617,-0.4530128668380245,0.08478562283040617,0.40635088980612916,1.139267775724857,0.45542943617329,0.30624584876932204,0.4882084485143423,Class2 -1.3954122655919696,0.29074639301778404,1.8605606761441815,0.9515574837284451,0.7542218752495092,0.03582845477381127,-1.2656746722562229,-0.2521710982139121,-2.6049126358827546,-0.09746563125659731,-0.24234505216405497,0.9580808090636902,-0.20174934854730964,0.006931426003575325,0.4420960445422679,Class1 -0.6914891028880272,0.7071105187004996,0.7771032096521436,2.0775845744212407,1.7993249399711204,-0.8550470361644263,0.31558163293255936,0.7336073464152789,0.684950966824695,-0.9418913820697955,-0.5031443943568543,-1.8362514832681283,-0.39814715599641204,0.8340321849100292,0.5145560328382999,Class2 -0.1392659842559969,-0.3137719606525257,0.8805901248482815,-1.3697986034480123,-0.9353237094398718,1.7993057918302493,0.6686953103270827,-2.9447588077386997,0.9809142358365964,0.2939878522590026,-0.23005899418069983,1.837890614958914,0.8441960513591766,0.9932443210855126,0.9554810742847621,Class2 --0.5248320521551898,-0.9155407940036165,-2.3009424637045774,-1.4533529771280065,-0.24194471478735907,0.4476981078625467,-0.6888476420678981,-0.7155876586979416,1.1411251986383333,-0.9107237593236114,-1.0686828835604694,-0.25757637397417316,-0.8712079934775829,0.030600045574828982,0.9721860243007541,Class2 
-1.7308864492225822,0.41046396673215624,0.5745638494252019,0.7085108260067176,1.0079519241647654,-0.052194428430755564,0.6792734898593433,1.406725904043891,-0.37618236854275705,-0.14729241649244673,-0.45460374628328887,-1.0992211188422873,0.1324711898341775,0.8172531414311379,0.5549985403195024,Class2 --2.6375336090717143,-4.0552175221519615,0.538339138757931,-0.14954015885514033,-2.0696275642391875,1.552061473525252,0.8377732139648255,-0.793017122450861,1.5933763800593224,-0.9687315887395517,-1.443473781786319,0.3069628795624414,-0.5802923832088709,0.00640715379267931,0.15843224292621017,Class2 -0.7728140132443015,1.4082267372705148,0.2937216779795855,0.1616608077412685,-0.9793892767287681,-0.8897620647302837,0.6169123184478759,1.245368196449334,2.5340351182841725,-0.40678599421330697,-0.5520596420018096,1.4854346748124538,-0.6620989190414548,0.8181965118274093,0.24956556013785303,Class2 -1.3742261260425006,-0.42397111426603645,-1.3778639179598973,-1.6924935117017383,-1.9604643280855156,-0.021825116141717983,-0.10075872627570627,-0.27097301004764657,0.7861762323675359,0.4224580641711046,0.024640812506806133,0.24524766749567464,0.48103365395218134,0.25264958874322474,0.5394300541374832,Class2 --2.4631329542706752,-1.3738288308852291,0.33824718295299055,0.54108398020013,0.35572442555795764,-1.724238859029873,-0.2609186856995656,0.9332453009092841,0.7345765020296569,-0.1650758403133886,-1.9215927763535814,-0.47585922951814774,0.4130952060222626,0.34671162511222064,0.4438750490080565,Class2 --0.08081776131392157,0.33819495065985095,-2.9640691265486017,1.5364837320386124,0.7234617307077568,1.1208752580823211,0.20465268832880626,-0.513148856371026,1.4874143376849986,0.17027364562970398,1.5299228203049544,-1.4437754308206068,0.5757579910568893,0.22240079659968615,0.9322066369932145,Class2 
--0.7963808017563383,-1.3503073550385234,0.12554977342342033,-1.4675696645489127,-0.32943528428569635,-0.49832867737420294,1.484774596931624,-0.09346546694524711,-0.17318491572247446,-1.7332390729448457,-0.6491216725089398,-0.673794134875836,-0.18236477300524712,0.1980101023800671,0.6676354703959078,Class2 --1.6410708638682312,-0.8502303192208648,0.005749468361786727,-0.25491552429710884,-1.0850481111340167,0.8950319042825862,-1.4933097813239735,-1.019653214501748,-0.4069480982826681,-0.5130702962621817,0.48201907769518176,0.6069211678195923,-0.07499210396781564,0.6931779007427394,0.46650792681612074,Class2 -0.07963256893837309,-0.15080034369910167,-0.2841967920896972,-1.109891858294675,-0.34257825749125137,0.8558802752536977,0.05116615901925529,0.13797681193868003,1.375535070228366,-0.15491884877493065,-2.0766273147556276,-0.9044568735307337,-0.06849563959985971,0.6155588633846492,0.503169515170157,Class2 --0.37104961237066014,1.389683883228963,1.4515332641743726,-0.12468360513014638,-0.3866870499363029,-0.444440316580014,-0.6285015654038122,-0.21111296567640145,0.6068900308614177,-0.43065355985334103,-2.512362902689378,1.3892579909650755,-0.5922314673662186,0.6234736847691238,0.8739954782649875,Class2 --1.1812289295440954,1.254625489929957,-0.5648695862956387,-0.25665253815184075,0.1924348530698981,-0.36693922539673196,-0.7833271453279917,-0.09390304860789922,-0.7546982109776668,-1.3764914155653074,0.830576722623324,-0.87598420786791,0.26954948296770453,0.300281087635085,0.6162575706839561,Class2 --2.0714823236116904,-3.4508270624604642,-0.9613120971255306,-0.9605541817802881,0.8443880011915926,-0.2569784055951089,1.0316849141155073,2.209905085846426,-0.18769632168085434,-0.1729911342064302,-0.5713359883743925,0.19525021843406268,-0.9027789197862148,0.29606234352104366,0.13095285184681416,Class2 
-0.656507355622651,0.8169907273377047,-1.260169545982297,-0.655378270634232,0.8123760271073169,-0.049810168436204005,1.5364996450563964,-0.7229697375391309,-0.03358704012333154,-0.38718835195857376,-1.1630457025145249,-0.01850025468069821,0.4438632414676249,0.49481944809667766,0.09103970020078123,Class2 -1.263340539871057,1.631419572456855,0.07918119876488627,0.2852964674042142,-2.1487738868061155,-0.3381142468670993,0.1876192742008376,-0.37422055534538384,1.3260007763542838,0.016281773826901245,0.6758610099238009,-0.42886972903078285,-0.7830706154927611,0.8455212861299515,0.5719267728272825,Class2 -2.337016676108927,0.5057311135805911,0.5532941287315396,-1.6866599637177855,-1.0740176189016826,0.06798068182277257,1.4138216256807898,-0.1250384222392461,-0.6822805815043663,-1.098677870219653,-0.12899774267928962,0.5819577332857625,-0.7211653059348464,0.4009049024898559,0.5005087044555694,Class2 -1.2376944761252564,0.0569332392218006,-0.15869289613602708,0.5200656504483244,1.1451328990166203,0.6611310466953674,-0.41631477615460105,-0.5995151515437145,0.08879946701059648,-0.46626917561114667,0.22755162790540184,0.4309683590792442,-0.8398655699566007,0.8736696974374354,0.7802563162986189,Class2 -0.6087968784080439,0.8276158728885193,-0.7494154910826559,0.3972019868121007,-2.5871283502791464,0.03894453530837025,1.999894590131308,1.5366948268204235,0.8457876153279417,2.291713387372756,-0.11254048088677875,0.33908440055175,0.2670504883863032,0.8395129621494561,0.13581194286234677,Class2 -1.8817674068164039,1.8432625295156748,-1.4007694434704465,-0.9971389005311802,0.42414290579538416,1.9949973916179327,-0.7235307405170139,0.672153255194795,0.8754142219583849,-0.8881631336822587,0.014796640752818987,-0.3107334908131562,-0.7192847258411348,0.30132352211512625,0.04405682208016515,Class2 
--0.19069886204663353,0.018526243442416296,0.2503575984150056,-0.21493527173219368,0.9471504481088454,0.0469195137565146,0.6544097820950353,-0.7492035532910497,0.7372600581710891,-0.068993242950366,-1.178679956881329,-0.7933266832404365,-0.1497402717359364,0.36652818066067994,0.5913672980386764,Class2 -0.5177458832844535,0.1539674944774217,0.5583065761334411,0.7491578441556422,-1.2653363526870873,1.0162217030005951,-0.5591779873398857,-0.40091674098593605,0.5162264388740051,0.0015669614300332126,-1.3580689889890236,1.5105927852629082,0.954107029363513,0.050051950849592686,0.35817346652038395,Class2 --0.39886532550330717,-0.26517580026254733,0.5730471491346687,1.1150678047201146,1.3344732695258221,0.8093159558467707,-0.43093807109509175,1.2031129484121807,0.47185378309611,-0.5451769787333262,-0.06152001228536291,-1.3904993675121244,0.2971961507573724,0.9688085520174354,0.2008572337217629,Class2 --2.4046001685438707,-2.0887573435473943,0.5880161499333795,-0.10030336782861131,-0.20570454187681225,-2.9115119096120874,0.7188376536966219,0.36749416913179567,-0.3865169696215534,-1.74697353593361,1.5825108101265988,1.3096183254787643,0.10855448571965098,0.02150782640092075,0.6164339771494269,Class2 -0.09280899453677918,-0.35876715750596466,-0.964392458886734,2.2773682991893334,0.2547259313908289,0.4945769235551367,-0.47217756952082757,1.6694925079265661,-1.0708923086578073,-1.4123041566095567,-2.1568538471267287,0.5002433677042474,0.6408688630908728,0.13362820469774306,0.27958715334534645,Class1 --0.7926385128358495,-3.025260421002467,0.7880818012734286,-0.4227077188301489,-0.03125001733563533,0.560375165634998,-1.6768882876245954,1.3450242146846503,1.4662772390004675,0.9843841208461577,0.9453795737076747,0.23936661412478066,0.21104449918493629,0.8643631038721651,0.9854940308723599,Class2 
-2.6885637690290087,1.6193417294044856,1.1076031259747685,-2.131239160861179,0.06976462825470163,1.7482385497332935,-0.02940617296137629,0.9102246829652918,0.9029103438377017,0.24750823277289008,0.6474595764576947,0.4622964877608385,0.949188684578985,0.2921101900283247,0.16697097150608897,Class2 --0.9012682502302372,-2.156201858560075,1.0513253890336534,0.13774966702393146,-0.6471402221242374,0.8254570790203273,-0.4276402093617594,-0.6734097441331023,1.249888802862389,1.6486033618049878,0.9837607313583947,0.20577218565060706,0.2832086803391576,0.18656127690337598,0.9378439846914262,Class2 -0.15800534987379167,-0.015410948710944117,-1.1305617492440942,1.4061776108395785,0.6585236501498021,0.3209582610300762,0.18404084573756224,-1.074492808858086,-0.933294880369377,0.5222967748805077,-0.9125217171533447,-0.46094370984285865,-0.5484891477972269,0.2699215847533196,0.21741839102469385,Class2 -0.42351295114922966,1.1258823957035005,-1.4067735207579646,-0.5032560676206113,-0.14366315467899118,-0.0484407884333096,-0.5216210779014083,2.0843627611049533,-0.660773348662412,2.3163179833802072,0.4538568869536061,0.2389157808375767,0.5906200665049255,0.29686630330979824,0.18275933433324099,Class2 -0.7536989993690673,1.0103119731209165,-0.6704279739112394,0.39207650602451827,1.614981687378699,1.0544300960345891,-0.3897112644186408,-0.5064849897798896,-0.8982446339027896,-1.2550377348416184,1.8944771389452761,0.028763209202817674,-0.3325612940825522,0.6753554791212082,0.9280145792290568,Class2 -1.2520161277198367,0.4956043541010752,-1.0047938075299405,-1.082529312967981,0.48423837744646814,-0.9146200481845138,0.019403613568321865,-0.6528747741527979,0.3833269835626276,2.391109557502229,0.7102570709901562,-0.48773712036933725,-0.03870067419484258,0.26001782109960914,0.18996924976818264,Class2 
--0.23601581194500862,-0.20809283372290174,1.3711331468649888,-0.18939696036851628,-0.25052013241668913,0.8968122564764446,0.3278938523041719,1.4443126540848525,-0.33705929158363546,0.022013676948848898,0.0058350223324808775,1.0014650628986614,0.1190083185210824,0.9959770876448601,0.7410340590868145,Class2 -1.6810970716598637,2.1642724304218,-1.5027561845044572,1.7953408304342136,-0.7009949677448014,0.001656928621046591,-0.3472882161081329,-1.1351828747403547,1.7109768035981712,1.7518852993631902,1.2292324796072243,-1.6881819837544463,-0.5047996682114899,0.43063696869648993,0.11114481301046908,Class2 -1.9142897641920449,0.7488671516346925,-0.5526479207346017,1.9928818192142523,-0.10461744127585443,0.5519840235693657,0.32892870998792145,0.06242901325995528,-1.373729205944483,0.34981032590855343,1.5514117602062543,-0.12854327506317914,-0.8483067667111754,0.52333511877805,0.002823296468704939,Class2 --1.6412050192242291,-3.316864000399756,-0.3210361617583266,-0.7342384587162011,-0.5070315872343978,-1.8540870509736858,0.9914439067097242,0.02400361668321047,-1.2741917590031007,0.35517004275099995,-0.06566709505894178,1.3732868182641094,-0.09696847992017865,0.6802365980111063,0.06535550812259316,Class2 -0.4053565904339146,-1.3854321406756402,0.17962740873308125,0.4694222136856118,-0.12104520566508023,-0.6678497969392977,-0.17581355811181892,0.4316383587349879,-0.11099923947385879,-0.5113906143818648,-0.7237189547204678,0.8600685144131476,-0.6721640885807574,0.23033817345276475,0.7987530359532684,Class2 -0.8921015439050105,0.2909801139900553,-0.46102292192123234,2.9272330362170114,0.8283821154887892,-0.151507115919653,-0.25004366481197116,-0.4355823276740803,0.5167870745636639,0.37722573591897646,0.25482929548868294,0.15101766998982616,0.15289853792637587,0.5920814611017704,0.23193099489435554,Class2 
-0.6075428986255004,0.5107529121104307,-0.9194868472436294,1.0958882843460411,-0.6836462089746775,-0.02278180815139731,-0.31466628031000055,1.7869618636232611,1.3019740495609686,-0.010856356899418227,-1.950257691142064,-1.1412054384355943,-0.8863643053919077,0.9559543738141656,0.08757535321637988,Class2 --0.4838130582099909,1.0113184787356013,0.312240355549253,0.3736369615101507,0.14787491302506714,0.9076768578889537,1.0175794479851046,0.5467793883346644,2.211389908921283,-0.4598771826041022,-0.04679491791957949,-0.028726150234245032,0.36172299925237894,0.35339251067489386,0.6290607361588627,Class2 --0.6349993977870302,1.3110681880980812,0.739972128954355,-0.39873644911411815,-0.6751617146433618,1.2949312899411554,-0.5715247245082853,-0.6186073779735575,-1.0675513213700825,0.9419424829020153,1.0502492452076233,-0.4717388000340445,-0.19749334687367082,0.9235629837494344,0.6476614931598306,Class2 --0.8690723669249834,0.5329937811527361,0.8264742848487506,-0.4476095941607286,0.42286411685968706,1.577437841782337,-0.8556545863359869,0.6770923245737799,-1.1645412910851112,-1.0062260414341426,-0.2106738374125139,1.427804139817923,0.24938335362821817,0.5794733900111169,0.4245775693561882,Class2 --0.22830429631166865,0.7045950333041797,-0.15206843762906264,-0.011377122343740073,-1.1615931567076105,3.2132462545718146,-0.821184572856781,-0.8684976463078738,-0.023092794055703945,-1.13539659951782,-0.18665695358309495,-0.38274656641193416,-0.04964760085567832,0.5887488042935729,0.7318046863656491,Class2 -2.8352870249284825,2.5473418591566697,0.469967669817349,0.2150890031363766,-0.5856345145433811,-0.5718154896056701,0.9877753666346248,-0.8764054291766005,-0.4359930679919338,1.5307133065966603,-0.44988145635188875,0.31277586612336233,-0.09624137310311198,0.33745576557703316,0.818581075174734,Class2 
--1.8534635663891583,-0.3140305557832167,-2.1996780860561516,-0.31687803363801104,0.5222151166965188,-1.853261675706178,-1.5120217695167475,-1.0197952714957002,-0.22027121463823285,0.6145113572474276,0.2278080923728507,-0.248799602452706,0.7719148281030357,0.8684071835596114,0.3588533077854663,Class2 -0.748647313095298,0.07454479436816075,1.3081950064879477,0.8679091547037644,-1.160342324825267,0.33021146031469817,-1.1331641863442936,0.2987788704348238,-0.5538955518915395,1.0089181156041263,0.21091096826661793,-1.1886967094169123,-0.9072959776967764,0.38581775804050267,0.157022709492594,Class1 --1.1295019382638831,0.01946394295396181,0.3901472777479214,-0.08782228370872408,-1.090990719738382,-0.17716801636142432,-0.5487762594509505,-0.934732414208208,0.5606558509068432,1.648568423457673,1.1865782175836894,1.3138542005037048,-0.6872288892045617,0.5313552825246006,0.3534669193904847,Class2 -1.5746567353191947,3.5523548761224344,0.7438010832264619,-0.29234920686642724,1.059790303155849,0.3934350966196134,0.981506841321401,0.12361547224926503,0.2743534622881129,-0.15975309485734804,0.6072638561280008,0.8041201999673068,0.6755405613221228,0.05801983526907861,0.23518803855404258,Class2 -1.6559067412218045,0.9799805940106847,-1.3747116997604996,-0.644395805727907,1.162440010087095,0.41757196380511746,1.627176164781762,0.20604879356942454,-0.193422512884472,-2.214484215166571,1.004532500122834,-0.7316909237855156,-0.2059733122587204,0.19616674375720322,0.16241561784408987,Class2 --0.8970046207973165,-0.09953358954726532,-2.2388395109728174,-0.11942023146823305,0.07176049516858563,1.6956824705795683,0.5559488723477429,0.876329444794962,-0.16245737023914802,1.1303581918950822,-0.1873684764434084,-0.5695008197844316,0.32796058943495154,0.9449646940920502,0.42433038889430463,Class2 
-0.6352558869654313,0.8437378060720703,0.07168836404004636,2.2107054811829543,1.3908810559554772,-0.13189015977441207,0.699184673229242,-1.8162197560753883,-0.6976235180452325,0.22718900232826794,-0.7399719678455782,-0.23475844427460762,0.46314458129927516,0.37715866486541927,0.8350699117872864,Class2 -1.1182752195538652,-2.121347483703456,0.06757634355658834,0.8688766647433944,0.13314593730081528,-0.6973501601861117,-1.2789114219056081,-0.5566211677386601,0.41839139747268705,-0.5568705930413094,1.2880200991398767,0.2932947357439187,0.7092564953491092,0.8778232210315764,0.8997559393756092,Class1 --1.0303957620622657,0.927701152190159,-0.6644372507172625,0.3500246242158078,1.396948026997812,-0.8870636418162201,0.6285732159146431,0.7483588538042092,-0.8679534571328639,-0.9657145152314583,0.1094488093597174,1.6140130648729016,0.45949270110577345,0.6517025935463607,0.811835803790018,Class2 --1.3460447496736083,-1.811233505607271,0.8528231745219151,-0.12283685604960766,-1.0220600249395593,-1.568124670728662,0.5088717462631281,0.17130484496673357,0.45292574070400804,-0.9064689354080422,1.2320633711038655,-1.1167527449148635,0.8034290308132768,0.04005892132408917,0.18342556268908083,Class2 --1.3846900403576616,-1.620898829785021,0.33364100186788176,-0.43333428783287437,-0.40876222214198177,-0.43875216957689755,-2.667295578423501,0.6043852045882364,1.949775011866651,0.6260245031163666,-1.3043128051571233,0.36789825488486827,0.6603078469634056,0.9379698915872723,0.6312207872979343,Class2 -0.7247960903626267,0.8029619304888989,-1.0916298717472328,0.44597161477054603,-0.9194241383854361,-0.7275218181061993,0.9359588823491909,-0.007844895925239743,0.7373738622534984,0.09453970521925893,0.5334328629304128,-1.1542702516334236,-0.05785097228363156,0.660475121345371,0.4913327519316226,Class2 
--0.8549457410975517,1.0563895848010945,-0.5847265638209804,0.5979569530877165,-0.516465526631346,0.7543527546327462,-0.37713614514160243,-0.2843583263971471,2.097584739213388,0.3816297460875168,1.0489323776403026,-0.10425137942330998,-0.12481596926227212,0.5431708483956754,0.5663511597085744,Class2 --2.2965412522883675,-1.1594571647352883,0.7346950570080588,0.4177578097645925,0.8895954501716075,0.3164706829805692,0.5761047274682086,1.2556137547815216,-0.9275394158687772,-0.3552701594650642,-1.0787113961361106,1.4291634028154365,-0.962482066359371,0.8887241217307746,0.889185382751748,Class2 --0.16091680582676987,-0.5446690255416284,-1.3499908835534407,-0.6172693972013034,0.06665848413237307,0.6107478936861708,-0.6944892298970832,0.2034518369567916,-2.723164696887833,1.2903461484089713,-1.4530036810095477,0.8135690397336464,0.5153175536543131,0.2288634346332401,0.513314226642251,Class2 --0.20146998121239648,0.15399950433536883,-0.04270759789799458,-0.3450611724314187,-0.14323121892387886,-1.6512644381213024,-1.2594092919824962,-0.16699818674911515,-0.011376403160230364,-1.3173365026991852,1.3991536581649426,2.3195180245137625,0.6677997102960944,0.60780467139557,0.7133072335273027,Class2 -2.4048535020235144,3.251778520858587,1.158031242308652,0.3530367062041566,-2.303907910783484,1.955680101937453,-1.3984123656542202,0.8013068535301638,0.6172225322981212,0.6681440613352551,0.44791789014193234,1.160571993917749,0.4275852534919977,0.18642578064464033,0.4264582709874958,Class2 -0.7778175911764116,-0.39228913828247586,0.028376971799354094,-1.3518201140315027,2.2002515680952546,-1.2493390253488257,0.14971257936825638,0.5470033380957159,-1.5980562282153234,-1.3042606997941761,1.1915115283392375,1.4182820491300683,0.9374375529587269,0.6126882433891296,0.06697874865494668,Class2 
--1.8092594985403485,-1.649044377712579,1.4455303468419234,0.6300158048947996,-0.6343013989744869,-0.784230755038377,-0.711239967247424,-2.0178311202347756,-1.0159914635792815,-0.04694833299969712,-1.422067022154617,0.7036390329444511,0.8596606790088117,0.24261099938303232,0.39063570625148714,Class2 -1.554421137213211,-0.41730510661666875,-0.9074903250231824,-1.092315759656998,-1.394140541777779,0.31707331765718977,1.7548664796124813,1.0979040314424708,0.3364615096693232,0.7907595384100312,1.9507009715734194,0.4076964317608004,0.8268131306394935,0.6182246748358011,0.2863880598451942,Class2 -0.3917248003600992,1.213077648669268,-0.11096355187227043,0.3007042328140398,0.619231852200191,1.601099936807046,2.1521626543076837,-0.9209562197290424,-2.1856511950882256,-0.19136173924921013,-0.774909887049508,-0.07450970934833998,-0.6556640085764229,0.6292313714511693,0.8275251132436097,Class2 --2.0089049096174483,-1.9515245296990604,-1.0301225533698442,-1.0131264210167585,0.2449524680196513,0.5859122758414343,-0.612951001981529,0.19604102030691095,-0.41788407259312044,0.3226490765442641,0.08506031459115972,0.4781244787334314,0.715187843888998,0.38261727266944945,0.04912248719483614,Class2 -0.8396569993614172,-0.3693974209328553,0.82145375631857,0.7913921572802787,0.6568059814492679,0.36318990050660455,0.13938204389850908,-0.33542427435961747,0.2941044118257888,-1.0374839424375786,0.6946382876419173,1.148640658872759,0.14040942816063762,0.9174459914211184,0.7816308396868408,Class2 --0.2315836061458838,-1.2934564553771681,-0.41074018785648586,-0.3479825318892297,0.2699137975313109,0.5690656060031841,0.0397580538780077,-0.21344280273112146,1.3503833467729063,-0.28950195079274116,0.8663065437765657,-1.021847485952284,0.1552910627797246,0.8071749792434275,0.859962907852605,Class2 
--1.5864802541919103,-1.6617403549133052,-0.12667474428938436,1.120219904073812,-1.1654173987635978,-0.09250363435686805,-0.39146353153411273,-0.49729844188504535,0.024652686993662846,-1.6162483060124155,-1.3288606987265001,-1.5849697434560783,0.6077428539283574,0.15317996661178768,0.2580072171986103,Class2 -1.5585895243368784,-0.11042579695787025,-0.5231105728712909,-0.39353623778814256,0.16754474564517396,2.01210237511536,1.4151948012144369,0.29995387760570835,-1.7111658626905173,0.8038131675486451,0.6423599192734388,-0.6598714720662218,0.15734145091846585,0.21842358424328268,0.19388239760883152,Class2 --1.1540713100457975,2.0743349457661027,-0.4151635545139902,0.08229882040895878,-0.6509209123312009,1.4318691631306217,1.3597387129974934,0.8933117858330372,-1.7409228569848707,-0.6052365827588251,1.1653638411297305,-0.01913309523103928,-0.8032822958193719,0.028678655857220292,0.8761371138971299,Class2 --0.8695202148560124,-1.103369662526191,-1.2012513388607753,0.5714217912882464,-0.17283741039622796,0.899189926392476,0.5709102215016719,-0.7632416271396164,0.6887895755802297,-0.4011483605079623,-0.6896552912701783,-0.45187402385549486,-0.5366570390760899,0.8467430314049125,0.7275490451138467,Class2 --0.7761981572492329,-0.940163597399448,0.40880216375220035,-0.02896220480474618,0.964645694248629,-1.349005629719927,0.9895367823335545,-0.6586697191408368,-1.0444870282771326,0.28553782663370325,0.4620299495207922,-1.8041664001017395,0.5494848843663931,0.29801127943210304,0.04279291722923517,Class2 -2.6879101643622505,2.744478611279119,1.5149116948171903,1.1949022812190684,2.1226891113082096,0.9875002990955682,-1.0867559536824873,0.6507337997205533,0.12376318616357554,-1.1451888809477984,-1.8346175689128623,-0.2890863282524537,0.07012739684432745,0.8169494138564914,0.4933202520478517,Class2 
-0.35949317727038754,0.371703620729646,-1.3927023377400092,0.6072688511727952,-0.011617693863953865,-0.6809195390628522,-1.477804427824378,-0.9447578883414129,-1.0039541334686615,0.024127144629870118,-0.5308867828868072,-0.04207825222094347,-0.4091600766405463,0.9480548102874309,0.7481180615723133,Class2 -0.3320320482970732,-0.25927851268056523,-1.1927640560372792,0.17636972792898764,-0.12852541514836446,1.1626286798245082,0.0796413144949455,-1.6681589304310573,-0.5147739463517671,-0.9495641194824612,0.36760575907807613,-1.3991471347490083,0.9243762134574354,0.9765972094610333,0.10575069836340845,Class2 -0.025576823476884808,-0.32035569483946313,-0.12162733076905728,0.06282828527514926,1.3984443824073358,0.5606238110543594,-1.1882291191893646,0.6605683100699693,0.9530926777563634,-0.37923515130267743,-0.4837182735522048,-0.6944105811391316,-0.3489302909001708,0.7273669925052673,0.8110261743422598,Class2 -1.192040080473597,2.772142825266952,-0.8629547699115359,-1.3382497584843602,1.487235775958124,-0.49228679694513644,0.5836373954909764,0.5839205969848628,0.01662970298498939,-0.12347615746142906,0.982036803561518,1.6534843219763415,0.856487323064357,0.7983491092454642,0.41275184345431626,Class2 -2.5437003366175652,1.2898069136934844,-0.9910537488569328,-0.10946400729605639,1.0602260025926078,0.31299545645648763,0.24412589366204135,0.7879354137036265,-0.10216633905502091,-2.007530512292921,-0.42478609720612337,-0.7359583678281114,-0.8214289098978043,0.9525210217107087,0.8945211449172348,Class2 --1.3902566529945388,-0.769508113696736,0.16676512806218707,-0.4427955016999922,1.3750877982735266,-0.34543941559886604,0.8309996871959091,0.6122647662665525,-0.7913344331878721,-0.2354048598961347,1.0884503238927707,-1.7123544199563152,-0.2468665442429483,0.7468659896403551,0.5709276981651783,Class2 
-0.2450153168890038,-0.8102118275044133,0.41602115815926066,1.6446142833734825,0.4234175566657324,0.3613957317608272,-1.4789087246713502,-0.8523277620618275,0.16410456040388618,1.5363870389180354,-0.011170802520680296,-0.9735617144337008,0.43884660210460424,0.3352193906903267,0.09831650764681399,Class1 --1.0453879386839162,-2.637998346164069,-1.4178092779415705,0.3087354459106025,1.131966739277627,1.3942283302960203,-0.7829433359728821,-0.05001627221126333,-0.29374889055958453,-0.5738592981592194,0.8927456893967427,0.009111344703322538,0.4557741959579289,0.9632852899376303,0.20835317997261882,Class2 -1.3949072083628673,0.3380039018740215,-0.049019583615729605,-0.5943243252641731,-0.091079445544623,-2.0295973452184444,0.27278238558240075,0.6273042870329135,0.0067242279385902794,0.054401191082964775,0.7481568736294845,1.2056895706466253,-0.23091218480840325,0.4147904431447387,0.7162821351084858,Class2 -1.3999064796322322,2.2122967930957533,-1.6762255727608693,-2.553491833806537,-0.3126915739380466,0.17283206679420418,-0.351186455122217,-1.3918383449611322,0.923765089282934,1.0799405944186906,0.0598271128554821,-0.524676935747674,0.062145906034857035,0.26845811982639134,0.4583155179861933,Class2 --0.865276373700298,1.7885321581072025,1.5688005744691955,-1.3606682237023842,-0.2183068015339459,0.1464519880826539,1.3503464252027308,0.3108674389876054,0.38821266229780144,-2.7743287811587,0.28768145869079825,1.315090209588577,-0.04520681081339717,0.8541049160994589,0.6460430547595024,Class2 -1.0243544989596087,-1.687179255734453,-0.7318564070074984,0.4509160146972881,-1.3229000008550604,1.5680356902446044,-0.24648294650398384,-0.667329900475988,-0.9476104920411551,-1.8700928436453523,1.1964517468844225,-0.7525800918784489,0.1635787091217935,0.06640757131390274,0.36018246738240123,Class1 
--1.1883028959701498,0.2918345772325851,-0.37531871570919506,2.3932427270761836,1.4986267747833324,0.23738706515180097,2.9550376302091057,0.002377159944863675,-0.7438594695397664,0.5761896109440032,-0.230346556832681,0.9239013531988046,0.5259817135520279,0.6045331470668316,0.19750171271152794,Class2 --1.9817197398822575,-1.150398349475491,0.9966494226521254,-0.26264388966670266,-0.02641008747128448,0.6191167237933619,0.34230025245141543,-0.8384185774828818,0.8361731382179907,-1.858080311162346,-1.6108794699528353,0.5784061835091123,0.2029005791991949,0.07259646360762417,0.8182247839868069,Class2 -1.0984313130448191,0.08286597994732314,1.3582970108034982,-2.205266786906712,0.6416200608989728,1.5606868468945367,1.098936938225629,-1.263943362293947,0.9314513867277849,-1.5342374172410984,-1.481759583347527,1.0767167194140512,0.28381067514419556,0.3359648089390248,0.7207330132368952,Class2 --1.4123556169653884,-2.4117301547597063,0.9748794047023843,-0.5373865969014392,-0.7009501016539902,-0.5104578742140506,0.8504227003650217,0.7005805607489202,0.8038417114568471,-0.14160601761094693,0.5493712646349476,0.2684422700525027,0.5861576162278652,0.6173704625107348,0.17129120836034417,Class2 --1.9655880786859956,-2.832163748013345,-0.10895379403877686,-0.26697935390671046,-1.9974873539705726,-0.8858123848652001,0.41497991365598685,0.29655105412439103,0.1029035745871673,1.2134876182801286,2.29709000964818,0.975706634749223,0.3263358175754547,0.5801122481934726,0.9587604459375143,Class2 -2.21090259510963,1.3204723225174377,-1.0455670760493336,2.316725329384756,0.2615722359167123,-0.5480159164562981,0.5620850074165582,1.2757704335018067,-0.3893888910392069,-0.49634022207134504,0.36802423875244433,0.2602142788784289,0.4301396803930402,0.6152890031225979,0.43008007341995835,Class2 
--2.137995337943183,-1.566298626960169,0.17630954143466127,-1.5140825661942907,0.038745731008086294,-1.5713174931847478,-0.0783842795039024,-0.2260034358560512,1.1478054452172521,-0.7292351884459288,0.6012876231808519,-0.0217092891558504,0.614133914001286,0.4108977192081511,0.07745363377034664,Class2 --0.8935371445426773,1.3098084538223789,-0.7859785186940577,-0.18714267040672228,-1.1288675019422734,0.24433797766441748,-0.19291118961933482,0.8002655667351128,0.6557679944282367,0.2989123691135677,0.18684788089748705,-0.08930501909617296,-0.1706452639773488,0.8879539172630757,0.7360347474459559,Class2 --2.7317739890885506,-0.7234513233313735,-1.373168378883857,0.32724358938473747,1.27963145273649,-0.9821037830134234,-0.3565520791649834,-0.18180162833692295,-1.7096376056257754,-0.05699199042755389,0.5812588092133819,0.7748857558535458,0.5651366827078164,0.6716576814651489,0.893402919638902,Class2 --1.477485109185082,-2.9495148499491775,1.9731197836734202,1.1083714389210737,-0.6770344304865187,-0.7608274887938483,1.234361927510322,0.005143064570485763,-0.41972889462812557,0.14327962927098223,0.9796538645855576,0.8795768503041759,0.6302349562756717,0.7964931216556579,0.4667105539701879,Class2 -0.5923452749987379,2.090648691362309,-0.7946981325153271,0.8589122545828646,1.9440489152832647,-1.609794892714414,-0.9497394560139842,0.10161162118589086,-0.9928914686934596,0.6996625136975528,-0.8752735006582925,-0.47464728653553223,0.5514814732596278,0.3333529266528785,0.6728846412152052,Class2 -1.6864695974115977,0.9876870154967797,0.9385314250457127,2.0149860128455366,1.2818234458675977,-0.8580895719019054,-1.3468782105540857,-0.4754872755234829,-1.075133717541914,-1.019257491405323,0.26711597154414424,-0.8423914057155533,-0.4344127750955522,0.5294470731168985,0.09808070841245353,Class2 
--0.587625902485486,-1.0874875439107894,0.20505674763866222,-0.07176124318862939,-1.010961989292457,0.5614249112384911,-0.025448269585872568,-0.21923803897008756,-0.5392326923189683,1.1728301621959671,-1.040098555398263,-1.3068025187763257,0.5799610270187259,0.5679422745015472,0.10085243987850845,Class2 --1.3392763284836557,-1.6136512641325655,1.9602533984670911,-0.38689955969344264,-1.5587065059944505,0.1010050043781002,1.214208482933972,0.22867065617203003,0.3597528856862181,-0.11248139464323909,-0.5681006995711448,0.6587801409432518,0.5752283725887537,0.24697771668434143,0.06743551115505397,Class2 --2.5604629055197914,-0.07641722854579447,-0.39472236888972856,0.3603304257221201,-1.4151797222550977,0.46591772644880003,-0.2616988683299151,0.5615773379715289,-0.16436987336659006,-0.5723902587809832,1.5287350517098175,-1.8841071702299885,-0.6112838718108833,0.7299374360591173,0.5974683000240475,Class2 --0.8589581054093702,-1.3565443034696625,0.40585118685139715,-0.2595966838413933,0.007730689544250654,1.1733101910795127,-0.7924245604789519,0.1376276333168553,0.11387637421384043,0.37979479795268595,1.3789202914615049,0.22185264808368366,-0.4065482751466334,0.6392745799385011,0.9531389435287565,Class2 --1.608460017774825,-0.35656303836344416,-1.2555132123857389,-0.03292728920508218,-0.955638748128689,-1.627273738968264,0.7582558719672047,2.060353932291655,-0.53756371071683,-0.45650134799887,-0.5609455016024952,-2.4279437568259152,-0.31611574441194534,0.8930078924167901,0.8994244707282633,Class2 -3.5653776226343776,1.820204578016158,-1.594247743828618,-0.14208024834238722,-0.37260801733221577,-1.3001376004040746,0.5676916165624485,-1.1020498116759623,1.3072155905709284,1.1577530627872628,-0.0019962146031267704,1.0632501154543872,0.40821895375847816,0.10995695320889354,0.5501099193934351,Class2 
--0.12220552717918212,0.4378265354582801,-2.3940817656368476,0.25230686381568385,1.0399804933554353,-1.65408676236292,-1.3691922296219956,-0.9469027775108803,-1.5257472907561118,-0.24292009774055692,-0.5468686929055171,-1.4704983400539104,0.9355125273577869,0.9733368633314967,0.718461767071858,Class2 -1.7415876462140676,-0.6249245999379629,0.42180738034833215,-0.4185688704714866,-0.6779015648938288,-0.7551021522076432,0.3646635796840256,0.3352457441913048,0.10296782579941081,-1.1421364199484687,0.2949697700957835,-0.14722465002542678,-0.6659081866964698,0.9720447501167655,0.8505117387976497,Class1 -0.8346494947178891,-1.101686982813581,0.12383870014515823,0.6822479900908432,-1.473510214409395,1.8009744290097973,1.2063425945922874,-1.2255410931575819,-0.7621212232652484,-0.4918615063628225,0.48878777957992414,0.147226297914237,0.14975293772295117,0.24943712400272489,0.005198678467422724,Class1 -0.7849490081691548,-0.3522973769670894,1.1289879169546502,1.2489845253931113,-0.24311597502072405,0.6543828701336574,0.34285455502227846,2.1770335324150203,-1.3676995763921835,-0.2340920029671616,-1.1022864589344143,-2.063360647367374,-0.5055932295508683,0.42176665901206434,0.47396433004178107,Class1 -2.4258929606694015,0.6050092376215371,-1.2719854466940141,0.587594988448185,-0.7978038673411445,0.18816556725991496,-1.2434645543686387,0.674442028713432,0.1800380192029767,1.1834914356320836,-0.8104216029770573,-1.0447193197320888,-0.4788832115009427,0.11638877866789699,0.019394495291635394,Class1 --0.5587509595242087,-0.943674546995103,1.3263105937984743,0.701708214426925,-1.2576047596490785,0.9721093596667785,-0.5992141045159114,-0.06838340403741909,-0.33043578209341495,-1.3356273151648876,-0.07156234002946443,-0.22436435017532827,0.24411763763055205,0.7504946028348058,0.324129929067567,Class2 
--0.09766165828283255,0.3905922875055472,1.8842271638294925,-0.9700029473225018,1.4601374610586462,-0.22538382634115156,-2.328666397948074,0.5722907051657076,0.34205090749403133,-1.2682314777254422,-0.5671857744489518,-0.31750246280213357,-0.579933600500226,0.2611312966328114,0.23280747863464057,Class2 -0.9107584487731899,-0.07040756253737385,0.15210705141920988,-0.9404420849038034,0.2849408474012124,-1.2901059217011805,-0.5548439931436511,-0.9041648032831721,0.5471788169378854,-1.1901266103925687,0.36037211165979993,-0.13252306209504036,0.23243220569565892,0.7623316345270723,0.7976760256569833,Class2 -0.339646231631197,-0.11050318196477733,-1.671146962979635,-0.34129922686517833,-0.011738128334389494,-1.236661757626716,-0.899839219996117,-0.8014819916875184,0.24007571259714483,1.259039044975479,-0.02244198669676046,0.7569446338679334,0.8868069294840097,0.5482081423979253,0.3455234591383487,Class2 -2.2000795531959927,1.3481142775717248,0.726554917691042,0.7952303165971756,-0.7003897647191099,1.107840296726375,-0.2992228733725303,1.087068674503057,0.010423615526504266,1.6338310626292973,-0.05861448147657484,0.35312505668167893,0.7304260046221316,0.32466286630369723,0.49368396028876305,Class2 --0.2077730526905308,-2.240824406374932,-0.08037091451190108,0.3374206854726753,0.20890378901923431,-0.5545855895573448,1.2792475147035756,-0.21673912526696248,-1.2728223440824227,1.8776283594168954,3.0123016436301704,1.2702936442399801,-0.30124887404963374,0.9973781311418861,0.19667940656654537,Class2 -0.872093694127619,1.2138003650996598,-0.9819080261772066,-0.10679004138310216,0.9922670135785934,0.3473219777540605,-1.3276571884051385,1.328486220804491,-1.3131225280124594,-0.4923670227822465,0.5501895628902538,-1.6410773857823557,0.9026606208644807,0.5089442506432533,0.2722451747395098,Class2 
--0.1757840664441146,0.4245957422496085,-0.6515007039951912,0.16671936511917562,-0.5595409928950085,3.1394129749094617,0.6630170389305795,-1.488255991507829,1.9037289667924244,0.7059823317705682,-1.2816516409102674,-0.14335152305569956,-0.3849441488273442,0.9812780455686152,0.6563695436343551,Class2 --0.8760050875732982,-1.175155796180126,-0.17872053504726385,1.3014864293742539,0.05000778273553046,-1.1568504729933025,0.7854520436881429,-0.5730194746584687,0.8231027341399657,0.706261432865376,1.326268565993374,-0.11348781607648757,0.5830254228785634,0.20405595540069044,0.8403176255524158,Class2 --1.3973995702648392,0.1620388216623334,0.6037257402065512,-0.4341079079348861,-0.14946182953092976,1.7107619775135754,-0.06618598884855596,-1.1691173973370488,1.6742907287468907,-1.3876484587330062,1.509657886752076,-0.40734213935371655,-0.6981252008117735,0.2533850360196084,0.2688416705932468,Class2 --1.2640884056576098,-2.5395409078861,0.9604700097074614,-1.1729785634261394,0.8600452260557025,-0.17446531562436376,-0.3895654818904379,0.4729869662718073,0.005365911081033632,-0.13368431780093218,0.20900071993007213,1.4624790715497844,-0.1377860181964934,0.6879804551135749,0.13842050568200648,Class2 --0.5659979007022463,0.5646024407638174,0.028822858173362324,1.2409947489615067,1.354626908524185,0.5553426526892414,0.11348960714564148,-0.49134646908815693,-1.484574388027539,1.4090645887811244,0.5699727900411607,1.2244393323311478,-0.28641730872914195,0.9990758905187249,0.24881602893583477,Class2 --0.647017683638041,-1.546638217781388,0.47907616708790857,0.02767674834693021,1.281913177600906,0.08778199641557097,-0.8802815254606905,0.05330778131188441,1.2045132886281298,1.8123395772992719,-1.2386029185420961,-0.3306707256450434,0.6069220625795424,0.6311355284415185,0.6260810941457748,Class2 
-2.037460306445367,-1.5230356929503819,0.6929559397458841,-1.0021943838897078,0.9022559667888802,-1.514189091636655,-0.5453444846060094,-1.3397582098776233,-2.4452794084592475,-1.8097622004115448,-0.07983739143184751,-0.13766829038103862,-0.7036274112761021,0.8043761248700321,0.3193011339753866,Class2 -1.7533825464995416,1.257138022650586,2.572453339401283,1.4558687190893536,1.6058017361525054,0.8326798938490009,-0.5953325676826557,-0.35861224976072453,0.9090876867898998,-0.06271093236276806,-0.5835269436261912,-1.3689921125593814,0.28549160342663527,0.7636466992553324,0.627256136154756,Class2 --0.5847073636058115,-0.74719401036371,-0.0010398457863809295,0.37889643067271433,0.004798198109573815,-1.1612730501319664,0.5209862663678406,-1.4589991033922685,-2.079934898437066,-1.7105063393109428,0.5573037785330814,0.2627706588659448,0.9507505232468247,0.6058651038911194,0.6278010974638164,Class2 -1.3380638390208457,0.36680040366948663,-0.8827343007271192,-0.40106161360988335,0.42124051947509517,-1.3658128776709664,0.6033717000239237,-1.8301410567577105,-0.3629053664330582,-0.0016725996865857135,-1.0990005717592506,1.22947691139955,-0.9422790431417525,0.8035698144230992,0.28413370507769287,Class2 --0.26282379462090244,0.364905737529194,0.6130744257938245,0.28234949252388436,0.027050466140827133,1.256732333494184,-0.7712184274105612,-0.09217626941909625,0.07846521229949786,0.3544675397115994,0.32896257793167166,0.07672665159920387,-0.1974603245034814,0.9199068185407668,0.8382983293849975,Class2 -0.6891009590030248,1.3324428326752678,0.736736885772662,0.6200432640401806,-0.0868165537540469,0.30202426538055777,0.12494357620980998,-1.5402115736652868,0.5199484902496063,-1.3034430784202826,0.8362654210240413,0.7374384168470683,-0.019173264503479004,0.5825062624644488,0.7801947896368802,Class2 
-0.895694035199619,-0.10950068625526349,-0.5631042415452848,2.6494996407516105,1.4884855781180293,0.3048580404604656,-0.5576968028699779,-0.3090899142677457,-1.1417764639355814,-0.34974584204471937,-0.6920222826753869,-0.3551728678531899,-0.5217922595329583,0.5325704475399107,0.11128781572915614,Class1 -1.2293738125365012,1.2724357341429917,0.5940250446461138,0.3681773131689287,-0.0414174318989359,-0.290086660698004,1.4390553417090988,-1.3547716223257902,2.2569761788565152,-2.157602728106589,1.9083912174291067,-0.615454480447218,-0.5291442573070526,0.5149147196207196,0.9695048546418548,Class2 -0.20734180403218647,0.6307166314184365,-0.8679402992695503,0.5167497830811788,0.06200560540452833,1.297907427044487,0.48508046956280243,1.1398263283245902,1.1876421359346727,-1.5521640862028847,-1.5629409644757357,-0.1454792148486231,-0.5576823716983199,0.7780152203049511,0.4214384809602052,Class2 -0.4467137531004894,-0.7280971275983201,-0.4018314656787179,0.1237407708419305,-0.2903055086214906,0.566706059692305,0.3279551757389326,1.6312042779258282,0.2500814760064649,-0.4776858182983963,-0.33055843636624865,-1.0018885689828099,-0.13169807847589254,0.8615811548661441,0.3773557327222079,Class2 --1.6860475100797596,-0.22171851610775217,-1.0509324430397269,-0.5445567049997748,-0.23149064648188508,1.246701028599002,-0.4542277509805905,-0.7145393136039537,-0.2347559752129842,1.5221038139579102,0.7280582223254012,0.03433466069956946,-0.24912205059081316,0.553731806576252,0.05915202549658716,Class2 --2.450953300484371,-2.4168195346116397,0.23636554480028346,0.5864532308505155,-1.913861821576022,-0.12767218958475435,0.9672135222511407,-0.6508635924968583,1.433950920991749,0.5162175036071464,0.3896548810653401,2.3310538915325774,0.1751962387934327,0.5770495014730841,0.7044422759208828,Class2 
-0.454824101006559,0.3384935856960132,0.5749881923976827,0.08371189642029464,-2.0022541927165243,-0.39380867066568337,1.7244334884368984,0.869390646411461,0.3922287686441998,-0.4933334650251318,-1.9829768082775423,-0.03638879279692143,0.7222788147628307,0.2506180228665471,0.15018057101406157,Class2 -0.6771323844549872,2.0064840785492684,-0.1737521499870122,0.3749858756222768,0.9001142915001238,-0.6670110692021334,-0.44129987754843364,1.121285593402618,-0.3025892871473589,0.35969361134468547,-1.6686039991872672,1.4514214452318122,-0.1566054173745215,0.6263969377614558,0.18414810439571738,Class2 -0.28502657112666335,0.33549814495290814,-0.3185578981654407,-0.22501307275088858,-1.1026933626675621,0.641690068068764,-1.5025724287665494,-0.15379402009407828,-1.4795951967288306,-0.0014859232362950132,-1.7638431623995645,-0.14057705666613657,-0.6305436510592699,0.5920425143558532,0.43969966820441186,Class2 --2.6072105352926394,-1.4049350969471797,2.202978121312687,0.9765533155177716,-0.5267012688372843,-0.48523245191208225,-1.961200674736975,0.5339823728252544,-0.6704057502282168,-0.2988533940932782,1.6106077416232327,-0.745180510917204,-0.16954464791342616,0.29286866425536573,0.45896477648057044,Class2 -0.6431553546596561,1.0686253083072135,-0.21451308228285532,-0.5422597332735233,2.3885419964625507,-0.04488027302799546,-0.151999590468107,1.761561316335701,1.7162563172394767,1.318595279896738,0.5412712462383854,0.3764242756135971,0.41830197907984257,0.8364619154017419,0.8141640122048557,Class2 --0.3700189082621317,-0.8356883898733907,2.9480953204925417,-0.5344821933770318,0.2873252028256353,-0.6253172100969152,0.7636722414932419,0.35219451608556074,-0.08777005200225205,1.9070980072066588,0.44159287233885675,-0.7615679288556024,-0.21258054580539465,0.8112214419525117,0.3883471549488604,Class2 
-0.9574975370950096,1.5391503059928626,0.7359585683664139,-0.5794857138155282,1.1960805325908552,-0.1587433497168562,0.1945989050476539,0.12602612729965196,-0.23652369846151985,0.32600668248656667,3.6822888068672524,-0.5776104062068415,-0.5861232271417975,0.7373660765588284,0.024264920502901077,Class2 -0.48465880436879505,1.4564033490606345,-0.37919080815534456,1.7737890934753342,1.4355463829707575,-0.08352105699983133,-0.8912658960771795,1.0492789000176606,-0.30256602969257,0.9415196841519836,-0.31952067733001155,-0.48417755606642754,0.9917739457450807,0.5705832971725613,0.7329613307956606,Class2 -1.3734108631004798,1.0000964210532923,0.4507972436720976,-0.572209514905802,-0.2583142991116261,-1.044517036012793,0.10455521006562882,1.4560883213853975,0.09026594392782644,-0.3882554057406841,3.0029149766430754,-0.6607625236669581,-0.16203094692900777,0.5578650685492903,0.8273636701051146,Class2 -0.45011709103998765,-1.481874129454796,0.9473694192539179,1.173539482005869,-0.47284179881496274,-1.114229828315402,-0.7694264564822502,0.3284120162694996,0.47144132745378436,1.1143345141068612,0.31627180802840776,-0.9522759368951307,-0.8413211298175156,0.338470519753173,0.01400767476297915,Class1 -1.628211297076585,1.3403903386963036,0.6869142023034315,-2.1900655415898087,-0.9259816197672527,-0.21501274664624478,0.25332076237161916,-0.9993603677792791,-1.2412739929778587,0.7834667747702654,0.45112069276574046,0.5530782045996583,-0.5005410434678197,0.9992567305453122,0.03664201055653393,Class2 --0.670645303029913,-0.02932350146414575,0.009741661303970002,-2.54290122038105,0.3925332090492602,1.0186790450747973,0.5382923977769561,-0.08413419386538105,0.5582036592356636,0.3297800974882354,1.897759922252462,0.33973744038473724,0.5291548962704837,0.08955063903704286,0.2644978372845799,Class2 
-0.7549560352949521,0.07925791022183981,0.5296337517091281,-1.5740509114804897,-0.5209544073784467,1.0640743379824409,0.73560044330752306,0.23968308623586979,-0.26884118858396794,0.8252978520184961,1.6504800602967007,1.1786432848408832,0.47501991875469685,0.866731102578342,0.9019363401457667,Class2 --1.0134892351774116,-0.730246497858833,2.1267093723382566,-1.3992552312434214,1.548162839047075,-2.015279864230359,0.26648899865213116,0.5748510389773999,0.6872819301241437,-0.8936264896462865,-0.5289844716840915,-0.24075893141231589,0.785701687913388,0.4418160857167095,0.4092453287448734,Class2 -0.6534920266742172,0.5951285249054263,-0.7228315520994282,-2.1162097164200184,-0.43531896095088674,0.9759817691024457,-0.30513908287180225,0.4851117854992447,0.21178834695054846,0.23313681502479427,0.2931636701553388,1.7821390936300827,-0.7497046301141381,0.7661957142408937,0.7260717165190727,Class2 -0.13942075268309273,-0.830281316539154,0.4594179031762823,-0.24267623782836345,0.3223567212752763,1.1717505866581182,0.6147157245521913,0.9314487802287228,-0.2914866026644158,1.7909055604813517,-2.090910802963341,0.3660047562476082,0.4570719450712204,0.4029296631924808,0.7202408690936863,Class2 --0.0980261093704407,-0.8169075103529186,-0.19856296666081868,-0.19706172426947133,0.4243012168005196,-0.3066440029551374,0.6540818460469037,-0.332245579889831,1.2114610028955126,-0.7711838615759912,0.9269374339626111,0.2276713497782817,-0.3552028751000762,0.02546942373737693,0.7019459335133433,Class2 -0.395178237477472,-1.2232729284804769,-1.4092338823008135,0.7060888282743895,0.1943426811080447,-0.7786435140918092,0.9671335236661925,-0.7381351811882261,0.017478388987401425,1.4284616296301507,-0.7236902007102114,0.49540005828016953,-0.5210179723799229,0.8699706085026264,0.1381398099474609,Class2 
--0.6095657889792712,0.5155338187773568,-0.15710133032258572,0.1399734440103017,-0.6185868587994576,-1.8398106263990215,-0.5657799841529528,0.7453525529499958,-0.6362425982752847,0.18128781466808172,0.7260799238963908,-1.0779013491420513,0.3117915294133127,0.3610090494621545,0.5969098906498402,Class2 -0.8620654193218915,2.646337680527104,0.8495537045262307,-0.02338214888706641,-1.4405142265215434,0.8851680703782691,-1.3404019257264124,0.34471042034144483,1.84867483189985,0.5410753522571984,-1.4338929198327763,-0.3155350404716161,-0.15103830071166158,0.8952091892715544,0.5600245555397123,Class2 --0.09095754258316402,0.8452127770827825,-1.8439983718541317,-0.7043106671198056,0.8888217741782166,1.3304699214685636,0.6857453629142215,-1.3484698091772076,0.10451291397835416,1.1045178652395846,0.02179300990833728,-0.5268715959937386,-0.4454309334978461,0.47942880471237004,0.8555964829865843,Class2 -0.8637909416472365,0.0017365041293652128,0.1574968457521526,1.013257835003047,1.7120012292922533,0.03204698902166211,0.8332148167645483,0.11415117882009417,-0.5171360994405931,1.3154427672640256,0.01591171019054876,-0.2898735275325636,0.013219140470027924,0.6437990434933454,0.7182476429734379,Class2 --1.7851174183841656,-1.717311024943664,-0.24558306400672572,0.8537935277414701,1.0950175416734378,0.05320802960227281,0.09876816204767767,0.5578484152636154,0.051288553898201326,-0.45819548643048646,0.25018990048765294,-0.4537872831348765,-0.01919016195461154,0.06009161751717329,0.3790873074904084,Class2 -0.4575991012493706,1.0539259216823817,0.5902214828340875,0.5642692139258808,0.43355226289413845,0.5847668640107171,-0.2670211290899005,0.1262657484310294,1.0177251217595258,3.328443482955611,1.497782159098365,-0.41143505874074576,0.8972324132919312,0.6573036536574364,0.43186447746120393,Class2 
-1.5230616907228927,1.2138787389436188,-0.8531801115487946,-0.5494290131498569,1.2531971891550164,-0.6724229678360092,0.4013974414048554,-0.5567953092856104,2.6125816678501166,-0.791338563969782,-0.6664467722707861,1.049546737171859,-0.7605802114121616,0.8189590584952384,0.07990338327363133,Class2 --1.2318585800315234,-0.9849973349544106,0.6363553233001066,0.3338217070264357,-0.24410826115102802,0.22192974963485976,0.7997752032506492,-1.7107496050955568,1.7255422672203975,-0.39629331557854724,-0.11776481799798372,-1.542444485454642,-0.002674415707588196,0.11174620501697063,0.020510085159912705,Class2 --0.62154977059571,-1.0443386972346491,0.7882321643778857,-0.18782277809160894,-0.29823115877544965,-0.8520177760374309,0.9661201421279062,-1.7065506720294772,-0.7142771918210993,-1.140843850682033,-0.5390460745220662,0.7098772227934793,0.1970229665748775,0.3298887105192989,0.3876021630130708,Class2 -0.7550687731728005,1.3234913510546584,0.7313557519490049,1.034694959973895,-0.6766721189648435,-0.6596960242574172,0.8026164614099076,-1.8729439853803174,0.46013173529798507,-0.8974137778802188,1.4555182304631515,-1.8033230770688002,-0.8495600349269807,0.49865900445729494,0.34219758678227663,Class2 --1.353018619411596,-1.9967919677056722,-0.6914238166528529,1.4183467083256076,0.414557684493424,1.8778217432072513,-0.26087958799075306,1.2229020249941356,0.6333670265227758,-1.5670744505931162,-1.3733249043943665,-1.6789572448047791,-0.06642633536830544,0.18119500065222383,0.06944518233649433,Class2 --0.3059131780357154,0.7610500001634157,1.3738889408132486,-1.802454213755724,0.033488572604569174,-0.4019577953144074,0.10657998597540264,0.12836154856389986,-1.1797273598269322,-0.7756428516127124,-1.1980246621033237,-1.007328154138722,0.13691163202747703,0.056454039411619306,0.7455058107152581,Class2 
--0.6363921032178427,0.8085204896999117,-2.3786821359303207,0.2910205812390962,1.276219880564094,-0.9072809582673151,-1.7000791912636046,-0.38514003574433003,-0.5964557331312548,0.6163754508113811,-0.5412324879485724,0.8212851189537128,-0.5389998350292444,0.42048968095332384,0.38786766515113413,Class2 -2.2783818169208656,2.085597607872808,-0.30946472122385943,1.8392184309070656,2.775557363847421,0.5748675516979466,0.43356617081926707,-0.5702407421071807,0.751291977339631,-0.689213318168558,-0.3098536122813638,-1.7658867314239148,0.29089444875717163,0.7154603563249111,0.7667480823583901,Class2 -1.7701658220266494,1.5366496646833694,-1.0342094596139686,1.1500357413278577,1.0070210469649423,-0.5288382356233328,1.7884275882567389,0.3742462895543973,-0.8727441823397617,-0.7528906009091003,1.1234616552148584,-1.986962365792189,-0.7111317468807101,0.30151356803253293,0.23801365261897445,Class2 --0.5743324084785197,1.028556619211436,0.46976657078515555,2.289215426060682,-0.8684254178351674,-1.4594945916938307,-1.5403096084777432,0.1554102630566389,-0.6552031112157828,0.9136115074845009,0.08578166053652089,2.1654863946261185,0.988044218160212,0.8213290236890316,0.3499929253011942,Class2 --0.2600659630804366,-1.7917988999212486,1.8991438384469455,-0.04797634514107184,0.7636912148453189,-0.6083843511425636,-0.6656840213193006,-0.13174921497022957,0.9149501139657416,0.027887429981361095,0.1072686211229645,0.6126453644644774,0.4577184193767607,0.03717193054035306,0.4547838238067925,Class2 -2.265133932274927,2.3579152893449855,-1.3477366592441917,2.879066897101106,0.4174886024809462,1.1470117941895772,1.0868961696307646,0.2001996743178089,0.21606760987741877,0.08390178792336554,-1.740151103273691,0.04710744207894581,-0.7954520424827933,0.3897722205147147,0.20082787377759814,Class2 
-2.4540638440913676,2.331839352252231,-1.6419050939785294,-0.2545642060868724,0.5553908247352287,-1.0560953876291999,0.39801061184988457,0.585719903289533,0.39094608667264297,-0.6906979664602991,-0.07749646695827463,-0.569946174879934,-0.55569077283144,0.7522595932241529,0.8501842126715928,Class2 -2.3410845150158606,0.7611332719203939,-0.4690835652641026,-1.5688683069980818,0.23619873375251993,0.14020209334508207,0.07421611099065464,2.1971368754196563,1.4173216871806953,1.1355969549098859,0.9054065733651389,0.9770599673352376,-0.9688303018920124,0.21317874663509429,0.9904095882084221,Class2 --1.8062908776677435,-0.18081405915756854,-0.8270508738543757,0.9514758226847089,-0.020393892124161615,1.8596967928329284,2.1198670879739927,1.1552385466075776,0.5425640630326258,2.1723332249019247,-0.32883719316448207,-1.1870400345759937,0.8162927185185254,0.03699124976992607,0.8864774219691753,Class2 -1.1906796484757665,2.4382168420635644,-1.2146003281496054,0.34649833065347896,-0.5753831001599253,-0.2510384429211955,-0.17233676887049212,-1.133500192824996,0.07438085416828882,0.25649015875008174,1.9076516137072947,-0.8277182187511521,-0.7130190483294427,0.1752063287422061,0.016173366690054536,Class2 --0.07036863770701596,0.13622614678762004,0.2221877240505423,0.5882300677542517,-0.9681064799954044,0.05357832777772409,1.0644912479642967,-0.5770148372284755,-0.4328479272442162,-1.130254607397392,-0.016849954034056494,0.11648933737161873,-0.15275524649769068,0.8827937755268067,0.9654779874254018,Class2 --0.5837164443051912,0.6400349796901021,-0.22929929814346772,-0.8444978412231288,1.5039157030983532,-0.13537957560239833,-0.4819375606679791,-0.07598722900500243,-0.6856086593114646,-0.4700150560196421,-1.5707287333568993,0.6536997012144332,-0.6987423859536648,0.5691309529356658,0.36654607322998345,Class2 
--0.93089300778317,0.36487475781654244,-0.705783277176165,-1.5458659186562973,-1.0280992416423387,-0.505435730156219,-0.024819425426139036,-0.5176264967468157,1.5497121160753806,-1.5634863783379012,0.32188734142606784,-0.2190769356533761,0.4567950996570289,0.5422545033507049,0.14797563874162734,Class2 -0.21387315714578864,0.7462550467629261,-0.019083366450541057,-0.26230505330408804,-1.5841486183471136,0.5859999743121624,-0.7788546978648836,0.06385574036331267,1.0438998926845535,-0.24830391145636227,-0.05099435760212394,-0.9002816823090791,-0.44866010174155235,0.2517258368898183,0.0392443360760808,Class2 -1.714855278112535,2.0796450960882398,1.3641018647054957,1.0236923405171798,0.9147780051389754,1.3047086515806325,-0.8640209482065772,-0.3851197557739384,-0.4300417305386805,1.656647425273201,-0.03597190234442198,-0.3123252198362916,-0.01066479692235589,0.6339556963648647,0.9435539348050952,Class2 -0.6103418835948369,-0.26572772474306816,0.6388861662637643,-1.2905822930274862,0.16086775158118147,0.7254961346168955,1.203887108556822,0.9860048839176098,-0.4894577575781163,1.393542910947092,0.2722681603493153,0.2408365380203681,-0.16520079039037228,0.8345542519818991,0.8007376638706774,Class2 --0.8342488649773285,-1.721448918111439,-1.2270823218517874,0.5717165171724669,-0.8926197462395546,1.1546923725198741,-2.5080126661984345,1.5085266300402658,-0.586498513434122,-0.6406638892643061,-0.7088156095723726,0.50468150127992,-0.12637066608294845,0.30019673611968756,0.17917311494238675,Class1 -1.268657258273838,2.127953440662288,0.21809115173222837,-0.2633603388703638,-0.11121426243163825,-0.7780216076274843,-1.4350159730566223,0.2719461492875558,2.162712846117347,0.7938626094337078,1.6097132864552937,0.8687298803434529,-0.6007907353341579,0.433748668525368,0.9364327304065228,Class2 
--2.473978502373053,-0.5148560330984714,-0.3679828591345533,-1.3082804313214451,1.979926565227828,-1.1482453547777867,-0.3995359805551161,0.39122767398444946,2.1667563497716396,1.039281616297446,-0.24623967517215978,-0.6910563271075613,0.06745847035199404,0.3539506117813289,0.27467896579764783,Class2 --1.1403604938877314,-2.521157517584615,-0.33042041270783457,0.5828629753638259,-0.4010652610231111,0.17492110846738299,-0.5249648195628094,-0.7354571899870367,1.3197614477024202,-0.26764237112378836,-0.050275203516339594,0.719100076306654,-0.8740539462305605,0.9052655177656561,0.26062472141347826,Class2 --0.3329567789254879,-1.6064005831145909,-1.928889050388026,-0.6398562337133105,0.01848024730674944,0.37276134422916085,-9.82499719310124e-4,-1.0042320337769284,-1.4458588261061105,-0.40123406550662155,0.07724328010439402,-1.2921147697677295,0.17763611814007163,0.9893477582372725,0.8858367106877267,Class2 --2.895768919473153,-2.9684407193779485,2.3641637331618304,1.6522627052011143,1.9675384914037617,-0.3330316886742301,-0.6455010224229225,0.6314697121485924,0.42650842841490444,0.3584225883862447,0.8422832299634194,-0.9733642964695279,-0.9079865664243698,0.07349548954516649,0.42593747726641595,Class2 --0.01426048314429229,1.0707128556677623,-0.982373816740461,-1.1044922715686158,0.4789274031322753,1.5435530999577294,-0.9279609667848702,0.2349295047464939,0.1418336370707278,0.8254272464848352,0.8080035815933657,-0.12961443897376934,-0.057437754701822996,0.5257299849763513,0.9859139616601169,Class2 -0.6756977165281404,0.45887984820025585,-2.6623007791492235,1.2796768453252656,-1.9967956804813138,0.5303274476054104,0.1928619159558897,0.08678263359809339,0.0434448050359048,0.501162825772395,0.2506366132868707,0.2150232690760546,0.07187232188880444,0.7184766037389636,0.04416604107245803,Class2 
-0.2536289043575484,0.3852259544098818,-0.48643206969501795,-2.9100556104640694,0.5129328483371275,-0.671708834926529,1.7158086957358065,-0.8744189499084536,0.42615749422274046,0.6765584771471463,1.0478837540063877,-2.439509226868427,0.8790246518328786,0.07111403183080256,0.048899002373218536,Class2 -1.45795928829977,0.739123793854679,-0.7550552847466748,0.10984113292128439,2.0384307717690615,-0.7784974219618804,-1.1247467338816657,0.5250063476149496,0.17116270868381342,-2.0689026315924135,0.8751692042232867,-0.3926374754723035,0.261839154176414,0.46450763498432934,0.33519664919003844,Class2 -2.0491849102296227,2.0725718420324695,-1.998445891154022,-0.07029395064353035,-0.18661794009927613,0.6039564333440817,-0.19897742255659193,-1.0275363444179628,-0.30429599882280683,-1.51132350154463,-0.20003038048754168,0.6488263074879326,0.46469139494001865,0.3522738115862012,0.2879829222802073,Class2 -2.370222262902038,0.7992750645765692,-0.07362652994916892,-1.508697517215776,-0.5920776228238285,0.27604188375302224,0.747603349160892,0.6317484091407723,-2.484213813636349,-2.1145475198184913,-0.03724700566240474,-0.09008444408719213,0.3502264549024403,0.05695895478129387,0.24280118057504296,Class2 -0.8974281636113683,1.052080131537429,-1.4789442910831767,-0.3295636394781374,1.9836345870573653,-1.1065465526399973,-0.1373760057217484,0.0552502009267488,0.02182540880073275,-0.566427478411991,-0.10540129695763155,-1.164162317062253,0.9434797912836075,0.7908543227240443,0.2694742130115628,Class2 --1.4046721645782019,0.1157495780643234,1.4670694935716286,1.1652133448208541,-1.0192496130924893,-1.583073555830414,-0.3134803639981336,1.0257931838682015,-1.701654689067952,-0.131658745447403,0.17331577935433543,0.3279546346088327,0.1096621872857213,0.689286669716239,0.6365935350768268,Class2 
-1.0983290366291418,0.9341910471340504,-0.32314979894603907,-2.309505683672315,0.18108714001232656,0.5408812649046463,0.9552470590254124,0.0844569713038102,-0.11523576158610926,-0.2194779182734416,-0.7946084339317384,0.7768640812718126,-0.9230084936134517,0.509982498595491,0.7085211207158864,Class2 -0.6782100662599317,-0.5207351339373065,-0.9668530679111789,0.08436078884616433,-0.41550787474737405,-3.0195327743982743,-1.129643028777468,1.8332863257977274,0.2719352026777048,-0.26764663097442026,0.24866856948083768,0.03601791178704536,-0.018547996412962675,0.10434535029344261,0.5047224340960383,Class2 --1.991403566391682,-1.9514560669454046,0.32064450331566513,-0.5359040070877136,-1.6829311438195962,1.1588346975415929,1.411229714266329,0.19128598497061705,0.12199769411943245,0.8094532963651505,-0.5102565861659317,1.393938217701196,-0.863282750826329,0.7525553551968187,0.16142471856437624,Class2 -0.9698174189775715,0.464532728961408,0.6190848710777757,0.42565830146414185,-1.6147508904046592,0.17805765601404203,0.3476376701868008,0.12486734166555183,0.6100315466653955,2.2762610802600687,-0.8459132032207173,-2.238925417341935,0.14641328295692801,0.7681307215243578,0.145204178057611,Class2 --1.978158180652065,-2.18469108981991,-0.7480777466538244,0.3455662597971538,0.3938977561617941,-1.279256514094151,0.09493931844244936,0.4924067507991566,1.6405455295283942,-0.06520281205957609,-2.8658986970902482,0.6872861673870787,0.3760392451658845,0.0032074647024273872,0.5767974022310227,Class2 -4.496042078146767,2.4331769290532765,1.3009796221482608,0.6267704103948946,-0.6236295016851711,0.5943534271675981,-0.5716120633310471,-0.5291799335072976,2.3650948740722804,0.31535999867753206,-1.1084504263952286,0.6827653283591103,-0.7886124844662845,0.7296149958856404,0.3688390969764441,Class2 
--0.6439565874358484,0.3401255527464328,-0.49398111564105324,-0.9053503391804137,0.5024121588618127,-1.0477829501225946,1.1195780935025834,1.8358423244061468,-0.6600627543605903,-1.7547557669357405,0.3011261107261915,0.33995411049409185,0.09005040628835559,0.9417073654476553,0.1293561509810388,Class2 --0.028568855582406627,1.4684330055148151,-0.8901182247686295,-1.5650777029997875,-1.609817307366339,0.36680091052136193,-0.36227484452166775,-1.0370089674348832,-0.9393527635773615,-0.18064584717377188,1.355076031059463,1.6855091036374077,0.8230993603356183,0.718507356243208,0.19056200701743364,Class2 --0.27559398462950446,-0.3730750246456185,0.32649382102475716,1.0267644142248282,-0.035540723082488165,1.4983512576473887,-0.43926408029965075,1.6759915450111438,0.15837654816569816,-0.017495916370125653,-0.5420599621546757,2.3608040414290055,-0.23906999733299017,0.7869835854507983,0.10247194580733776,Class1 -0.46788067187263693,-0.10675766939448028,-1.703240943767124,0.7430317904012493,0.5451670858151801,1.485291022599882,0.5348132311671322,0.6586438153323194,0.3756526717478067,2.422832233888206,0.48564716418932347,0.8857626714717846,-0.8194941622205079,0.6952712694182992,0.16615793970413506,Class2 -1.1603031615416204,-0.6835273407589992,1.006334906178917,0.3802915868381765,-2.8107723261523643,-1.1388907828218553,-1.6634936322000777,-1.5437200008846497,0.39099222298252073,-0.8960831747620205,2.559942444031132,-0.41602273513345883,0.3422913569957018,0.5424439189955592,0.05788829340599477,Class1 -0.578765756722978,1.114419554434794,0.7197102820535988,-0.5012834982921673,-0.2720989239136513,-0.3745814645851316,-0.4963887392916264,0.3992001141540985,1.109441772921112,0.9047947795614775,-0.17907825728510193,1.2250643135218469,-0.41630458226427436,0.3057024048175663,0.2530971865635365,Class2 
--0.5370687621184259,-0.11728971604804764,-1.3422552057590278,0.12150085117476775,0.11926585147013102,-0.3376709736384591,0.827662702010183,-0.278485031372853,-0.37336570179260226,0.43248404520276085,0.5061981589529394,-0.9031494088919534,0.18045764928683639,0.43513853708282113,0.2114745827857405,Class2 --0.30267027044251654,-0.4964319336950926,0.2711191443045127,2.193607599915643,0.3039620345735616,0.4639347231850078,0.6263671686766819,-0.14044061291307222,0.28956000479034283,-0.5221932499634752,0.8502332992254941,-0.023544487418486956,0.5662507810629904,0.38969025295227766,0.815687287831679,Class2 --0.713894445297903,-0.9250111782707864,-0.6739782092302398,-0.3927014184309619,-0.03597367644315726,-1.6868440589364906,-1.773874976368035,0.964672995661603,-0.23675586506375706,0.3049509580724181,-0.5478871432986376,-0.4149444037353705,0.48125732922926545,0.17565097310580313,0.14813148113898933,Class2 -2.276686030364662,1.8071015562916941,0.0729103181334634,0.23057134451649516,-1.6972215504034847,-0.640125902531749,-0.2599722478315184,-1.5578690821012877,0.1910563719631139,-1.9082734542536866,0.2377529358927554,-0.7259570870017057,0.4991007447242737,0.7639208748005331,0.6555099948309362,Class2 -1.1154506235837789,0.611835969717615,0.06031206784860888,0.5845331807949521,2.014738484160784,-0.11473101448503256,0.1413445777331109,0.6795115766827705,0.25721186321837125,0.7476506716557094,-0.15003621030184258,-0.7179952998654742,-0.17563417134806514,0.529295566258952,0.33228577789850533,Class2 --0.2833904204192546,0.14165048294002475,-0.26492301660479883,-1.362483107778669,-0.3930874720398709,-0.506425223446036,-0.5608402828636083,0.388756523394816,-0.5239578598078575,1.2816448569292807,-0.968894387950745,0.10332640370366433,-0.21183630917221308,0.3596714346203953,0.30826059775426984,Class2 
-0.060228199205381805,0.8962334335506139,-1.2118937136473902,2.0212370185138253,-0.36341310593303816,-1.3485025804804027,0.7243597834405303,1.0674049554689204,3.168752855444795,-0.1968677427483194,-1.6588578084296124,0.2492702702081866,0.03873771941289306,0.7563926435541362,0.15553926932625473,Class2 -1.193593879096366,-0.7001471213297237,-1.0886354970664414,-0.66464276377837594,0.7609796283544794,-0.527557761873907,-0.22203081895478885,-0.5749192439944364,-1.3271296287822856,-1.3803623725353713,2.489261399118092,-1.1431728449866159,0.7645543268881738,0.2764031831175089,0.3196499787736684,Class2 --1.6570857214988366,-1.6653858426330022,-1.400798933921007,0.9990268750294113,-0.6236010697204685,0.9049278568821459,-1.1296173382506436,1.9335632323354,-1.7053236857858356,0.11655848319423184,0.5466068721813934,-1.8516328952666603,0.0777948615141213,0.8596306361723691,0.36723027215339243,Class2 -0.36684636948301524,0.09123193827585419,0.07191712793567417,-0.9346742344395631,-0.9968218674894894,0.43176748354517375,-0.4350999258650653,-0.05124521331223152,-0.5142960316219969,0.43577163509329553,1.3418341863407954,0.5436347187762406,-0.13742567133158445,0.61646483768709,0.9965465446002781,Class2 --1.4992709306828564,-1.5574336819885144,-0.48726584377236265,-0.10904069295967689,0.9584545884840182,0.42043249430371166,-1.564529785484814,-0.22260348360461976,0.7108083365282558,-0.673314562922952,0.20985196346493515,-0.7106849574866587,-0.023519066162407398,0.2952598542906344,0.683912230655551,Class2 -0.9538531745143528,0.11140076925083114,0.148654249699053,1.6159897562999053,1.931298077317254,-0.060241213588737796,2.7537740509024746,0.6558238248072837,-2.7495966570084502,0.4547137784483357,-1.1269794695960766,-0.2975915025292707,-0.2163714449852705,0.49025189713574946,0.9522451881784946,Class2 
-2.5517361633221904,1.9471765023331804,-1.4516950011766923,-3.206163242439084,0.6354109216807196,0.43262127787621873,0.6075876864834382,-0.5105363717596885,-0.30827486470639803,1.0157576293839863,-1.88946581635433,1.4077645969894421,0.01075646048411727,0.29101997683756053,0.4389709383249283,Class2 -0.2223210280402268,-2.414412080495989,0.2708117503941648,-0.23155361055900417,0.48245270362342724,-1.7676828066792303,1.8448912872207655,-0.12535876208723884,-0.42164203929676414,-0.2103943811825335,-0.7542921288341281,-0.6888239037660344,0.33756214240565896,0.4997943288180977,0.1658914654981345,Class2 --1.9762589017056202,-2.3030917340611063,0.1872641845729829,0.2028552274630339,0.5788484767323745,0.42920049800157134,0.5742847783518908,0.754704118290045,0.7090825077712858,0.432872541530241,-1.3365571762161135,0.3855719170294892,-0.9735812046565115,0.7270947177894413,0.26018461445346475,Class2 -0.46160507461142875,1.3948205128981268,-0.27715927660780226,1.5807776085632028,0.4128071395885389,0.9702763235935852,-0.5559374371565697,0.9199161544721911,-1.165144308237977,0.24749603439333995,1.3992436619306623,-1.050799027945866,0.6364924483932555,0.5550568061880767,0.7610209533013403,Class2 --0.7329716646989961,0.3238752366204769,-1.4277016671972833,0.19909729950130062,1.0111020444668934,-1.2294757614121274,-0.527959722826538,-1.1258392604225487,-0.21704360373598414,-0.8477389936843727,0.05280603955301534,-0.9195333239975898,-0.30685539077967405,0.2921865163370967,0.3737265532836318,Class2 -1.6563341142108656,3.7543897327277245,-0.49106375338001074,0.8548226987349578,-1.3743605939961592,0.658575362259705,0.3242311026887594,0.390444079925463,-0.7496030522124161,3.1123162654498215,-8.40041507537157e-4,0.4290588181494585,0.3559617227874696,0.76608104724437,0.1012437588069588,Class2 
--0.5361587254737363,0.443180340609344,-0.4464632178644451,0.8118000013289525,-0.0835517538884629,2.6837214947897845,0.08644321291302082,0.6016487661960482,-2.4375351011918496,-2.5736387325714998,0.05848847240702602,-0.9981961859611945,0.044492899905890226,0.3846884814556688,0.8276914451271296,Class2 --0.6441882932735987,-0.7910381137226259,0.22478267869903615,0.9737974644204489,0.32216983178534736,0.1562698006258433,-0.04202096378435949,0.8085292839817488,-1.0500775677794234,-1.1716764568117635,0.5075813105157445,-0.751807574942136,0.5989514398388565,0.2865988214034587,0.6002407558262348,Class2 --0.8622410568441992,1.059817686586113,-1.9173659919570265,-1.4872175970127157,-1.6316208704517638,0.6327540554435716,1.474764569669183,-0.6022088494216875,-0.529067126230998,-0.6334688520334586,-1.399017208775326,-2.3497259492345757,-0.6470175832509995,0.62396748829633,0.9495272871572524,Class2 --1.0842708506096708,-2.465978095073034,-1.7607648674740197,-0.7157004692335187,0.09281065860407134,0.3961598802606885,-0.4620781465682957,1.3709180775861387,-1.5202572364747637,1.2070051934765484,1.184022319249011,0.7474899207327109,0.9757085111923516,0.8824827817734331,0.31437758798711,Class2 -0.2885648129283129,-1.2531945215743814,0.9054765020073955,-0.31526048831015346,-0.8855466257976796,-0.7370157211133846,-0.26755956796215197,-0.02652682390979541,3.1651027329239234,-0.028949243614766678,0.5473137295112896,0.9667267751277347,0.05250312387943268,0.04055203776806593,0.883597532985732,Class2 -0.51803784194023506,-0.17346391858089882,-0.417127008467764,-1.1109532132533806,-1.2109988846869524,-0.7112655518571861,1.0867335173496377,-1.9364423459465863,-0.6468612822248553,1.0167910157989817,2.1458695258230125,-1.3392841217838902,0.3646737998351455,0.0536665937397629,0.5651696112472564,Class2 
-0.18250792750455536,-0.4266401391740501,0.7879748214653914,0.17663876835416378,2.5437130523507885,-0.07643775470545047,-0.6364627424358565,-0.9930767825367283,-1.6268902451144316,1.3203525862996957,-0.10937621432736831,0.481460782676185,-0.5240185623988509,0.1702992133796215,0.42806206550449133,Class2 --0.7930292132597624,-1.1759101959651774,-0.26092571809209003,1.0389133697240642,0.33493290036371554,-0.4924665204326359,0.3899224053971275,1.6993467424097688,2.6556193878093732,1.8801938121173911,-0.6395437537905836,-0.8289175416244016,-0.9512401791289449,0.9847604464739561,0.10516106826253235,Class2 --0.10223127031119275,-0.13296527202535072,1.5237211756556421,0.6599081362937641,-0.06438389787486529,-0.2651386824690022,0.10593108299052449,0.784344507824032,0.0698007807613323,-0.6654342400499105,-0.27209562748504096,-1.3656994868480774,0.8804521542042494,0.7431488197762519,0.39221431058831513,Class2 --0.6740769643819979,-0.7856474228112162,0.1678611250708829,0.16479483554093216,-0.10842009271158969,-1.1264252879384313,-2.2649785564517835,-1.1302762484574425,-1.246329155463812,-0.07246368537360455,0.9102281875380155,1.5823890780766676,-0.7209686413407326,0.7617363948374987,0.4475744136143476,Class2 -0.2553436477592693,-0.8732805251009635,-0.9834009922413793,-0.2026887009587912,-0.9081988435670876,0.6335320309347661,0.8447946031286108,-1.287312381738629,-1.094371285881427,-1.130419388734218,0.4700901336806597,-0.29053764465787596,0.5460992348380387,0.1585662798024714,0.6059729203116149,Class2 -0.3687875309810349,2.4812328162069432,-1.0811804176325297,-0.6853720450563846,2.1966652961242423,-0.7934275163559136,-0.28652263973493947,-1.2632897499830018,0.6399680346958075,-0.5498914718931723,-0.05153419637095975,1.8968953941900883,0.021245463751256466,0.7839742337819189,0.898694139206782,Class2 
-1.6730983953664569,-0.44335324977969615,0.5963550470615906,-0.2437609353138279,1.5627178776373336,0.7399098408477861,-0.39558863923543575,0.048270280830343584,0.14020980978769068,-0.5838951688094374,0.18607021932785198,-1.343060877331853,0.008518543094396591,0.21703282976523042,0.6290447243954986,Class2 --2.2278430920540315,-1.4587797076704228,0.1076829945710627,-0.4077171053183717,-0.6883394256675599,0.49598697587309587,-0.6272590131998359,-0.4263977737479715,-0.24258181297753026,-1.9688190818249949,-0.42526219800869847,1.1251889858278368,0.43537635169923306,0.40531844552606344,0.2549582461360842,Class2 -0.956553932917649,1.8285129209821194,0.2860481617123364,1.2487056226700761,-7.195102747571567e-4,-0.21445952657391243,-0.7520110120224761,-1.3314090146155322,-0.9445665199710461,-1.317915338029051,-0.3856090312928665,-0.3229435909685327,0.21680657658725977,0.05796172213740647,0.6108500405680388,Class2 -1.211339391898824,0.28280464355744345,1.703912213114363,3.0054229504640935,-0.4109066929687329,-1.1669322989443502,1.0821513265282532,-1.193946949724021,1.258415185456181,-0.6031733469201547,0.015903652561700694,-1.0486229042333852,-0.8773077209480107,0.7500334389042109,0.5512725366279483,Class2 -1.2748672513200316,1.1763626265228886,-1.3672765931454642,-0.7667937465192518,-0.469888167518674,-1.2028144402906158,-0.4203817389329146,0.4013617803635663,-2.553259611737189,-0.994258772979062,0.24467283885800586,1.3207674892269912,-0.513293270021677,0.13966737734153867,0.03074747882783413,Class2 --2.930121047008466,-2.127767445635083,0.24604303586343138,-0.8606402421356234,1.2229003963727982,0.7715246292644377,-1.3840251250780664,-2.214179202813572,-1.5705473251612099,0.005592417341704998,-0.7154064138208587,-0.43220087430734233,-0.2629860029555857,0.15042553283274174,0.655232553370297,Class2 
--2.3396415387845675,-2.8004497899437872,1.6034918732448469,-2.1741191558360073,0.1968247379843211,1.4438223407441695,-0.20354847411185562,-1.7959303855686595,1.2248865479202156,-0.43317213360304396,-0.2611548602039707,-0.17017109790941057,0.3237256440334022,0.3107986336108297,0.41891292901709676,Class2 -0.590106104404812,0.598088521215215,2.0174475861340952,1.2739230078036317,0.38792670298828213,-0.7450738579557803,1.2910852858924244,0.6364201024662216,0.4485260989921935,-1.2715069557305496,1.499154116307036,-0.3579146399258559,0.5245988960377872,0.15111608989536762,0.6670591314323246,Class2 --1.1121990407491056,0.026832116836103648,1.1085731350650168,-0.4735904196144535,0.09204340813287826,0.5841995868876608,-0.26087836019550853,-1.033823295035078,-1.6185326518082757,-0.47313065870162657,0.6188637013079545,-0.704535846432533,0.36161087080836296,0.865810108371079,0.3759553674608469,Class2 -0.14259958271115575,0.7375532587899056,0.7716161667577639,0.26533183193999466,0.5810790220404906,-0.5863903869500189,0.30890919451939103,-0.8749448299393634,-1.0696879715634096,1.986722168469415,-0.6772962232669698,0.2331292530746081,0.1745419572107494,0.6945182566996664,0.8340162152890116,Class2 -1.3702905823717213,-0.1823360290591104,0.11709064952140529,1.5995468406603346,-1.4718748015672065,-0.10121877038279294,0.046027357474167384,1.7646584050049645,2.1907844493374444,-0.21567094431466485,0.49977652061316735,-0.850665120618536,-0.7960109184496105,0.23633199441246688,0.9279258139431477,Class1 -2.3879519079901375,0.46987284014689756,0.5983722278125564,1.8366592171010792,1.1893117017148536,0.5642253852334138,1.110193277461571,-0.5056647132337381,2.2756617358376894,0.19860809295734763,-1.823107282979828,0.19633341876159355,-0.6035179058089852,0.08420913363806903,0.6185453361831605,Class2 
-0.5672135892560372,0.12807243159063672,1.3462294685375347,1.7703779869143421,-1.0160424445886989,0.47557419299622916,0.6445920127782737,1.2171690720690689,0.012222329380532148,-0.4928654605070822,1.7119748600887925,0.1409540713612328,-0.13202786818146706,0.8534896285273135,0.8971256611403078,Class2 -0.49167427046169104,-0.4327636717772097,0.4614961740558054,1.0023527803935577,0.08419996365327644,0.0025048817137900024,-0.9024145718044493,-0.31751317457310874,0.037511133197596885,1.6695359021606557,0.32730391121007646,-0.5722523546504935,-0.6464413478970528,0.4312005895189941,0.3230708467308432,Class2 --0.2826300869726309,0.030712819436719135,-0.06959853306070511,0.9142582004284081,0.9446216000737466,-0.5491660189559763,0.9956253474012192,1.4684753017930614,1.1849624801351533,0.8148026237451631,0.6927090813651671,0.3388779492169413,0.9697500290349126,0.2576960271690041,0.0059029709082096815,Class2 -0.36608429526915953,-0.8441266841364546,1.6397974299378941,-0.2921271183940795,0.25855226778991997,0.6841353027762584,0.05826687040876069,0.9792965601430729,-0.40499201707380916,2.0988987501493273,-0.4460677847323718,-1.0912373556130013,0.301219183485955,0.5132489635143429,0.10469628195278347,Class2 --0.7094250205154466,0.49311694873234274,0.582458224214846,-0.37511023272527416,1.3959069549346366,0.5316111804464282,-1.7020913167972493,-0.24936110671269918,0.6611321701325221,-0.1232251809751093,-1.1781671589852882,-0.2855817311565829,-0.23419391410425305,0.34900114382617176,0.5539753022603691,Class2 --0.3208110466129627,1.1389415691035172,-1.0741007950754917,-0.09155270200667345,1.2957681538528025,0.5825258092348038,-1.9124760245161632,-1.1470697438300559,-0.22341947861761297,0.9879607992965524,2.52216807565111,0.2651514331855063,0.8605658770538867,0.7778294037561864,0.9813407955225557,Class2 
--1.1456865134241265,-1.3360292002381706,-1.1938381731823124,0.8217208706903321,0.5782395590788552,0.9168012465566657,1.7266060160740837,0.10151019817046801,0.7950833883494673,0.8218062072797494,-0.8308000791041599,0.6032053798610997,0.629121778998524,0.5003833819646388,0.7310597957111895,Class2 -0.9046564545268487,0.42463143203401116,0.8052707213907773,0.19392537360289594,0.381726033766046,-0.9020841041929818,-0.8342731052867622,0.24993827211282255,1.5627283476218787,-1.5582941441029783,1.2763105836934554,-1.2584706335657017,-0.1776746571995318,0.3053028122521937,0.624098408035934,Class2 --0.02409425171948454,1.4493624505014902,-0.5046463734241308,-0.7927159599901997,-0.4028987597135224,0.4274454165623561,0.38251098933709926,0.5683756330855935,0.3431353549122091,-0.6646403188494474,0.654889107137137,0.13201442327685417,-0.30057905800640583,0.27389340614899993,0.4573930341284722,Class2 -0.6141291516026479,0.42619157467900615,-1.6712303347746966,-1.0031487223778572,1.3461979172866128,-0.21409614304187274,-1.2410525164227124,0.7928679826161648,-1.3608324743514875,0.38729655277757913,1.3406868887113508,-0.05812091403596769,0.7278551524505019,0.29418484307825565,0.6176787354052067,Class2 --0.2948454485967324,-0.01821119085984585,2.292281850013138,0.24441331152736215,-1.4035554714919793,0.6281347404457872,-0.9016670863624885,0.40085328424511674,0.24378323007204095,-0.7667247065080313,1.3738672653983357,-0.7521883897655507,-0.43044739589095116,0.9872753180097789,0.018441494088619947,Class2 --1.1713740746185684,-0.8267684191574521,0.3963784155076789,-0.3245799418058162,-1.5243600685078835,1.2672422741805462,0.3625464745982117,-0.5121309627110899,0.19443441508146148,1.6410366985005496,0.6302477558958383,1.6181578688832159,0.16052685724571347,0.8729044659994543,0.7766291287261993,Class2 
-0.29947920009215323,1.1381706157293467,0.35466163583384325,-2.4733721357569745,-0.010724003417853801,-1.6261350850291743,-0.9390104514541,-0.5495267732814743,1.3600904849380202,-0.2860310227413548,-1.029749882092171,-0.7454978717398538,0.6915976018644869,0.8238364960998297,0.12271585315465927,Class2 -0.3479832505165482,-0.5255983071307347,-1.8046842505846563,0.8136461781649759,-2.1330372146070267,-0.3017902658942095,2.82893030743151,-0.40226441362204624,-0.5125575738580034,-0.21210142831837206,0.34697292748988146,-1.8253968765910902,-0.7833545156754553,0.28269525128416717,0.4783676937222481,Class2 --2.481279058880061,-3.0535211635028685,-0.03731959284786313,-0.013806732729368413,0.7746824850865619,1.6791333596834546,-0.353716403375886,0.6513505238299349,-2.2099280348451873,0.857178074467605,0.8455357879793063,-0.9795484655691794,-0.16307786060497165,0.6045809737406671,0.0743311457335949,Class2 --0.9441548695722872,-1.5273860016354788,-1.1045000456410219,-0.6654912143616788,-0.5174673957932644,-1.70876016887675,1.3554451559662362,-0.8519459451408666,-0.7000238521125901,0.49567914474741287,0.6807823772783876,-0.4653189395081199,-0.6879579802043736,0.44689679774455726,0.26027748454362154,Class2 --0.9322646407232807,-1.3061655186335286,2.126549396303232,0.5282850026627712,0.1024333840898868,-0.7786405490921512,0.1422554267026328,0.8723056363940521,0.7006862982540493,2.0265912889066477,-1.1275375179576355,1.3437135477932411,-0.17093577282503247,0.11361635662615299,0.7720022925641388,Class2 --0.37672679783431784,1.2416989159765663,1.7682234976701623,0.0025793390798367196,-0.43274538773986865,-1.0401044485416462,1.8201813169610797,-1.627627009655601,-0.800986035412573,0.24774729258025752,-0.45173315387280344,-0.5107013331040273,0.9055305188521743,0.5757341820280999,0.7208895233925432,Class2 
--0.5739597849387601,-2.1960398948842585,-1.577247686850861,1.6036822038137248,1.0092438674064794,-0.9241931724038072,-0.6829222688257135,-0.5705336172573588,-1.7494426681053512,0.7716752005665444,0.5299327395133212,-1.4089330340259885,0.5143596497364342,0.2679677908308804,0.8307370960246772,Class2 -1.4124540608073743,0.6642516125209752,0.9847086550487277,0.1592592114829789,0.7259158426558052,0.9175807300386882,-0.6061639220456393,0.5267475279036273,-0.331371306738495,1.6353288973024938,0.10086458757374529,-0.3557464430695032,0.7625013156794012,0.23563340492546558,0.7947003964800388,Class2 --0.1699924753861323,-0.20433489258539497,0.43839708839250396,-0.41090542421373416,0.8740778035657268,1.436418366249012,-0.04557049881657386,-0.17437009517343813,-0.564359111345968,-0.6672979069058557,1.096772643088005,1.54921002975344,-0.17395269498229027,0.6608499076683074,0.06146510015241802,Class2 --0.4664038799880966,1.144315893831266,0.5443001256635757,-0.7858957521224713,-0.27809465515802534,-0.7501456693572253,-0.7177715360870967,0.09401403573716781,-0.11698345040592242,0.3582306177293953,-0.727492283555387,-1.4843625418487516,0.8815393084660172,0.6347567208576947,0.7560659109149128,Class2 -0.15973517136922227,-1.034831652540585,2.0555799296519344,0.5788359287481921,1.9061879360430478,0.8246339010008952,-0.8963755753367649,-0.05362579831304546,0.15509361086788656,0.07473673353256502,-0.22280434506136246,-0.9018365882699885,-0.9644538066349924,0.30663579632528126,0.40745527716353536,Class2 --2.9748151117923527,0.00301843169226812,0.09737199362585001,-0.2787283877777656,0.7628497390731704,2.203233402153586,0.8157135008131697,-0.02692151256874901,1.4758821742739516,-0.9752136384541379,1.2954114900127802,-0.6908419735749162,-0.9428226719610393,0.7168687263038009,0.8202245938591659,Class2 
-0.9445337963424891,1.8372737431284594,0.3354762744770903,0.839527807364011,-0.1492161236754933,0.4721908199498197,0.6621901568408574,0.492895240678558,1.284089337607671,-0.24355660386509412,0.8466544838990466,0.523039587774901,-0.6076199053786695,0.5646456691902131,0.10188873391598463,Class2 -0.12121167085592083,-1.0096730637392863,0.033277983384680826,-0.21896445814579069,1.4929274414779945,0.31988507805512784,0.10311638911612254,1.423528586506216,0.7220897717330016,-0.17841102821570767,1.915594576278611,1.100890829290774,0.754713355563581,0.945156421046704,0.44587802584283054,Class2 --1.2170223629215435,-1.2638859843997534,1.8781919555080224,-0.8597041090554862,-1.2407445276791607,1.3453096922216221,-0.42168493902515114,0.635405164612933,1.1013307958827023,-0.8910244798965671,-0.4149520333126227,-1.0946799038747914,0.027159094344824553,0.8493407568894327,0.05818396434187889,Class2 -1.9690999831047113,1.630692919376135,0.14400600759909346,-1.0108553880340139,-0.7553015426210175,0.8465077151788533,-1.7913155075025742,-0.08131932124474522,0.8018431146894096,0.4960517125484805,-1.1190721575687108,-0.6139342336019464,0.7606234978884459,0.05890214047394693,0.30819601379334927,Class2 --0.09634746072242162,-0.7031485010042652,-0.19545842840603292,-0.632184155699514,0.5344846209735555,1.456097594318122,0.15059288179605612,0.23790213630739288,2.154321024868364,0.07903017494709297,1.1586616105732923,2.323900528117938,-0.28843069775030017,0.04116127151064575,0.7790906010195613,Class2 --0.6885813304714811,-1.6346154370601127,-1.3192060168455315,-1.3469476040852582,-0.6870500708860594,1.0621072344040101,2.2717255655183775,0.08410439563653441,1.1936473156227543,-1.0305336984889761,1.1431584115809992,2.032592335027947,-0.9270720058120787,0.5821814839728177,0.09840119769796729,Class2 
--1.4316678231759958,-1.0469346581977512,-0.3201110237893899,-1.2417901136436051,-0.012006259364476269,0.6383967126824441,0.41319238962516547,1.3826552625963546,0.208480418767692,0.48393151314472554,0.5513966108839632,0.20739651868655234,-0.7502469252794981,0.6124956447165459,0.3082101894542575,Class2 --0.11991031671571914,0.0805055517766079,-0.06682262052173224,0.6328866460072324,-0.7888338966292207,0.8584550011179324,-1.0075748723559923,1.0854195563987363,1.5064744384223205,0.19480890184131902,-0.32414336184136056,-0.40393812796478307,-0.50946542667225,0.41919077723287046,0.9047881765291095,Class2 --0.25306699983150216,-2.0002299472654794,1.7402043774635878,0.013694493257783722,-0.00880453142217658,-0.4643799814520271,1.053719456376288,-0.3723175503614848,-1.5630067225995983,0.2714833141087849,0.781567696641147,-0.2190153307229781,-0.40987476985901594,0.7987668255809695,0.38418634003028274,Class2 --0.27448942510171437,0.9083235890365534,-1.0243433485263613,-0.6414223623305386,-1.9527555485918051,-2.7716507030651836,-1.2760148913100235,0.6156071460353246,0.784598711007815,0.8474033407860608,0.13117998094372568,0.12100045111558963,-0.4942399086430669,0.09532732958905399,0.5603955462574959,Class2 -0.6028305527645961,-0.7211184146435301,-0.04853957138035013,-0.5853337462261773,0.9700359483889077,0.532118920692602,-0.9684842453529886,-2.5400078485643185,1.658227822130169,-0.09857857835718672,0.26803701196392504,-0.6633965491209611,0.8419418074190617,0.6593876632396132,0.293605450540781,Class2 -0.43574999467626685,2.0699815691926746,0.1101041115251155,0.24203834423372497,-0.47955626321597117,-2.6675750095614474,-0.35351140707513434,-0.7196507780939609,-1.4793425043868982,0.33666015732709886,1.5327522315840811,-0.23998006307178957,-0.622468760702759,0.12553971889428794,0.8344445575494319,Class2 
--0.5580551694211078,0.02377816324244625,2.12840727017312,-0.6093846807006423,0.6761170215725527,-1.5479893262286937,0.8547352913508284,0.7918238974043845,0.9735227106438202,-0.4414398489667678,0.1511812135701475,-0.4232723279723773,0.31343492679297924,0.17705128411762416,0.007213236065581441,Class2 -0.6640550312608613,0.9297764756842264,-0.9356395155660878,1.9576626426035317,0.7819738840839887,-1.924245666301327,1.7634812890673441,0.22648340194988195,-0.5471849667386778,-0.334007336881892,0.1550514508908443,1.6492327700745566,-0.028052001725882292,0.6935124918818474,0.14846808090806007,Class2 -0.6469553663806534,1.1317119089440009,0.8737324516432213,0.4420400004290958,0.7519357598397672,0.08582963098518752,-0.5793750077126975,-0.5421817227663325,-3.0813844097325833,-1.2322051649089993,-2.4227532450640576,1.6814713760324873,-0.18798660533502698,0.6998975079040974,0.5252704706508666,Class2 --1.8560772450141128,0.8392370444358368,-1.2929970448517334,-1.3485277607569681,1.4532468847502986,0.628243975566274,-0.33293436521985115,0.5704526160227895,0.9577843173825039,-0.441792251933458,0.30296702183551244,1.4302206914221378,-0.4376934189349413,0.24359494261443615,0.7504561480600387,Class2 --1.255672088775569,-0.530118163985821,2.4049413494852607,-0.2386565072073102,-0.6339002460509118,1.4705534167173926,0.11349301536982101,0.5735323925288791,1.4011682993117849,0.27606344951722767,0.654859855712751,-1.7668343641841302,0.98975335992872715,0.7126398156397045,0.8563637980259955,Class2 -2.222947399685897,1.440727862052595,-0.1966907676792796,0.17271193976974353,-1.8785552140640631,-0.9617030752906602,0.5546545518961223,0.3580286360663811,-1.0882234393175687,-0.2922600203615863,0.6102742602557641,1.102634299233389,-0.9445006595924497,0.2462087469175458,0.9077574810944498,Class2 
--1.0954649530315608,-0.9713898938179596,0.9729386515638633,-0.33538619444638057,-2.217896169550742,0.43713306595579404,0.5045422563537849,-0.5948601367933639,0.32238060846099514,0.6104968435102145,-0.3173344973843924,-0.19890304546384538,0.17528430372476578,0.9982335194945335,0.9638920214492828,Class2 --0.6291766433698494,-1.4693055381751103,0.9883703935861151,-2.360283449568869,-0.922896883110931,-0.47649669886384105,0.34243003052447574,0.4639361794377731,0.9474422761308948,-1.2855464555821121,0.18733697490877052,-0.08188558460413745,0.16844271076843143,0.5062629643362015,0.0876090235542506,Class2 --0.8212359062304435,-2.6076878536059738,-0.40768751205497,1.0840199357784845,2.148147921835111,-0.45087404082012095,-0.24696906768027396,0.3655822382246992,-2.021634140163027,-1.8185714951840795,1.0108211623714367,1.4419221844889132,0.30498601496219635,0.8111195138189942,0.910540115321055,Class2 -0.030207596811144266,-1.586316874563578,-0.06797114857436758,0.33295278138819834,-0.5412068228840327,0.9701461252242948,0.35619628448646445,-1.212537411207599,0.0881299831931325,1.1016296621808879,-0.3613818687465298,-1.6854673642105247,0.06090885028243065,0.1561812418513,0.9578888136893511,Class2 -1.6279707507425025,-0.04111916146504413,-0.3515174709415353,-0.20687104702722622,-0.33464112394334145,1.0651094234675302,-0.9038458441651045,-0.6647561637158077,-1.1776505755000723,-0.5308304955985981,-0.12985959992485716,-1.626909735651001,-0.19998923782259226,0.19466231809929013,0.04555561626330018,Class2 -0.7521173304744568,1.3220420040792074,-0.5789413440085802,-0.5207216473358635,-0.30692732099330183,1.5410096117563008,1.148907640422735,-1.013895555959263,-0.8062794903759601,0.5071099840924113,-0.9924353622302512,2.0060852067868837,0.7067012786865234,0.36651862133294344,0.47359425760805607,Class2 
--0.9527052715396104,-1.361758338962717,1.5447900002541124,-0.1619065502310342,0.9470237697939531,0.6587176055744167,1.1383153817507439,-0.26641536566370866,0.5562190948838375,-1.8394674671752587,1.1563783561168899,1.0237170254674146,0.6624032282270491,0.5732871373184025,0.9516165060922503,Class2 --1.5589598008963585,-1.1981917691498147,0.6772221065949968,1.0806025647554587,-0.17084561965081158,-0.8672731742712471,-2.0063851665462344,0.4480691651884542,-1.505966562661957,-0.4451934284747769,-0.9259405729883179,-0.018921605001006793,-0.014782315585762262,0.684742488199845,0.6856713031884283,Class2 -1.2337682969219623,0.24014144340451188,-0.7143179647039388,1.159706160392989,-1.292862059105576,-0.9438633353967604,1.0907629153228613,-0.4294465036125732,0.3831390423440324,-1.010665912984419,1.0937072904371268,-0.7837563780078826,0.24777352530509233,0.5121911456808448,0.4022976541891694,Class2 --2.6574354580119515,-1.7241102614379236,-2.800437843399564,0.9921649909067046,1.5400751446497745,0.8291400806369511,0.17564342520316958,-1.8352967816964671,-0.7783415728423765,0.38606341636234015,0.9226945132990474,1.117616949129069,-0.5300149563699961,0.4261558169964701,0.9684239833150059,Class2 --2.2347003040212643,-1.425321547270822,-0.2889241345366349,-0.9351451905349144,-0.02050083701162229,-0.07288182662429207,0.26109919249906455,0.4469763600903287,-1.0386361761555245,-0.595559956548403,-1.663014253701603,-0.9147247226159453,-0.32381413597613573,0.7990735147614032,0.5501713091507554,Class2 --2.2517117532569864,-0.6896722917088662,1.4756152699698937,-1.0083242262879084,-1.1895102198382703,0.02506871773813359,-0.9240610351379733,0.6426070569621359,-1.5066186879084291,0.40152355672019574,1.806517305185331,-0.0031181636584895387,-0.34837699914351106,0.027161465492099524,0.2441715570166707,Class2 
-0.6570095580639606,-0.4707961426031142,0.5708596993623403,1.4249477733931224,-1.561913650863787,0.3536376639564857,0.008597193050600806,-1.6593654403222853,0.6547081876687274,-3.258261319430832,-0.9425588446201298,-0.5994464179731919,0.9423576011322439,0.7496294870506972,0.5971482379827648,Class2 --3.3994287537798615,-3.183455180146136,-0.6090690043552504,-0.5275897419866215,-0.7982253880211624,-0.5799360130265588,-0.2169926816358293,-0.8141364739640496,1.3680560462417666,-0.13310738955783755,0.5889496814043346,-0.42252020606955,0.731920822057873,0.5218160629738122,0.6321376916021109,Class2 -0.14381222225831242,-0.667065636956669,0.4380618511066192,1.419009859452638,-0.642519393668057,0.2018712656374661,-0.8152493803373825,-1.6993757278045283,0.5256161081799843,-0.8617864235735101,0.5400622147457423,-0.9421754914737531,0.5701627060770988,0.012870596023276448,0.45084145525470376,Class2 -0.0805853548338219,1.4759369793802306,0.2058458283030493,-0.005574738952699169,1.0498450832025978,0.38038736390673655,0.32201160638376963,0.09249783430115975,-0.4297004811926969,1.542453590083271,-1.715213547854569,-0.9636295865824289,0.004408928565680981,0.18575192405842245,0.7202965228352696,Class2 --2.28599054321857,-0.9200758106202991,0.04836815696090922,-0.454230457482677,0.5164497247533402,1.6693752814731524,-1.5219400170177826,0.5399270648727581,-0.5797229099248236,0.14961276146620944,1.6214066156029117,1.3847822021288505,-0.8232066826894879,0.48912463686428964,0.3867708989419043,Class2 -0.8186184827794389,-2.0597875294354258,1.631878152058605,0.05583336703412863,0.7690567529540993,0.9196646673377961,1.9234674975159607,-0.18226082972410051,0.2396033536990574,-0.5582933020642592,0.15757259058819448,-0.4865978191179029,0.46828447422012687,0.18273416627198458,0.7470398428849876,Class2 
--0.5829646203852985,-1.6655778258864895,0.3005805580858647,-1.076222792136679,-1.3910553898417475,-1.0105049153057306,1.087771855438358,-0.19797529741333034,0.6941971763727844,-0.12184594824042247,1.253954481671404,-1.5968400270068122,-0.967227072454989,0.21772058680653572,0.9383473990019411,Class2 --2.9428623666791274,-2.1988163145455597,0.2558260078715297,-0.39727413019658614,-1.7534349707557726,1.5312457884266955,0.6895107946843463,-1.0374784699139017,-1.233633552703207,-0.7752456369257227,0.3247585689791058,0.49658652169048606,-0.5967224780470133,0.2598613139707595,0.4635926936753094,Class2 -1.134506331775666,-0.9597475719918253,-1.6922273560961698,1.19922628712918,-0.4219414350051616,1.009252718067007,-1.9886352124617082,0.41099268683519136,0.34919404846027086,1.626143077343392,-0.6184987995697971,-0.29173303921502103,0.960332312155515,0.5149238707963377,0.5773657574318349,Class1 -0.013162750137554857,0.8704974893236226,0.6171691276341635,0.287391031500109,0.921075890788046,-2.1000101344046938,0.6541485326503783,-0.010076527823974476,0.5528580924908767,0.27945049606721634,-1.7427437151897762,-1.608935675478922,-0.8811864904128015,0.09573980211280286,0.15774108190089464,Class2 -1.5268468959364792,-0.2662448746049938,-1.1946829816079076,1.6005413088186258,0.4360738664950311,-0.40686962916903235,-1.4786305523954741,0.6571478333074675,0.8178973739784667,-0.6088066718614282,-1.5358210293727355,0.7405963938129665,-0.6298410929739475,0.83714295970276,0.5127516612410545,Class1 -0.8601833403730152,-0.20817180685340175,0.14018577052599002,-1.1388284941643145,-0.06785097174908865,-0.6765535634574175,-1.2480223304590041,-0.21854158503922289,1.7537997609428817,0.49704275285124916,-1.2158347511519934,-0.9187960369620342,-0.8701421767473221,0.40269292308948934,0.5762199277523905,Class2 
-0.1452412488785572,-0.35130607469367336,-0.7498563901365165,-0.04899014983208803,2.0135463992976788,0.4049247428638121,1.0329847334378732,0.16664911377838362,0.43149672376065534,0.6382025830382304,0.09835688005398956,-0.31815741264799097,0.11408664984628558,0.8147547186817974,0.21873644925653934,Class2 --2.527390366890416,-1.232506570291386,1.6305894104959762,1.8761679903212354,0.8854934729862077,-1.0021774916120754,0.19706870601796764,-0.11274152224103055,-0.15607338838591994,0.9717212060132264,0.2475125736369208,-0.49983254401962823,-0.4268786138854921,0.131384224165231,0.6888915153685957,Class2 -0.10063389725246175,1.5495027193011013,-0.9679502923027925,0.897994112074635,0.243174760132842,1.1569089200208875,0.4212841040818851,7.231777277306902e-4,-1.1042596095535266,-1.2040862454206163,-0.15677197604934806,-0.6197549892552053,-0.994200199842453,0.9263048353604972,0.8813572567887604,Class2 -0.7798267373937162,0.9124007216660996,0.2439077219395489,0.29061998484890705,-0.8224954655328057,1.2523136374147192,0.2503011404550362,1.700437058512977,1.129534150408502,-0.3807949604340405,0.682996969647329,-0.3272166335198171,-0.8414498888887465,0.2934264987707138,0.2447861353866756,Class2 --0.005493514434321445,-0.7616864381307196,0.05938008104598514,0.4335654337862178,-1.2736956737570793,0.8804384377262672,0.28597014833297907,0.6106518118563016,0.09085572235601685,-1.3080628628460786,-0.8352037327520418,0.21413904515247792,-0.26486588176339865,0.9694082424975932,0.3205572778824717,Class2 --1.5119097325259743,-0.9192634897401115,0.7385924962331236,0.09936793753886097,-1.3360374236069483,-1.5579059876102077,-0.08794150229852338,0.03301104587796925,1.1249285636983999,1.068540558738945,0.10417565804595412,0.21535669920013248,-0.6677571721374989,0.4348795001860708,0.9511518587823957,Class2 
-2.0206594481066724,2.0232299488858305,0.6226626189476113,0.6304671052151127,-1.1699296818637606,1.2638019769563085,-0.7423748014205981,2.04644796036052,0.84627982994611894,-0.17845120042725332,-0.5767703240264878,-0.010790522506836538,-0.9978649327531457,0.17379020131193101,0.7455487898550928,Class2 -1.981876938727233,3.520953388161437,-1.1461345441670208,-1.5494487804480992,-1.585715947556146,-2.235575080278794,-0.29190053429662893,0.2001028876699312,-0.38707709217230696,-0.3928956522624342,-0.10108464447755078,-1.3799403999724535,0.5526364846155047,0.4141515747178346,0.9632538552395999,Class2 -1.679280405949425,2.753883735786164,0.3651141823133446,-0.20416043120981775,-0.09008361885273418,-2.587064937565455,0.5346916537067524,-0.0990622934511104,0.41377165437224644,1.6392816900035108,-0.15977006050937073,-0.42308867046622495,-0.16729392809793353,0.9982603688258678,0.9724473266396672,Class2 -0.023241622930822103,-0.10663778207728963,3.419394188291463,1.2260433087017537,-0.567886776313794,-0.9800592916379487,-0.15006910464960915,-0.13514012306595602,-0.17669101976118726,-1.5322178563788016,0.09161155274866363,-0.24195337142704837,-0.8311509066261351,0.31117416452616453,0.2109355479478836,Class2 --2.117935269655817,-1.5397698677762337,-0.8525497617974134,-1.1585934542845937,-0.1880370980610111,-1.4160704656573342,-0.6395867569722609,1.8640587058662594,-2.6199310868843506,0.6947382344264658,-1.9018853456189686,0.19690734092524548,-0.9438408175483346,0.6704854171257466,0.5313638267107308,Class2 -1.619135308248942,1.131918601833243,0.7757969777904627,1.3421676891415257,0.02540369307614207,-0.22850136350523367,1.7376378523382534,-0.39761210176556483,0.6283209045335861,0.0026613573332815764,1.3708448978685859,0.8900437729105132,0.6862114169634879,0.31379894469864666,0.8392919315956533,Class2 
-1.8693109668646137,0.9594778149848158,1.0940006277739038,-0.08354517087476425,-1.8615607276715354,-0.2712258025472607,0.6374145001759404,0.8779651169112235,-0.42788285074310306,-0.9292204881543268,-0.9685437262109113,0.10757688187597483,0.898422678001225,0.9564175948034972,0.11296938522718847,Class2 --0.30445746060419676,-0.9173912578304984,0.24381160333582017,-0.6710733201168542,-1.4284179219489836,0.8690747203578507,-0.7001397047596338,1.4757909625742875,2.155730254960066,-1.4392118360609616,1.9896378323040371,0.15296988971113987,-0.4449275638908148,0.8647819920442998,0.448086733231321,Class2 --3.8095902025311665,-2.5887335603247235,1.1001899449408743,2.1054440498923275,-0.07902501578649539,-0.5951030613142202,0.8663317489987227,-0.8668415115562593,1.2040007463671727,-0.49994262888695273,1.7629888052989493,0.8978691675000907,0.8995285253040493,0.8700180184096098,0.48601303715258837,Class2 -1.3581917599379605,0.8301097444449603,1.16867751059483,-1.18285633138835,0.8585203947648354,0.08987783977300644,-0.5373933196950806,0.16357540177243346,-0.7438632995020404,-0.4123403815228725,-0.9430595125865863,0.35694848706990845,-0.2822151146829128,0.31962386844679713,0.7694082900416106,Class2 --0.7856770843986408,-2.606584565700029,0.19395129904211605,1.1683790447956175,1.3861300683323428,-2.801614487363698,-0.25200545961291965,-1.176628711686243,-0.9784587036071887,0.060221180299886316,-0.23911111339382954,0.47988584850132965,-0.263866423163563,0.3047561729326844,0.09673858573660254,Class2 --1.7940857113750197,-0.4765339850518534,-0.06738077139777233,-1.2340370661587043,0.1921468533479452,0.014408914234706702,-1.4937183892492565,-0.55290574319823,-0.9454307108154871,0.41456904174762643,0.45721819149996973,0.9378386157066317,-0.5823189355432987,0.8663095913361758,0.5710067618638277,Class2 
--1.6082657237947666,-0.6422774835492356,0.7260204828228995,0.22574489574025922,-1.4134057777611027,1.5087945708749533,-1.343864328791737,-0.08827402738960803,-0.3759269314134873,1.152165064648134,1.0553131941927785,-1.3561838533701775,0.9455575156025589,0.42153298598714173,0.07186803733929992,Class2 -1.2538391004467015,2.438982910791764,0.5298946671196125,-1.5788775779597337,1.121891000171373,2.080488480032546,-0.6818040976500568,-1.9762417318379983,-0.3103546634076312,-0.4550760533668272,0.09957841167318736,-1.0808457196330368,0.3848026394844055,0.7285070337820798,0.18168203439563513,Class2 -0.5050569713182786,0.27417984085027275,0.9457189715556877,-0.3059167673132222,0.537288360025558,0.003567136889379853,0.9374661238127805,1.2857635054977112,-0.5707205325201214,-0.737307310591596,1.324935084597814,0.027652852250902513,0.12336835358291864,0.05876705702394247,0.7646801054943353,Class2 -1.214498280271028,0.6175699464270653,-0.9026741747207927,0.04226705220618142,1.0559494668049985,0.885763518838975,0.8362475510318991,1.5233578376002717,-0.21014332454241352,-0.6942390887697788,1.2556274514843533,-1.0888338192552853,-0.012232711538672447,0.09509074757806957,0.9130503875203431,Class2 -1.15670705108069,1.2060579500369675,-0.9639694422523936,-1.7033483572761474,-0.0943253133511828,0.33405926773501077,-0.7270734894976403,-0.2509978561574518,-1.055238971592251,-1.543193615805953,1.306298930763597,1.5806030431003775,-0.812762375921011,0.8040552726015449,0.06667491304688156,Class2 -0.5639301398781006,-0.3164129154376294,1.503506666014819,0.3414170135306054,-0.5320124231939761,-0.5084412326274382,-1.0845243696909013,-0.7022475893907657,-0.4463779558213398,-0.13463875667143296,1.6333716601697454,1.333015751628017,-0.3349782843142748,0.1126699298620224,0.1594905259553343,Class2 
-0.5098378094865919,1.0743560983625,0.48494056617474157,1.8948077247812338,-1.1032719241765208,-1.4476713426704713,0.2970190084130661,0.8742589140974543,-0.7300752411201558,1.0887296662978094,0.23169068389191294,-0.028817834355438426,0.8894895217381418,0.8401702695991844,0.42790558794513345,Class2 --0.8423597881260761,-0.6712459800225824,-0.20563457123613688,1.2969165544437364,0.7632754891616945,-1.5703256931567655,-0.3673916441667838,-0.5581603815314222,-0.5121032593224323,0.25075369092332517,-0.37562192443674147,0.5518826062412917,0.682043984066695,0.5883499393239617,0.06130298878997564,Class2 -0.23339263518595857,0.16817003107671333,-0.8678688983773238,-2.2023565795148907,-0.19055438345368161,2.955756371778217,1.4239420389832416,0.9897103729905243,-0.09622975743691542,0.18272414990826094,-0.8654308150575761,-1.0592253954869102,0.3972074771299958,0.1770533414091915,0.2835110912565142,Class2 -0.7794950946809491,-1.18871383213305,-0.8736203607356187,0.2505229787444498,-0.48536231045369316,-0.718636016413743,0.31109044624338555,-0.6284598013788353,-0.6096081972947855,2.1476317534552307,-0.14298700054317764,1.0549123092564519,-0.015734254382550716,0.3782092861365527,0.909939551493153,Class2 --1.4420751628332908,-1.032741233224733,0.30503165274561345,-0.9083957454675128,0.9867493653392115,1.0947976551515293,-0.11596773530194053,-0.47282396011941236,-0.31144693622116904,1.0402666207920481,-1.0431011077929784,1.0542095465615369,-0.01831468055024743,0.49082601768895984,0.67298892326653,Class2 --0.6082198989885387,0.37295684306528665,0.30958223239298377,-0.7584767602379116,0.27264887435841745,-0.688090444369507,-1.47731036468623,-1.2615693159210262,-0.2030752572347349,-0.1821435330555523,0.2541130756404603,0.5374117400243009,0.8650653520599008,0.37395120575092733,0.14334525191225111,Class2 
--0.8918751937973272,0.5071919253221119,0.15892585313024027,0.16914792317031593,-0.6798853778239268,1.1787458956070944,0.17236986927700407,0.01560839903814206,-0.9239460252890304,-0.0803598172500133,0.17639715876367829,-0.9093830311004132,0.26735333167016506,0.326038182945922,0.5928803430870175,Class2 diff --git a/static/fonts/lato-v16-latin-300.eot b/static/fonts/lato-v16-latin-300.eot deleted file mode 100644 index 78dbe12d..00000000 Binary files a/static/fonts/lato-v16-latin-300.eot and /dev/null differ diff --git a/static/fonts/lato-v16-latin-300.svg b/static/fonts/lato-v16-latin-300.svg deleted file mode 100644 index 11b626f8..00000000 --- a/static/fonts/lato-v16-latin-300.svg +++ /dev/null @@ -1,435 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/static/fonts/lato-v16-latin-300.ttf b/static/fonts/lato-v16-latin-300.ttf deleted file mode 100644 index 45f7947e..00000000 Binary files a/static/fonts/lato-v16-latin-300.ttf and /dev/null differ diff --git a/static/fonts/lato-v16-latin-300.woff b/static/fonts/lato-v16-latin-300.woff deleted file mode 100644 index a18ddb12..00000000 Binary files a/static/fonts/lato-v16-latin-300.woff and /dev/null differ diff --git a/static/fonts/lato-v16-latin-300.woff2 b/static/fonts/lato-v16-latin-300.woff2 deleted file mode 100644 index a7f756c4..00000000 Binary files a/static/fonts/lato-v16-latin-300.woff2 and /dev/null differ diff --git a/static/fonts/lato-v16-latin-700.eot b/static/fonts/lato-v16-latin-700.eot deleted file mode 100644 index 0d9dac2f..00000000 Binary files a/static/fonts/lato-v16-latin-700.eot and /dev/null differ diff --git 
a/static/fonts/lato-v16-latin-700.svg b/static/fonts/lato-v16-latin-700.svg deleted file mode 100644 index 077653d2..00000000 --- a/static/fonts/lato-v16-latin-700.svg +++ /dev/null @@ -1,438 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/static/fonts/lato-v16-latin-700.ttf b/static/fonts/lato-v16-latin-700.ttf deleted file mode 100644 index 4f3d8448..00000000 Binary files a/static/fonts/lato-v16-latin-700.ttf and /dev/null differ diff --git a/static/fonts/lato-v16-latin-700.woff b/static/fonts/lato-v16-latin-700.woff deleted file mode 100644 index abf0196d..00000000 Binary files a/static/fonts/lato-v16-latin-700.woff and /dev/null differ diff --git a/static/fonts/lato-v16-latin-700.woff2 b/static/fonts/lato-v16-latin-700.woff2 deleted file mode 100644 index f8c37ecd..00000000 Binary files a/static/fonts/lato-v16-latin-700.woff2 and /dev/null differ diff --git a/static/fonts/lato-v16-latin-italic.eot b/static/fonts/lato-v16-latin-italic.eot deleted file mode 100644 index 8e32803d..00000000 Binary files a/static/fonts/lato-v16-latin-italic.eot and /dev/null differ diff --git a/static/fonts/lato-v16-latin-italic.svg b/static/fonts/lato-v16-latin-italic.svg deleted file mode 100644 index e288645b..00000000 --- a/static/fonts/lato-v16-latin-italic.svg +++ /dev/null @@ -1,450 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/static/fonts/lato-v16-latin-italic.ttf b/static/fonts/lato-v16-latin-italic.ttf deleted file mode 100644 index cf3da8be..00000000 Binary files a/static/fonts/lato-v16-latin-italic.ttf and /dev/null differ diff --git a/static/fonts/lato-v16-latin-italic.woff b/static/fonts/lato-v16-latin-italic.woff deleted file mode 100644 index 95251da0..00000000 Binary files a/static/fonts/lato-v16-latin-italic.woff and /dev/null differ diff --git a/static/fonts/lato-v16-latin-italic.woff2 b/static/fonts/lato-v16-latin-italic.woff2 deleted file mode 100644 index 3246c128..00000000 Binary files a/static/fonts/lato-v16-latin-italic.woff2 and /dev/null differ diff --git a/static/fonts/lato-v16-latin-regular.eot b/static/fonts/lato-v16-latin-regular.eot deleted file mode 100644 index c6413069..00000000 Binary files a/static/fonts/lato-v16-latin-regular.eot and /dev/null differ diff --git a/static/fonts/lato-v16-latin-regular.svg b/static/fonts/lato-v16-latin-regular.svg deleted file mode 100644 index 55b43fb8..00000000 --- a/static/fonts/lato-v16-latin-regular.svg +++ /dev/null @@ -1,435 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/static/fonts/lato-v16-latin-regular.ttf b/static/fonts/lato-v16-latin-regular.ttf deleted file mode 100644 index 3c2d417e..00000000 Binary files a/static/fonts/lato-v16-latin-regular.ttf and /dev/null differ diff --git a/static/fonts/lato-v16-latin-regular.woff b/static/fonts/lato-v16-latin-regular.woff deleted file mode 100644 index 
189a0feb..00000000 Binary files a/static/fonts/lato-v16-latin-regular.woff and /dev/null differ diff --git a/static/fonts/lato-v16-latin-regular.woff2 b/static/fonts/lato-v16-latin-regular.woff2 deleted file mode 100644 index 6904b664..00000000 Binary files a/static/fonts/lato-v16-latin-regular.woff2 and /dev/null differ diff --git a/static/js/tm.js b/static/js/tm.js deleted file mode 100644 index 416f3818..00000000 --- a/static/js/tm.js +++ /dev/null @@ -1,47 +0,0 @@ -/* Clipboard --------------------------*/ -/* This copy-paste button (with tooltips) requires ClipboardJS, JQuery, & Bootstrap JS, linked in head_includes.html */ - -$(document).ready(function() { - - function changeTooltipMessage(element, msg) { - var tooltipOriginalTitle=element.getAttribute('data-original-title'); - element.setAttribute('data-original-title', msg); - $(element).tooltip('show'); - element.setAttribute('data-original-title', tooltipOriginalTitle); - } - - if(ClipboardJS.isSupported()) { - $(document).ready(function() { - var copyButton = ""; - - $(".highlight > pre").addClass("hasCopyButton"); - - // Insert copy buttons: - $(copyButton).prependTo(".hasCopyButton"); - - // Initialize tooltips: - $('.btn-copy-ex').tooltip({container: 'body'}); - - // Initialize clipboard: - var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { - text: function(trigger) { - return trigger.parentNode.textContent; - } - }); - - clipboardBtnCopies.on('success', function(e) { - changeTooltipMessage(e.trigger, 'Copied!'); - e.clearSelection(); - }); - - clipboardBtnCopies.on('error', function() { - changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); - }); - }); - } -}) - - - - - diff --git a/static/rmarkdown-libs/crosstalk/css/crosstalk.css b/static/rmarkdown-libs/crosstalk/css/crosstalk.css deleted file mode 100644 index 46befd2e..00000000 --- a/static/rmarkdown-libs/crosstalk/css/crosstalk.css +++ /dev/null @@ -1,27 +0,0 @@ -/* Adjust margins outwards, so column contents 
line up with the edges of the - parent of container-fluid. */ -.container-fluid.crosstalk-bscols { - margin-left: -30px; - margin-right: -30px; - white-space: normal; -} - -/* But don't adjust the margins outwards if we're directly under the body, - i.e. we were the top-level of something at the console. */ -body > .container-fluid.crosstalk-bscols { - margin-left: auto; - margin-right: auto; -} - -.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { - display: inline-block; - padding-right: 12px; - vertical-align: top; -} - -@media only screen and (max-width:480px) { - .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { - display: block; - padding-right: inherit; - } -} diff --git a/static/rmarkdown-libs/crosstalk/js/crosstalk.js b/static/rmarkdown-libs/crosstalk/js/crosstalk.js deleted file mode 100644 index fd9eb53d..00000000 --- a/static/rmarkdown-libs/crosstalk/js/crosstalk.js +++ /dev/null @@ -1,1474 +0,0 @@ -(function(){function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o b) { - return 1; - } -} - -/** - * @private - */ - -var FilterSet = function () { - function FilterSet() { - _classCallCheck(this, FilterSet); - - this.reset(); - } - - _createClass(FilterSet, [{ - key: "reset", - value: function reset() { - // Key: handle ID, Value: array of selected keys, or null - this._handles = {}; - // Key: key string, Value: count of handles that include it - this._keys = {}; - this._value = null; - this._activeHandles = 0; - } - }, { - key: "update", - value: function update(handleId, keys) { - if (keys !== null) { - keys = keys.slice(0); // clone before 
sorting - keys.sort(naturalComparator); - } - - var _diffSortedLists = (0, _util.diffSortedLists)(this._handles[handleId], keys), - added = _diffSortedLists.added, - removed = _diffSortedLists.removed; - - this._handles[handleId] = keys; - - for (var i = 0; i < added.length; i++) { - this._keys[added[i]] = (this._keys[added[i]] || 0) + 1; - } - for (var _i = 0; _i < removed.length; _i++) { - this._keys[removed[_i]]--; - } - - this._updateValue(keys); - } - - /** - * @param {string[]} keys Sorted array of strings that indicate - * a superset of possible keys. - * @private - */ - - }, { - key: "_updateValue", - value: function _updateValue() { - var keys = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : this._allKeys; - - var handleCount = Object.keys(this._handles).length; - if (handleCount === 0) { - this._value = null; - } else { - this._value = []; - for (var i = 0; i < keys.length; i++) { - var count = this._keys[keys[i]]; - if (count === handleCount) { - this._value.push(keys[i]); - } - } - } - } - }, { - key: "clear", - value: function clear(handleId) { - if (typeof this._handles[handleId] === "undefined") { - return; - } - - var keys = this._handles[handleId]; - if (!keys) { - keys = []; - } - - for (var i = 0; i < keys.length; i++) { - this._keys[keys[i]]--; - } - delete this._handles[handleId]; - - this._updateValue(); - } - }, { - key: "value", - get: function get() { - return this._value; - } - }, { - key: "_allKeys", - get: function get() { - var allKeys = Object.keys(this._keys); - allKeys.sort(naturalComparator); - return allKeys; - } - }]); - - return FilterSet; -}(); - -exports.default = FilterSet; - -},{"./util":11}],4:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = 
descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; - -exports.default = group; - -var _var2 = require("./var"); - -var _var3 = _interopRequireDefault(_var2); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -// Use a global so that multiple copies of crosstalk.js can be loaded and still -// have groups behave as singletons across all copies. -global.__crosstalk_groups = global.__crosstalk_groups || {}; -var groups = global.__crosstalk_groups; - -function group(groupName) { - if (groupName && typeof groupName === "string") { - if (!groups.hasOwnProperty(groupName)) { - groups[groupName] = new Group(groupName); - } - return groups[groupName]; - } else if ((typeof groupName === "undefined" ? 
"undefined" : _typeof(groupName)) === "object" && groupName._vars && groupName.var) { - // Appears to already be a group object - return groupName; - } else if (Array.isArray(groupName) && groupName.length == 1 && typeof groupName[0] === "string") { - return group(groupName[0]); - } else { - throw new Error("Invalid groupName argument"); - } -} - -var Group = function () { - function Group(name) { - _classCallCheck(this, Group); - - this.name = name; - this._vars = {}; - } - - _createClass(Group, [{ - key: "var", - value: function _var(name) { - if (!name || typeof name !== "string") { - throw new Error("Invalid var name"); - } - - if (!this._vars.hasOwnProperty(name)) this._vars[name] = new _var3.default(this, name); - return this._vars[name]; - } - }, { - key: "has", - value: function has(name) { - if (!name || typeof name !== "string") { - throw new Error("Invalid var name"); - } - - return this._vars.hasOwnProperty(name); - } - }]); - - return Group; -}(); - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./var":12}],5:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _group = require("./group"); - -var _group2 = _interopRequireDefault(_group); - -var _selection = require("./selection"); - -var _filter = require("./filter"); - -var _input = require("./input"); - -require("./input_selectize"); - -require("./input_checkboxgroup"); - -require("./input_slider"); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? 
obj : { default: obj }; } - -var defaultGroup = (0, _group2.default)("default"); - -function var_(name) { - return defaultGroup.var(name); -} - -function has(name) { - return defaultGroup.has(name); -} - -if (global.Shiny) { - global.Shiny.addCustomMessageHandler("update-client-value", function (message) { - if (typeof message.group === "string") { - (0, _group2.default)(message.group).var(message.name).set(message.value); - } else { - var_(message.name).set(message.value); - } - }); -} - -var crosstalk = { - group: _group2.default, - var: var_, - has: has, - SelectionHandle: _selection.SelectionHandle, - FilterHandle: _filter.FilterHandle, - bind: _input.bind -}; - -/** - * @namespace crosstalk - */ -exports.default = crosstalk; - -global.crosstalk = crosstalk; - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./filter":2,"./group":4,"./input":6,"./input_checkboxgroup":7,"./input_selectize":8,"./input_slider":9,"./selection":10}],6:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.register = register; -exports.bind = bind; -var $ = global.jQuery; - -var bindings = {}; - -function register(reg) { - bindings[reg.className] = reg; - if (global.document && global.document.readyState !== "complete") { - $(function () { - bind(); - }); - } else if (global.document) { - setTimeout(bind, 100); - } -} - -function bind() { - Object.keys(bindings).forEach(function (className) { - var binding = bindings[className]; - $("." 
+ binding.className).not(".crosstalk-input-bound").each(function (i, el) { - bindInstance(binding, el); - }); - }); -} - -// Escape jQuery identifier -function $escape(val) { - return val.replace(/([!"#$%&'()*+,./:;<=>?@[\\\]^`{|}~])/g, "\\$1"); -} - -function bindEl(el) { - var $el = $(el); - Object.keys(bindings).forEach(function (className) { - if ($el.hasClass(className) && !$el.hasClass("crosstalk-input-bound")) { - var binding = bindings[className]; - bindInstance(binding, el); - } - }); -} - -function bindInstance(binding, el) { - var jsonEl = $(el).find("script[type='application/json'][data-for='" + $escape(el.id) + "']"); - var data = JSON.parse(jsonEl[0].innerText); - - var instance = binding.factory(el, data); - $(el).data("crosstalk-instance", instance); - $(el).addClass("crosstalk-input-bound"); -} - -if (global.Shiny) { - var inputBinding = new global.Shiny.InputBinding(); - var _$ = global.jQuery; - _$.extend(inputBinding, { - find: function find(scope) { - return _$(scope).find(".crosstalk-input"); - }, - initialize: function initialize(el) { - if (!_$(el).hasClass("crosstalk-input-bound")) { - bindEl(el); - } - }, - getId: function getId(el) { - return el.id; - }, - getValue: function getValue(el) {}, - setValue: function setValue(el, value) {}, - receiveMessage: function receiveMessage(el, data) {}, - subscribe: function subscribe(el, callback) { - _$(el).data("crosstalk-instance").resume(); - }, - unsubscribe: function unsubscribe(el) { - _$(el).data("crosstalk-instance").suspend(); - } - }); - global.Shiny.inputBindings.register(inputBinding, "crosstalk.inputBinding"); -} - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? 
window : {}) - -},{}],7:[function(require,module,exports){ -(function (global){ -"use strict"; - -var _input = require("./input"); - -var input = _interopRequireWildcard(_input); - -var _filter = require("./filter"); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -var $ = global.jQuery; - -input.register({ - className: "crosstalk-input-checkboxgroup", - - factory: function factory(el, data) { - /* - * map: {"groupA": ["keyA", "keyB", ...], ...} - * group: "ct-groupname" - */ - var ctHandle = new _filter.FilterHandle(data.group); - - var lastKnownKeys = void 0; - var $el = $(el); - $el.on("change", "input[type='checkbox']", function () { - var checked = $el.find("input[type='checkbox']:checked"); - if (checked.length === 0) { - lastKnownKeys = null; - ctHandle.clear(); - } else { - var keys = {}; - checked.each(function () { - data.map[this.value].forEach(function (key) { - keys[key] = true; - }); - }); - var keyArray = Object.keys(keys); - keyArray.sort(); - lastKnownKeys = keyArray; - ctHandle.set(keyArray); - } - }); - - return { - suspend: function suspend() { - ctHandle.clear(); - }, - resume: function resume() { - if (lastKnownKeys) ctHandle.set(lastKnownKeys); - } - }; - } -}); - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? 
window : {}) - -},{"./filter":2,"./input":6}],8:[function(require,module,exports){ -(function (global){ -"use strict"; - -var _input = require("./input"); - -var input = _interopRequireWildcard(_input); - -var _util = require("./util"); - -var util = _interopRequireWildcard(_util); - -var _filter = require("./filter"); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -var $ = global.jQuery; - -input.register({ - className: "crosstalk-input-select", - - factory: function factory(el, data) { - /* - * items: {value: [...], label: [...]} - * map: {"groupA": ["keyA", "keyB", ...], ...} - * group: "ct-groupname" - */ - - var first = [{ value: "", label: "(All)" }]; - var items = util.dataframeToD3(data.items); - var opts = { - options: first.concat(items), - valueField: "value", - labelField: "label", - searchField: "label" - }; - - var select = $(el).find("select")[0]; - - var selectize = $(select).selectize(opts)[0].selectize; - - var ctHandle = new _filter.FilterHandle(data.group); - - var lastKnownKeys = void 0; - selectize.on("change", function () { - if (selectize.items.length === 0) { - lastKnownKeys = null; - ctHandle.clear(); - } else { - var keys = {}; - selectize.items.forEach(function (group) { - data.map[group].forEach(function (key) { - keys[key] = true; - }); - }); - var keyArray = Object.keys(keys); - keyArray.sort(); - lastKnownKeys = keyArray; - ctHandle.set(keyArray); - } - }); - - return { - suspend: function suspend() { - ctHandle.clear(); - }, - resume: function resume() { - if (lastKnownKeys) ctHandle.set(lastKnownKeys); - } - }; - } -}); - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? 
window : {}) - -},{"./filter":2,"./input":6,"./util":11}],9:[function(require,module,exports){ -(function (global){ -"use strict"; - -var _slicedToArray = function () { function sliceIterator(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"]) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } return function (arr, i) { if (Array.isArray(arr)) { return arr; } else if (Symbol.iterator in Object(arr)) { return sliceIterator(arr, i); } else { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } }; }(); - -var _input = require("./input"); - -var input = _interopRequireWildcard(_input); - -var _filter = require("./filter"); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -var $ = global.jQuery; -var strftime = global.strftime; - -input.register({ - className: "crosstalk-input-slider", - - factory: function factory(el, data) { - /* - * map: {"groupA": ["keyA", "keyB", ...], ...} - * group: "ct-groupname" - */ - var ctHandle = new _filter.FilterHandle(data.group); - - var opts = {}; - var $el = $(el).find("input"); - var dataType = $el.data("data-type"); - var timeFormat = $el.data("time-format"); - var round = $el.data("round"); - var timeFormatter = void 0; - - // Set up formatting functions - if (dataType === "date") { - timeFormatter = strftime.utc(); - opts.prettify = function (num) { - return timeFormatter(timeFormat, new Date(num)); - }; - } else if (dataType === "datetime") { - var timezone = $el.data("timezone"); - if (timezone) timeFormatter = 
strftime.timezone(timezone);else timeFormatter = strftime; - - opts.prettify = function (num) { - return timeFormatter(timeFormat, new Date(num)); - }; - } else if (dataType === "number") { - if (typeof round !== "undefined") opts.prettify = function (num) { - var factor = Math.pow(10, round); - return Math.round(num * factor) / factor; - }; - } - - $el.ionRangeSlider(opts); - - function getValue() { - var result = $el.data("ionRangeSlider").result; - - // Function for converting numeric value from slider to appropriate type. - var convert = void 0; - var dataType = $el.data("data-type"); - if (dataType === "date") { - convert = function convert(val) { - return formatDateUTC(new Date(+val)); - }; - } else if (dataType === "datetime") { - convert = function convert(val) { - // Convert ms to s - return +val / 1000; - }; - } else { - convert = function convert(val) { - return +val; - }; - } - - if ($el.data("ionRangeSlider").options.type === "double") { - return [convert(result.from), convert(result.to)]; - } else { - return convert(result.from); - } - } - - var lastKnownKeys = null; - - $el.on("change.crosstalkSliderInput", function (event) { - if (!$el.data("updating") && !$el.data("animating")) { - var _getValue = getValue(), - _getValue2 = _slicedToArray(_getValue, 2), - from = _getValue2[0], - to = _getValue2[1]; - - var keys = []; - for (var i = 0; i < data.values.length; i++) { - var val = data.values[i]; - if (val >= from && val <= to) { - keys.push(data.keys[i]); - } - } - keys.sort(); - ctHandle.set(keys); - lastKnownKeys = keys; - } - }); - - // let $el = $(el); - // $el.on("change", "input[type="checkbox"]", function() { - // let checked = $el.find("input[type="checkbox"]:checked"); - // if (checked.length === 0) { - // ctHandle.clear(); - // } else { - // let keys = {}; - // checked.each(function() { - // data.map[this.value].forEach(function(key) { - // keys[key] = true; - // }); - // }); - // let keyArray = Object.keys(keys); - // keyArray.sort(); - // 
ctHandle.set(keyArray); - // } - // }); - - return { - suspend: function suspend() { - ctHandle.clear(); - }, - resume: function resume() { - if (lastKnownKeys) ctHandle.set(lastKnownKeys); - } - }; - } -}); - -// Convert a number to a string with leading zeros -function padZeros(n, digits) { - var str = n.toString(); - while (str.length < digits) { - str = "0" + str; - }return str; -} - -// Given a Date object, return a string in yyyy-mm-dd format, using the -// UTC date. This may be a day off from the date in the local time zone. -function formatDateUTC(date) { - if (date instanceof Date) { - return date.getUTCFullYear() + "-" + padZeros(date.getUTCMonth() + 1, 2) + "-" + padZeros(date.getUTCDate(), 2); - } else { - return null; - } -} - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./filter":2,"./input":6}],10:[function(require,module,exports){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); -exports.SelectionHandle = undefined; - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _events = require("./events"); - -var _events2 = _interopRequireDefault(_events); - -var _group = require("./group"); - -var _group2 = _interopRequireDefault(_group); - -var _util = require("./util"); - -var util = _interopRequireWildcard(_util); - -function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var 
newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) newObj[key] = obj[key]; } } newObj.default = obj; return newObj; } } - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -/** - * Use this class to read and write (and listen for changes to) the selection - * for a Crosstalk group. This is intended to be used for linked brushing. - * - * If two (or more) `SelectionHandle` instances in the same webpage share the - * same group name, they will share the same state. Setting the selection using - * one `SelectionHandle` instance will result in the `value` property instantly - * changing across the others, and `"change"` event listeners on all instances - * (including the one that initiated the sending) will fire. - * - * @param {string} [group] - The name of the Crosstalk group, or if none, - * null or undefined (or any other falsy value). This can be changed later - * via the [SelectionHandle#setGroup](#setGroup) method. - * @param {Object} [extraInfo] - An object whose properties will be copied to - * the event object whenever an event is emitted. - */ -var SelectionHandle = exports.SelectionHandle = function () { - function SelectionHandle() { - var group = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; - var extraInfo = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : null; - - _classCallCheck(this, SelectionHandle); - - this._eventRelay = new _events2.default(); - this._emitter = new util.SubscriptionTracker(this._eventRelay); - - // Name of the group we're currently tracking, if any. Can change over time. - this._group = null; - // The Var we're currently tracking, if any. Can change over time. 
- this._var = null; - // The event handler subscription we currently have on var.on("change"). - this._varOnChangeSub = null; - - this._extraInfo = util.extend({ sender: this }, extraInfo); - - this.setGroup(group); - } - - /** - * Changes the Crosstalk group membership of this SelectionHandle. The group - * being switched away from (if any) will not have its selection value - * modified as a result of calling `setGroup`, even if this handle was the - * most recent handle to set the selection of the group. - * - * The group being switched to (if any) will also not have its selection value - * modified as a result of calling `setGroup`. If you want to set the - * selection value of the new group, call `set` explicitly. - * - * @param {string} group - The name of the Crosstalk group, or null (or - * undefined) to clear the group. - */ - - - _createClass(SelectionHandle, [{ - key: "setGroup", - value: function setGroup(group) { - var _this = this; - - // If group is unchanged, do nothing - if (this._group === group) return; - // Treat null, undefined, and other falsy values the same - if (!this._group && !group) return; - - if (this._var) { - this._var.off("change", this._varOnChangeSub); - this._var = null; - this._varOnChangeSub = null; - } - - this._group = group; - - if (group) { - this._var = (0, _group2.default)(group).var("selection"); - var sub = this._var.on("change", function (e) { - _this._eventRelay.trigger("change", e, _this); - }); - this._varOnChangeSub = sub; - } - } - - /** - * Retrieves the current selection for the group represented by this - * `SelectionHandle`. - * - * - If no selection is active, then this value will be falsy. - * - If a selection is active, but no data points are selected, then this - * value will be an empty array. - * - If a selection is active, and data points are selected, then the keys - * of the selected data points will be present in the array. 
- */ - - }, { - key: "_mergeExtraInfo", - - - /** - * Combines the given `extraInfo` (if any) with the handle's default - * `_extraInfo` (if any). - * @private - */ - value: function _mergeExtraInfo(extraInfo) { - // Important incidental effect: shallow clone is returned - return util.extend({}, this._extraInfo ? this._extraInfo : null, extraInfo ? extraInfo : null); - } - - /** - * Overwrites the current selection for the group, and raises the `"change"` - * event among all of the group's '`SelectionHandle` instances (including - * this one). - * - * @fires SelectionHandle#change - * @param {string[]} selectedKeys - Falsy, empty array, or array of keys (see - * {@link SelectionHandle#value}). - * @param {Object} [extraInfo] - Extra properties to be included on the event - * object that's passed to listeners (in addition to any options that were - * passed into the `SelectionHandle` constructor). - */ - - }, { - key: "set", - value: function set(selectedKeys, extraInfo) { - if (this._var) this._var.set(selectedKeys, this._mergeExtraInfo(extraInfo)); - } - - /** - * Overwrites the current selection for the group, and raises the `"change"` - * event among all of the group's '`SelectionHandle` instances (including - * this one). - * - * @fires SelectionHandle#change - * @param {Object} [extraInfo] - Extra properties to be included on the event - * object that's passed to listeners (in addition to any that were passed - * into the `SelectionHandle` constructor). - */ - - }, { - key: "clear", - value: function clear(extraInfo) { - if (this._var) this.set(void 0, this._mergeExtraInfo(extraInfo)); - } - - /** - * Subscribes to events on this `SelectionHandle`. - * - * @param {string} eventType - Indicates the type of events to listen to. - * Currently, only `"change"` is supported. - * @param {SelectionHandle~listener} listener - The callback function that - * will be invoked when the event occurs. 
- * @return {string} - A token to pass to {@link SelectionHandle#off} to cancel - * this subscription. - */ - - }, { - key: "on", - value: function on(eventType, listener) { - return this._emitter.on(eventType, listener); - } - - /** - * Cancels event subscriptions created by {@link SelectionHandle#on}. - * - * @param {string} eventType - The type of event to unsubscribe. - * @param {string|SelectionHandle~listener} listener - Either the callback - * function previously passed into {@link SelectionHandle#on}, or the - * string that was returned from {@link SelectionHandle#on}. - */ - - }, { - key: "off", - value: function off(eventType, listener) { - return this._emitter.off(eventType, listener); - } - - /** - * Shuts down the `SelectionHandle` object. - * - * Removes all event listeners that were added through this handle. - */ - - }, { - key: "close", - value: function close() { - this._emitter.removeAllListeners(); - this.setGroup(null); - } - }, { - key: "value", - get: function get() { - return this._var ? this._var.get() : null; - } - }]); - - return SelectionHandle; -}(); - -/** - * @callback SelectionHandle~listener - * @param {Object} event - An object containing details of the event. For - * `"change"` events, this includes the properties `value` (the new - * value of the selection, or `undefined` if no selection is active), - * `oldValue` (the previous value of the selection), and `sender` (the - * `SelectionHandle` instance that made the change). - */ - -/** - * @event SelectionHandle#change - * @type {object} - * @property {object} value - The new value of the selection, or `undefined` - * if no selection is active. - * @property {object} oldValue - The previous value of the selection. - * @property {SelectionHandle} sender - The `SelectionHandle` instance that - * changed the value. 
- */ - -},{"./events":1,"./group":4,"./util":11}],11:[function(require,module,exports){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; - -exports.extend = extend; -exports.checkSorted = checkSorted; -exports.diffSortedLists = diffSortedLists; -exports.dataframeToD3 = dataframeToD3; - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -function extend(target) { - for (var _len = arguments.length, sources = Array(_len > 1 ? 
_len - 1 : 0), _key = 1; _key < _len; _key++) { - sources[_key - 1] = arguments[_key]; - } - - for (var i = 0; i < sources.length; i++) { - var src = sources[i]; - if (typeof src === "undefined" || src === null) continue; - - for (var key in src) { - if (src.hasOwnProperty(key)) { - target[key] = src[key]; - } - } - } - return target; -} - -function checkSorted(list) { - for (var i = 1; i < list.length; i++) { - if (list[i] <= list[i - 1]) { - throw new Error("List is not sorted or contains duplicate"); - } - } -} - -function diffSortedLists(a, b) { - var i_a = 0; - var i_b = 0; - - if (!a) a = []; - if (!b) b = []; - - var a_only = []; - var b_only = []; - - checkSorted(a); - checkSorted(b); - - while (i_a < a.length && i_b < b.length) { - if (a[i_a] === b[i_b]) { - i_a++; - i_b++; - } else if (a[i_a] < b[i_b]) { - a_only.push(a[i_a++]); - } else { - b_only.push(b[i_b++]); - } - } - - if (i_a < a.length) a_only = a_only.concat(a.slice(i_a)); - if (i_b < b.length) b_only = b_only.concat(b.slice(i_b)); - return { - removed: a_only, - added: b_only - }; -} - -// Convert from wide: { colA: [1,2,3], colB: [4,5,6], ... } -// to long: [ {colA: 1, colB: 4}, {colA: 2, colB: 5}, ... 
] -function dataframeToD3(df) { - var names = []; - var length = void 0; - for (var name in df) { - if (df.hasOwnProperty(name)) names.push(name); - if (_typeof(df[name]) !== "object" || typeof df[name].length === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof length !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = df[name].length; - } - var results = []; - var item = void 0; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; -} - -/** - * Keeps track of all event listener additions/removals and lets all active - * listeners be removed with a single operation. - * - * @private - */ - -var SubscriptionTracker = exports.SubscriptionTracker = function () { - function SubscriptionTracker(emitter) { - _classCallCheck(this, SubscriptionTracker); - - this._emitter = emitter; - this._subs = {}; - } - - _createClass(SubscriptionTracker, [{ - key: "on", - value: function on(eventType, listener) { - var sub = this._emitter.on(eventType, listener); - this._subs[sub] = eventType; - return sub; - } - }, { - key: "off", - value: function off(eventType, listener) { - var sub = this._emitter.off(eventType, listener); - if (sub) { - delete this._subs[sub]; - } - return sub; - } - }, { - key: "removeAllListeners", - value: function removeAllListeners() { - var _this = this; - - var current_subs = this._subs; - this._subs = {}; - Object.keys(current_subs).forEach(function (sub) { - _this._emitter.off(current_subs[sub], sub); - }); - } - }]); - - return SubscriptionTracker; -}(); - -},{}],12:[function(require,module,exports){ -(function (global){ -"use strict"; - -Object.defineProperty(exports, "__esModule", { - value: true -}); - -var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? 
function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; - -var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); - -var _events = require("./events"); - -var _events2 = _interopRequireDefault(_events); - -function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -var Var = function () { - function Var(group, name, /*optional*/value) { - _classCallCheck(this, Var); - - this._group = group; - this._name = name; - this._value = value; - this._events = new _events2.default(); - } - - _createClass(Var, [{ - key: "get", - value: function get() { - return this._value; - } - }, { - key: "set", - value: function set(value, /*optional*/event) { - if (this._value === value) { - // Do nothing; the value hasn't changed - return; - } - var oldValue = this._value; - this._value = value; - // Alert JavaScript listeners that the value has changed - var evt = {}; - if (event && (typeof event === "undefined" ? 
"undefined" : _typeof(event)) === "object") { - for (var k in event) { - if (event.hasOwnProperty(k)) evt[k] = event[k]; - } - } - evt.oldValue = oldValue; - evt.value = value; - this._events.trigger("change", evt, this); - - // TODO: Make this extensible, to let arbitrary back-ends know that - // something has changed - if (global.Shiny && global.Shiny.onInputChange) { - global.Shiny.onInputChange(".clientValue-" + (this._group.name !== null ? this._group.name + "-" : "") + this._name, typeof value === "undefined" ? null : value); - } - } - }, { - key: "on", - value: function on(eventType, listener) { - return this._events.on(eventType, listener); - } - }, { - key: "off", - value: function off(eventType, listener) { - return this._events.off(eventType, listener); - } - }]); - - return Var; -}(); - -exports.default = Var; - -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) - -},{"./events":1}]},{},[5]) -//# sourceMappingURL=crosstalk.js.map diff --git a/static/rmarkdown-libs/crosstalk/js/crosstalk.js.map b/static/rmarkdown-libs/crosstalk/js/crosstalk.js.map deleted file mode 100644 index cff94f08..00000000 --- a/static/rmarkdown-libs/crosstalk/js/crosstalk.js.map +++ /dev/null @@ -1,37 +0,0 @@ -{ - "version": 3, - "sources": [ - "node_modules/browser-pack/_prelude.js", - "javascript/src/events.js", - "javascript/src/filter.js", - "javascript/src/filterset.js", - "javascript/src/group.js", - "javascript/src/index.js", - "javascript/src/input.js", - "javascript/src/input_checkboxgroup.js", - "javascript/src/input_selectize.js", - "javascript/src/input_slider.js", - "javascript/src/selection.js", - "javascript/src/util.js", - "javascript/src/var.js" - ], - "names": [], - "mappings": 
"AAAA;;;;;;;;;;;ICAqB,M;AACnB,oBAAc;AAAA;;AACZ,SAAK,MAAL,GAAc,EAAd;AACA,SAAK,IAAL,GAAY,CAAZ;AACD;;;;uBAEE,S,EAAW,Q,EAAU;AACtB,UAAI,OAAO,KAAK,MAAL,CAAY,SAAZ,CAAX;AACA,UAAI,CAAC,IAAL,EAAW;AACT,eAAO,KAAK,MAAL,CAAY,SAAZ,IAAyB,EAAhC;AACD;AACD,UAAI,MAAM,QAAS,KAAK,IAAL,EAAnB;AACA,WAAK,GAAL,IAAY,QAAZ;AACA,aAAO,GAAP;AACD;;AAED;;;;wBACI,S,EAAW,Q,EAAU;AACvB,UAAI,OAAO,KAAK,MAAL,CAAY,SAAZ,CAAX;AACA,UAAI,OAAO,QAAP,KAAqB,UAAzB,EAAqC;AACnC,aAAK,IAAI,GAAT,IAAgB,IAAhB,EAAsB;AACpB,cAAI,KAAK,cAAL,CAAoB,GAApB,CAAJ,EAA8B;AAC5B,gBAAI,KAAK,GAAL,MAAc,QAAlB,EAA4B;AAC1B,qBAAO,KAAK,GAAL,CAAP;AACA,qBAAO,GAAP;AACD;AACF;AACF;AACD,eAAO,KAAP;AACD,OAVD,MAUO,IAAI,OAAO,QAAP,KAAqB,QAAzB,EAAmC;AACxC,YAAI,QAAQ,KAAK,QAAL,CAAZ,EAA4B;AAC1B,iBAAO,KAAK,QAAL,CAAP;AACA,iBAAO,QAAP;AACD;AACD,eAAO,KAAP;AACD,OANM,MAMA;AACL,cAAM,IAAI,KAAJ,CAAU,8BAAV,CAAN;AACD;AACF;;;4BAEO,S,EAAW,G,EAAK,O,EAAS;AAC/B,UAAI,OAAO,KAAK,MAAL,CAAY,SAAZ,CAAX;AACA,WAAK,IAAI,GAAT,IAAgB,IAAhB,EAAsB;AACpB,YAAI,KAAK,cAAL,CAAoB,GAApB,CAAJ,EAA8B;AAC5B,eAAK,GAAL,EAAU,IAAV,CAAe,OAAf,EAAwB,GAAxB;AACD;AACF;AACF;;;;;;kBA/CkB,M;;;;;;;;;;;;ACArB;;;;AACA;;;;AACA;;;;AACA;;IAAY,I;;;;;;;;AAEZ,SAAS,YAAT,CAAsB,KAAtB,EAA6B;AAC3B,MAAI,QAAQ,MAAM,GAAN,CAAU,WAAV,CAAZ;AACA,MAAI,SAAS,MAAM,GAAN,EAAb;AACA,MAAI,CAAC,MAAL,EAAa;AACX,aAAS,yBAAT;AACA,UAAM,GAAN,CAAU,MAAV;AACD;AACD,SAAO,MAAP;AACD;;AAED,IAAI,KAAK,CAAT;AACA,SAAS,MAAT,GAAkB;AAChB,SAAO,IAAP;AACD;;AAED;;;;;;;;;;;;;;;;;;;;;;;;;IAwBa,Y,WAAA,Y;AACX,wBAAY,KAAZ,EAAmB,SAAnB,EAA8B;AAAA;;AAC5B,SAAK,WAAL,GAAmB,sBAAnB;AACA,SAAK,QAAL,GAAgB,IAAI,KAAK,mBAAT,CAA6B,KAAK,WAAlC,CAAhB;;AAEA;AACA,SAAK,MAAL,GAAc,IAAd;AACA;AACA,SAAK,UAAL,GAAkB,IAAlB;AACA;AACA,SAAK,UAAL,GAAkB,IAAlB;AACA;AACA,SAAK,eAAL,GAAuB,IAAvB;;AAEA,SAAK,UAAL,GAAkB,KAAK,MAAL,CAAY,EAAE,QAAQ,IAAV,EAAZ,EAA8B,SAA9B,CAAlB;;AAEA,SAAK,GAAL,GAAW,WAAW,QAAtB;;AAEA,SAAK,QAAL,CAAc,KAAd;AACD;;AAED;;;;;;;;;;;;;;6BAUS,K,EAAO;AAAA;;AACd;AACA,UAAI,KAAK,MAAL,KAAgB,KAApB,EACE;AACF;AACA,UAAI,CAAC,KAAK,MAAN,IAAgB,CAAC,KAArB,EACE;;AAEF,UAAI,KAAK,UAAT,EAAqB;AACnB,aAAK,UAAL,CAAgB,GAAhB,CAAo
B,QAApB,EAA8B,KAAK,eAAnC;AACA,aAAK,KAAL;AACA,aAAK,eAAL,GAAuB,IAAvB;AACA,aAAK,UAAL,GAAkB,IAAlB;AACA,aAAK,UAAL,GAAkB,IAAlB;AACD;;AAED,WAAK,MAAL,GAAc,KAAd;;AAEA,UAAI,KAAJ,EAAW;AACT,gBAAQ,qBAAI,KAAJ,CAAR;AACA,aAAK,UAAL,GAAkB,aAAa,KAAb,CAAlB;AACA,aAAK,UAAL,GAAkB,qBAAI,KAAJ,EAAW,GAAX,CAAe,QAAf,CAAlB;AACA,YAAI,MAAM,KAAK,UAAL,CAAgB,EAAhB,CAAmB,QAAnB,EAA6B,UAAC,CAAD,EAAO;AAC5C,gBAAK,WAAL,CAAiB,OAAjB,CAAyB,QAAzB,EAAmC,CAAnC;AACD,SAFS,CAAV;AAGA,aAAK,eAAL,GAAuB,GAAvB;AACD;AACF;;AAED;;;;;;;;oCAKgB,S,EAAW;AACzB,aAAO,KAAK,MAAL,CAAY,EAAZ,EACL,KAAK,UAAL,GAAkB,KAAK,UAAvB,GAAoC,IAD/B,EAEL,YAAY,SAAZ,GAAwB,IAFnB,CAAP;AAGD;;AAED;;;;;;;4BAIQ;AACN,WAAK,QAAL,CAAc,kBAAd;AACA,WAAK,KAAL;AACA,WAAK,QAAL,CAAc,IAAd;AACD;;AAED;;;;;;;;;;;;0BASM,S,EAAW;AACf,UAAI,CAAC,KAAK,UAAV,EACE;AACF,WAAK,UAAL,CAAgB,KAAhB,CAAsB,KAAK,GAA3B;AACA,WAAK,SAAL,CAAe,SAAf;AACD;;AAED;;;;;;;;;;;;;;;;;;;;wBAiBI,I,EAAM,S,EAAW;AACnB,UAAI,CAAC,KAAK,UAAV,EACE;AACF,WAAK,UAAL,CAAgB,MAAhB,CAAuB,KAAK,GAA5B,EAAiC,IAAjC;AACA,WAAK,SAAL,CAAe,SAAf;AACD;;AAED;;;;;;;;;;AASA;;;;;;;;;;uBAUG,S,EAAW,Q,EAAU;AACtB,aAAO,KAAK,QAAL,CAAc,EAAd,CAAiB,SAAjB,EAA4B,QAA5B,CAAP;AACD;;AAED;;;;;;;;;;;wBAQI,S,EAAW,Q,EAAU;AACvB,aAAO,KAAK,QAAL,CAAc,GAAd,CAAkB,SAAlB,EAA6B,QAA7B,CAAP;AACD;;;8BAES,S,EAAW;AACnB,UAAI,CAAC,KAAK,UAAV,EACE;AACF,WAAK,UAAL,CAAgB,GAAhB,CAAoB,KAAK,UAAL,CAAgB,KAApC,EAA2C,KAAK,eAAL,CAAqB,SAArB,CAA3C;AACD;;AAED;;;;;;;;;;;wBApCmB;AACjB,aAAO,KAAK,UAAL,GAAkB,KAAK,UAAL,CAAgB,KAAlC,GAA0C,IAAjD;AACD;;;;;;AA6CH;;;;;;;;;;;;;;;;;;;ACzNA;;;;AAEA,SAAS,iBAAT,CAA2B,CAA3B,EAA8B,CAA9B,EAAiC;AAC/B,MAAI,MAAM,CAAV,EAAa;AACX,WAAO,CAAP;AACD,GAFD,MAEO,IAAI,IAAI,CAAR,EAAW;AAChB,WAAO,CAAC,CAAR;AACD,GAFM,MAEA,IAAI,IAAI,CAAR,EAAW;AAChB,WAAO,CAAP;AACD;AACF;;AAED;;;;IAGqB,S;AACnB,uBAAc;AAAA;;AACZ,SAAK,KAAL;AACD;;;;4BAEO;AACN;AACA,WAAK,QAAL,GAAgB,EAAhB;AACA;AACA,WAAK,KAAL,GAAa,EAAb;AACA,WAAK,MAAL,GAAc,IAAd;AACA,WAAK,cAAL,GAAsB,CAAtB;AACD;;;2BAMM,Q,EAAU,I,EAAM;AACrB,UAAI,SAAS,IAAb,EAAmB;AACjB,eAAO,KAAK,KAAL,CAAW,CAAX,CAAP,CADiB,CACK;AACtB,aAAK,IAAL,CAAU,iBAAV;AACD;;AAJoB,
6BAME,2BAAgB,KAAK,QAAL,CAAc,QAAd,CAAhB,EAAyC,IAAzC,CANF;AAAA,UAMhB,KANgB,oBAMhB,KANgB;AAAA,UAMT,OANS,oBAMT,OANS;;AAOrB,WAAK,QAAL,CAAc,QAAd,IAA0B,IAA1B;;AAEA,WAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,MAAM,MAA1B,EAAkC,GAAlC,EAAuC;AACrC,aAAK,KAAL,CAAW,MAAM,CAAN,CAAX,IAAuB,CAAC,KAAK,KAAL,CAAW,MAAM,CAAN,CAAX,KAAwB,CAAzB,IAA8B,CAArD;AACD;AACD,WAAK,IAAI,KAAI,CAAb,EAAgB,KAAI,QAAQ,MAA5B,EAAoC,IAApC,EAAyC;AACvC,aAAK,KAAL,CAAW,QAAQ,EAAR,CAAX;AACD;;AAED,WAAK,YAAL,CAAkB,IAAlB;AACD;;AAED;;;;;;;;mCAKmC;AAAA,UAAtB,IAAsB,uEAAf,KAAK,QAAU;;AACjC,UAAI,cAAc,OAAO,IAAP,CAAY,KAAK,QAAjB,EAA2B,MAA7C;AACA,UAAI,gBAAgB,CAApB,EAAuB;AACrB,aAAK,MAAL,GAAc,IAAd;AACD,OAFD,MAEO;AACL,aAAK,MAAL,GAAc,EAAd;AACA,aAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAzB,EAAiC,GAAjC,EAAsC;AACpC,cAAI,QAAQ,KAAK,KAAL,CAAW,KAAK,CAAL,CAAX,CAAZ;AACA,cAAI,UAAU,WAAd,EAA2B;AACzB,iBAAK,MAAL,CAAY,IAAZ,CAAiB,KAAK,CAAL,CAAjB;AACD;AACF;AACF;AACF;;;0BAEK,Q,EAAU;AACd,UAAI,OAAO,KAAK,QAAL,CAAc,QAAd,CAAP,KAAoC,WAAxC,EAAqD;AACnD;AACD;;AAED,UAAI,OAAO,KAAK,QAAL,CAAc,QAAd,CAAX;AACA,UAAI,CAAC,IAAL,EAAW;AACT,eAAO,EAAP;AACD;;AAED,WAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAzB,EAAiC,GAAjC,EAAsC;AACpC,aAAK,KAAL,CAAW,KAAK,CAAL,CAAX;AACD;AACD,aAAO,KAAK,QAAL,CAAc,QAAd,CAAP;;AAEA,WAAK,YAAL;AACD;;;wBA3DW;AACV,aAAO,KAAK,MAAZ;AACD;;;wBA2Dc;AACb,UAAI,UAAU,OAAO,IAAP,CAAY,KAAK,KAAjB,CAAd;AACA,cAAQ,IAAR,CAAa,iBAAb;AACA,aAAO,OAAP;AACD;;;;;;kBA/EkB,S;;;;;;;;;;;;;;kBCRG,K;;AAPxB;;;;;;;;AAEA;AACA;AACA,OAAO,kBAAP,GAA4B,OAAO,kBAAP,IAA6B,EAAzD;AACA,IAAI,SAAS,OAAO,kBAApB;;AAEe,SAAS,KAAT,CAAe,SAAf,EAA0B;AACvC,MAAI,aAAa,OAAO,SAAP,KAAsB,QAAvC,EAAiD;AAC/C,QAAI,CAAC,OAAO,cAAP,CAAsB,SAAtB,CAAL,EAAuC;AACrC,aAAO,SAAP,IAAoB,IAAI,KAAJ,CAAU,SAAV,CAApB;AACD;AACD,WAAO,OAAO,SAAP,CAAP;AACD,GALD,MAKO,IAAI,QAAO,SAAP,yCAAO,SAAP,OAAsB,QAAtB,IAAkC,UAAU,KAA5C,IAAqD,UAAU,GAAnE,EAAwE;AAC7E;AACA,WAAO,SAAP;AACD,GAHM,MAGA,IAAI,MAAM,OAAN,CAAc,SAAd,KACP,UAAU,MAAV,IAAoB,CADb,IAEP,OAAO,UAAU,CAAV,CAAP,KAAyB,QAFtB,EAEgC;AACrC,WAAO,MAAM,UAAU,CAAV,CAAN,CAAP;AACD,GAJM,MAIA;AACL,UAAM,IAAI,KAAJ,CAAU,4BAAV,CAAN;AACD;AACF;;
IAEK,K;AACJ,iBAAY,IAAZ,EAAkB;AAAA;;AAChB,SAAK,IAAL,GAAY,IAAZ;AACA,SAAK,KAAL,GAAa,EAAb;AACD;;;;yBAEG,I,EAAM;AACR,UAAI,CAAC,IAAD,IAAS,OAAO,IAAP,KAAiB,QAA9B,EAAwC;AACtC,cAAM,IAAI,KAAJ,CAAU,kBAAV,CAAN;AACD;;AAED,UAAI,CAAC,KAAK,KAAL,CAAW,cAAX,CAA0B,IAA1B,CAAL,EACE,KAAK,KAAL,CAAW,IAAX,IAAmB,kBAAQ,IAAR,EAAc,IAAd,CAAnB;AACF,aAAO,KAAK,KAAL,CAAW,IAAX,CAAP;AACD;;;wBAEG,I,EAAM;AACR,UAAI,CAAC,IAAD,IAAS,OAAO,IAAP,KAAiB,QAA9B,EAAwC;AACtC,cAAM,IAAI,KAAJ,CAAU,kBAAV,CAAN;AACD;;AAED,aAAO,KAAK,KAAL,CAAW,cAAX,CAA0B,IAA1B,CAAP;AACD;;;;;;;;;;;;;;;;AC/CH;;;;AACA;;AACA;;AACA;;AACA;;AACA;;AACA;;;;AAEA,IAAM,eAAe,qBAAM,SAAN,CAArB;;AAEA,SAAS,IAAT,CAAc,IAAd,EAAoB;AAClB,SAAO,aAAa,GAAb,CAAiB,IAAjB,CAAP;AACD;;AAED,SAAS,GAAT,CAAa,IAAb,EAAmB;AACjB,SAAO,aAAa,GAAb,CAAiB,IAAjB,CAAP;AACD;;AAED,IAAI,OAAO,KAAX,EAAkB;AAChB,SAAO,KAAP,CAAa,uBAAb,CAAqC,qBAArC,EAA4D,UAAS,OAAT,EAAkB;AAC5E,QAAI,OAAO,QAAQ,KAAf,KAA0B,QAA9B,EAAwC;AACtC,2BAAM,QAAQ,KAAd,EAAqB,GAArB,CAAyB,QAAQ,IAAjC,EAAuC,GAAvC,CAA2C,QAAQ,KAAnD;AACD,KAFD,MAEO;AACL,WAAK,QAAQ,IAAb,EAAmB,GAAnB,CAAuB,QAAQ,KAA/B;AACD;AACF,GAND;AAOD;;AAED,IAAM,YAAY;AAChB,wBADgB;AAEhB,OAAK,IAFW;AAGhB,OAAK,GAHW;AAIhB,6CAJgB;AAKhB,oCALgB;AAMhB;AANgB,CAAlB;;AASA;;;kBAGe,S;;AACf,OAAO,SAAP,GAAmB,SAAnB;;;;;;;;;;;QCrCgB,Q,GAAA,Q;QAWA,I,GAAA,I;AAfhB,IAAI,IAAI,OAAO,MAAf;;AAEA,IAAI,WAAW,EAAf;;AAEO,SAAS,QAAT,CAAkB,GAAlB,EAAuB;AAC5B,WAAS,IAAI,SAAb,IAA0B,GAA1B;AACA,MAAI,OAAO,QAAP,IAAmB,OAAO,QAAP,CAAgB,UAAhB,KAA+B,UAAtD,EAAkE;AAChE,MAAE,YAAM;AACN;AACD,KAFD;AAGD,GAJD,MAIO,IAAI,OAAO,QAAX,EAAqB;AAC1B,eAAW,IAAX,EAAiB,GAAjB;AACD;AACF;;AAEM,SAAS,IAAT,GAAgB;AACrB,SAAO,IAAP,CAAY,QAAZ,EAAsB,OAAtB,CAA8B,UAAS,SAAT,EAAoB;AAChD,QAAI,UAAU,SAAS,SAAT,CAAd;AACA,MAAE,MAAM,QAAQ,SAAhB,EAA2B,GAA3B,CAA+B,wBAA/B,EAAyD,IAAzD,CAA8D,UAAS,CAAT,EAAY,EAAZ,EAAgB;AAC5E,mBAAa,OAAb,EAAsB,EAAtB;AACD,KAFD;AAGD,GALD;AAMD;;AAED;AACA,SAAS,OAAT,CAAiB,GAAjB,EAAsB;AACpB,SAAO,IAAI,OAAJ,CAAY,uCAAZ,EAAqD,MAArD,CAAP;AACD;;AAED,SAAS,MAAT,CAAgB,EAAhB,EAAoB;AAClB,MAAI,MAAM,EAAE,EAAF,CAAV;AACA,SAAO,IAAP,CAAY,QAAZ,EAAsB,OAAtB,CAA8B,UAAS,S
AAT,EAAoB;AAChD,QAAI,IAAI,QAAJ,CAAa,SAAb,KAA2B,CAAC,IAAI,QAAJ,CAAa,uBAAb,CAAhC,EAAuE;AACrE,UAAI,UAAU,SAAS,SAAT,CAAd;AACA,mBAAa,OAAb,EAAsB,EAAtB;AACD;AACF,GALD;AAMD;;AAED,SAAS,YAAT,CAAsB,OAAtB,EAA+B,EAA/B,EAAmC;AACjC,MAAI,SAAS,EAAE,EAAF,EAAM,IAAN,CAAW,+CAA+C,QAAQ,GAAG,EAAX,CAA/C,GAAgE,IAA3E,CAAb;AACA,MAAI,OAAO,KAAK,KAAL,CAAW,OAAO,CAAP,EAAU,SAArB,CAAX;;AAEA,MAAI,WAAW,QAAQ,OAAR,CAAgB,EAAhB,EAAoB,IAApB,CAAf;AACA,IAAE,EAAF,EAAM,IAAN,CAAW,oBAAX,EAAiC,QAAjC;AACA,IAAE,EAAF,EAAM,QAAN,CAAe,uBAAf;AACD;;AAED,IAAI,OAAO,KAAX,EAAkB;AAChB,MAAI,eAAe,IAAI,OAAO,KAAP,CAAa,YAAjB,EAAnB;AACA,MAAI,KAAI,OAAO,MAAf;AACA,KAAE,MAAF,CAAS,YAAT,EAAuB;AACrB,UAAM,cAAS,KAAT,EAAgB;AACpB,aAAO,GAAE,KAAF,EAAS,IAAT,CAAc,kBAAd,CAAP;AACD,KAHoB;AAIrB,gBAAY,oBAAS,EAAT,EAAa;AACvB,UAAI,CAAC,GAAE,EAAF,EAAM,QAAN,CAAe,uBAAf,CAAL,EAA8C;AAC5C,eAAO,EAAP;AACD;AACF,KARoB;AASrB,WAAO,eAAS,EAAT,EAAa;AAClB,aAAO,GAAG,EAAV;AACD,KAXoB;AAYrB,cAAU,kBAAS,EAAT,EAAa,CAEtB,CAdoB;AAerB,cAAU,kBAAS,EAAT,EAAa,KAAb,EAAoB,CAE7B,CAjBoB;AAkBrB,oBAAgB,wBAAS,EAAT,EAAa,IAAb,EAAmB,CAElC,CApBoB;AAqBrB,eAAW,mBAAS,EAAT,EAAa,QAAb,EAAuB;AAChC,SAAE,EAAF,EAAM,IAAN,CAAW,oBAAX,EAAiC,MAAjC;AACD,KAvBoB;AAwBrB,iBAAa,qBAAS,EAAT,EAAa;AACxB,SAAE,EAAF,EAAM,IAAN,CAAW,oBAAX,EAAiC,OAAjC;AACD;AA1BoB,GAAvB;AA4BA,SAAO,KAAP,CAAa,aAAb,CAA2B,QAA3B,CAAoC,YAApC,EAAkD,wBAAlD;AACD;;;;;;;;AChFD;;IAAY,K;;AACZ;;;;AAEA,IAAI,IAAI,OAAO,MAAf;;AAEA,MAAM,QAAN,CAAe;AACb,aAAW,+BADE;;AAGb,WAAS,iBAAS,EAAT,EAAa,IAAb,EAAmB;AAC1B;;;;AAIA,QAAI,WAAW,yBAAiB,KAAK,KAAtB,CAAf;;AAEA,QAAI,sBAAJ;AACA,QAAI,MAAM,EAAE,EAAF,CAAV;AACA,QAAI,EAAJ,CAAO,QAAP,EAAiB,wBAAjB,EAA2C,YAAW;AACpD,UAAI,UAAU,IAAI,IAAJ,CAAS,gCAAT,CAAd;AACA,UAAI,QAAQ,MAAR,KAAmB,CAAvB,EAA0B;AACxB,wBAAgB,IAAhB;AACA,iBAAS,KAAT;AACD,OAHD,MAGO;AACL,YAAI,OAAO,EAAX;AACA,gBAAQ,IAAR,CAAa,YAAW;AACtB,eAAK,GAAL,CAAS,KAAK,KAAd,EAAqB,OAArB,CAA6B,UAAS,GAAT,EAAc;AACzC,iBAAK,GAAL,IAAY,IAAZ;AACD,WAFD;AAGD,SAJD;AAKA,YAAI,WAAW,OAAO,IAAP,CAAY,IAAZ,CAAf;AACA,iBAAS,IAAT;AACA,wBAAgB,QAAhB;AACA,iBAAS,GAAT,CAAa,QAAb;AACD;AACF,KAjBD;;AAmBA,WAAO;AACL,eAAS,mBAAW;
AAClB,iBAAS,KAAT;AACD,OAHI;AAIL,cAAQ,kBAAW;AACjB,YAAI,aAAJ,EACE,SAAS,GAAT,CAAa,aAAb;AACH;AAPI,KAAP;AASD;AAxCY,CAAf;;;;;;;;ACLA;;IAAY,K;;AACZ;;IAAY,I;;AACZ;;;;AAEA,IAAI,IAAI,OAAO,MAAf;;AAEA,MAAM,QAAN,CAAe;AACb,aAAW,wBADE;;AAGb,WAAS,iBAAS,EAAT,EAAa,IAAb,EAAmB;AAC1B;;;;;;AAMA,QAAI,QAAQ,CAAC,EAAC,OAAO,EAAR,EAAY,OAAO,OAAnB,EAAD,CAAZ;AACA,QAAI,QAAQ,KAAK,aAAL,CAAmB,KAAK,KAAxB,CAAZ;AACA,QAAI,OAAO;AACT,eAAS,MAAM,MAAN,CAAa,KAAb,CADA;AAET,kBAAY,OAFH;AAGT,kBAAY,OAHH;AAIT,mBAAa;AAJJ,KAAX;;AAOA,QAAI,SAAS,EAAE,EAAF,EAAM,IAAN,CAAW,QAAX,EAAqB,CAArB,CAAb;;AAEA,QAAI,YAAY,EAAE,MAAF,EAAU,SAAV,CAAoB,IAApB,EAA0B,CAA1B,EAA6B,SAA7C;;AAEA,QAAI,WAAW,yBAAiB,KAAK,KAAtB,CAAf;;AAEA,QAAI,sBAAJ;AACA,cAAU,EAAV,CAAa,QAAb,EAAuB,YAAW;AAChC,UAAI,UAAU,KAAV,CAAgB,MAAhB,KAA2B,CAA/B,EAAkC;AAChC,wBAAgB,IAAhB;AACA,iBAAS,KAAT;AACD,OAHD,MAGO;AACL,YAAI,OAAO,EAAX;AACA,kBAAU,KAAV,CAAgB,OAAhB,CAAwB,UAAS,KAAT,EAAgB;AACtC,eAAK,GAAL,CAAS,KAAT,EAAgB,OAAhB,CAAwB,UAAS,GAAT,EAAc;AACpC,iBAAK,GAAL,IAAY,IAAZ;AACD,WAFD;AAGD,SAJD;AAKA,YAAI,WAAW,OAAO,IAAP,CAAY,IAAZ,CAAf;AACA,iBAAS,IAAT;AACA,wBAAgB,QAAhB;AACA,iBAAS,GAAT,CAAa,QAAb;AACD;AACF,KAhBD;;AAkBA,WAAO;AACL,eAAS,mBAAW;AAClB,iBAAS,KAAT;AACD,OAHI;AAIL,cAAQ,kBAAW;AACjB,YAAI,aAAJ,EACE,SAAS,GAAT,CAAa,aAAb;AACH;AAPI,KAAP;AASD;AArDY,CAAf;;;;;;;;;;ACNA;;IAAY,K;;AACZ;;;;AAEA,IAAI,IAAI,OAAO,MAAf;AACA,IAAI,WAAW,OAAO,QAAtB;;AAEA,MAAM,QAAN,CAAe;AACb,aAAW,wBADE;;AAGb,WAAS,iBAAS,EAAT,EAAa,IAAb,EAAmB;AAC1B;;;;AAIA,QAAI,WAAW,yBAAiB,KAAK,KAAtB,CAAf;;AAEA,QAAI,OAAO,EAAX;AACA,QAAI,MAAM,EAAE,EAAF,EAAM,IAAN,CAAW,OAAX,CAAV;AACA,QAAI,WAAW,IAAI,IAAJ,CAAS,WAAT,CAAf;AACA,QAAI,aAAa,IAAI,IAAJ,CAAS,aAAT,CAAjB;AACA,QAAI,QAAQ,IAAI,IAAJ,CAAS,OAAT,CAAZ;AACA,QAAI,sBAAJ;;AAEA;AACA,QAAI,aAAa,MAAjB,EAAyB;AACvB,sBAAgB,SAAS,GAAT,EAAhB;AACA,WAAK,QAAL,GAAgB,UAAS,GAAT,EAAc;AAC5B,eAAO,cAAc,UAAd,EAA0B,IAAI,IAAJ,CAAS,GAAT,CAA1B,CAAP;AACD,OAFD;AAID,KAND,MAMO,IAAI,aAAa,UAAjB,EAA6B;AAClC,UAAI,WAAW,IAAI,IAAJ,CAAS,UAAT,CAAf;AACA,UAAI,QAAJ,EACE,gBAAgB,SAAS,QAAT,CAAkB,QAAlB,CAAhB,CADF,KAGE,gBAAgB,QAAhB;;AAEF,WAAK,QAAL,GAAgB,U
AAS,GAAT,EAAc;AAC5B,eAAO,cAAc,UAAd,EAA0B,IAAI,IAAJ,CAAS,GAAT,CAA1B,CAAP;AACD,OAFD;AAGD,KAVM,MAUA,IAAI,aAAa,QAAjB,EAA2B;AAChC,UAAI,OAAO,KAAP,KAAiB,WAArB,EACE,KAAK,QAAL,GAAgB,UAAS,GAAT,EAAc;AAC5B,YAAI,SAAS,KAAK,GAAL,CAAS,EAAT,EAAa,KAAb,CAAb;AACA,eAAO,KAAK,KAAL,CAAW,MAAM,MAAjB,IAA2B,MAAlC;AACD,OAHD;AAIH;;AAED,QAAI,cAAJ,CAAmB,IAAnB;;AAEA,aAAS,QAAT,GAAoB;AAClB,UAAI,SAAS,IAAI,IAAJ,CAAS,gBAAT,EAA2B,MAAxC;;AAEA;AACA,UAAI,gBAAJ;AACA,UAAI,WAAW,IAAI,IAAJ,CAAS,WAAT,CAAf;AACA,UAAI,aAAa,MAAjB,EAAyB;AACvB,kBAAU,iBAAS,GAAT,EAAc;AACtB,iBAAO,cAAc,IAAI,IAAJ,CAAS,CAAC,GAAV,CAAd,CAAP;AACD,SAFD;AAGD,OAJD,MAIO,IAAI,aAAa,UAAjB,EAA6B;AAClC,kBAAU,iBAAS,GAAT,EAAc;AACtB;AACA,iBAAO,CAAC,GAAD,GAAO,IAAd;AACD,SAHD;AAID,OALM,MAKA;AACL,kBAAU,iBAAS,GAAT,EAAc;AAAE,iBAAO,CAAC,GAAR;AAAc,SAAxC;AACD;;AAED,UAAI,IAAI,IAAJ,CAAS,gBAAT,EAA2B,OAA3B,CAAmC,IAAnC,KAA4C,QAAhD,EAA0D;AACxD,eAAO,CAAC,QAAQ,OAAO,IAAf,CAAD,EAAuB,QAAQ,OAAO,EAAf,CAAvB,CAAP;AACD,OAFD,MAEO;AACL,eAAO,QAAQ,OAAO,IAAf,CAAP;AACD;AACF;;AAED,QAAI,gBAAgB,IAApB;;AAEA,QAAI,EAAJ,CAAO,6BAAP,EAAsC,UAAS,KAAT,EAAgB;AACpD,UAAI,CAAC,IAAI,IAAJ,CAAS,UAAT,CAAD,IAAyB,CAAC,IAAI,IAAJ,CAAS,WAAT,CAA9B,EAAqD;AAAA,wBAClC,UADkC;AAAA;AAAA,YAC9C,IAD8C;AAAA,YACxC,EADwC;;AAEnD,YAAI,OAAO,EAAX;AACA,aAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAL,CAAY,MAAhC,EAAwC,GAAxC,EAA6C;AAC3C,cAAI,MAAM,KAAK,MAAL,CAAY,CAAZ,CAAV;AACA,cAAI,OAAO,IAAP,IAAe,OAAO,EAA1B,EAA8B;AAC5B,iBAAK,IAAL,CAAU,KAAK,IAAL,CAAU,CAAV,CAAV;AACD;AACF;AACD,aAAK,IAAL;AACA,iBAAS,GAAT,CAAa,IAAb;AACA,wBAAgB,IAAhB;AACD;AACF,KAdD;;AAiBA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAEA,WAAO;AACL,eAAS,mBAAW;AAClB,iBAAS,KAAT;AACD,OAHI;AAIL,cAAQ,kBAAW;AACjB,YAAI,aAAJ,EACE,SAAS,GAAT,CAAa,aAAb;AACH;AAPI,KAAP;AASD;AApHY,CAAf;;AAwHA;AACA,SAAS,QAAT,CAAkB,CAAlB,EAAqB,MAArB,EAA6B;AAC3B,MAAI,MAAM,EAAE,QAAF,EAAV;AACA,SAAO,IAAI,MAAJ,GAAa,MAApB;AACE,UAAM,MAAM,GAAZ;AADF,GAEA,OAAO,GAAP;AACD;;AAED;AACA;AACA,SAAS,aAAT,CAAuB,IAAvB,EAA6B;AAC3B,MAAI,gBAAgB,IAApB,EAA0B;AACxB,WAAO,KAAK,cAAL,KAAwB,GAAxB,GACA,SAAS,KA
AK,WAAL,KAAmB,CAA5B,EAA+B,CAA/B,CADA,GACoC,GADpC,GAEA,SAAS,KAAK,UAAL,EAAT,EAA4B,CAA5B,CAFP;AAID,GALD,MAKO;AACL,WAAO,IAAP;AACD;AACF;;;;;;;;;;;;;;ACjJD;;;;AACA;;;;AACA;;IAAY,I;;;;;;;;AAEZ;;;;;;;;;;;;;;;;IAgBa,e,WAAA,e;AAEX,6BAA4C;AAAA,QAAhC,KAAgC,uEAAxB,IAAwB;AAAA,QAAlB,SAAkB,uEAAN,IAAM;;AAAA;;AAC1C,SAAK,WAAL,GAAmB,sBAAnB;AACA,SAAK,QAAL,GAAgB,IAAI,KAAK,mBAAT,CAA6B,KAAK,WAAlC,CAAhB;;AAEA;AACA,SAAK,MAAL,GAAc,IAAd;AACA;AACA,SAAK,IAAL,GAAY,IAAZ;AACA;AACA,SAAK,eAAL,GAAuB,IAAvB;;AAEA,SAAK,UAAL,GAAkB,KAAK,MAAL,CAAY,EAAE,QAAQ,IAAV,EAAZ,EAA8B,SAA9B,CAAlB;;AAEA,SAAK,QAAL,CAAc,KAAd;AACD;;AAED;;;;;;;;;;;;;;;;;6BAaS,K,EAAO;AAAA;;AACd;AACA,UAAI,KAAK,MAAL,KAAgB,KAApB,EACE;AACF;AACA,UAAI,CAAC,KAAK,MAAN,IAAgB,CAAC,KAArB,EACE;;AAEF,UAAI,KAAK,IAAT,EAAe;AACb,aAAK,IAAL,CAAU,GAAV,CAAc,QAAd,EAAwB,KAAK,eAA7B;AACA,aAAK,IAAL,GAAY,IAAZ;AACA,aAAK,eAAL,GAAuB,IAAvB;AACD;;AAED,WAAK,MAAL,GAAc,KAAd;;AAEA,UAAI,KAAJ,EAAW;AACT,aAAK,IAAL,GAAY,qBAAI,KAAJ,EAAW,GAAX,CAAe,WAAf,CAAZ;AACA,YAAI,MAAM,KAAK,IAAL,CAAU,EAAV,CAAa,QAAb,EAAuB,UAAC,CAAD,EAAO;AACtC,gBAAK,WAAL,CAAiB,OAAjB,CAAyB,QAAzB,EAAmC,CAAnC;AACD,SAFS,CAAV;AAGA,aAAK,eAAL,GAAuB,GAAvB;AACD;AACF;;AAED;;;;;;;;;;;;;;;AAcA;;;;;oCAKgB,S,EAAW;AACzB;AACA,aAAO,KAAK,MAAL,CAAY,EAAZ,EACL,KAAK,UAAL,GAAkB,KAAK,UAAvB,GAAoC,IAD/B,EAEL,YAAY,SAAZ,GAAwB,IAFnB,CAAP;AAGD;;AAED;;;;;;;;;;;;;;;wBAYI,Y,EAAc,S,EAAW;AAC3B,UAAI,KAAK,IAAT,EACE,KAAK,IAAL,CAAU,GAAV,CAAc,YAAd,EAA4B,KAAK,eAAL,CAAqB,SAArB,CAA5B;AACH;;AAED;;;;;;;;;;;;;0BAUM,S,EAAW;AACf,UAAI,KAAK,IAAT,EACE,KAAK,GAAL,CAAS,KAAK,CAAd,EAAiB,KAAK,eAAL,CAAqB,SAArB,CAAjB;AACH;;AAED;;;;;;;;;;;;;uBAUG,S,EAAW,Q,EAAU;AACtB,aAAO,KAAK,QAAL,CAAc,EAAd,CAAiB,SAAjB,EAA4B,QAA5B,CAAP;AACD;;AAED;;;;;;;;;;;wBAQI,S,EAAW,Q,EAAU;AACvB,aAAO,KAAK,QAAL,CAAc,GAAd,CAAkB,SAAlB,EAA6B,QAA7B,CAAP;AACD;;AAED;;;;;;;;4BAKQ;AACN,WAAK,QAAL,CAAc,kBAAd;AACA,WAAK,QAAL,CAAc,IAAd;AACD;;;wBAlFW;AACV,aAAO,KAAK,IAAL,GAAY,KAAK,IAAL,CAAU,GAAV,EAAZ,GAA8B,IAArC;AACD;;;;;;AAmFH;;;;;;;;;AASA;;;;;;;;;;;;;;;;;;;;;QCpLgB,M,GAAA,M;QAeA,W,GAAA,W;QAQA,e,GAAA,e;QAoCA,a,GAAA,a;;;
;AA3DT,SAAS,MAAT,CAAgB,MAAhB,EAAoC;AAAA,oCAAT,OAAS;AAAT,WAAS;AAAA;;AACzC,OAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,QAAQ,MAA5B,EAAoC,GAApC,EAAyC;AACvC,QAAI,MAAM,QAAQ,CAAR,CAAV;AACA,QAAI,OAAO,GAAP,KAAgB,WAAhB,IAA+B,QAAQ,IAA3C,EACE;;AAEF,SAAK,IAAI,GAAT,IAAgB,GAAhB,EAAqB;AACnB,UAAI,IAAI,cAAJ,CAAmB,GAAnB,CAAJ,EAA6B;AAC3B,eAAO,GAAP,IAAc,IAAI,GAAJ,CAAd;AACD;AACF;AACF;AACD,SAAO,MAAP;AACD;;AAEM,SAAS,WAAT,CAAqB,IAArB,EAA2B;AAChC,OAAK,IAAI,IAAI,CAAb,EAAgB,IAAI,KAAK,MAAzB,EAAiC,GAAjC,EAAsC;AACpC,QAAI,KAAK,CAAL,KAAW,KAAK,IAAE,CAAP,CAAf,EAA0B;AACxB,YAAM,IAAI,KAAJ,CAAU,0CAAV,CAAN;AACD;AACF;AACF;;AAEM,SAAS,eAAT,CAAyB,CAAzB,EAA4B,CAA5B,EAA+B;AACpC,MAAI,MAAM,CAAV;AACA,MAAI,MAAM,CAAV;;AAEA,MAAI,CAAC,CAAL,EAAQ,IAAI,EAAJ;AACR,MAAI,CAAC,CAAL,EAAQ,IAAI,EAAJ;;AAER,MAAI,SAAS,EAAb;AACA,MAAI,SAAS,EAAb;;AAEA,cAAY,CAAZ;AACA,cAAY,CAAZ;;AAEA,SAAO,MAAM,EAAE,MAAR,IAAkB,MAAM,EAAE,MAAjC,EAAyC;AACvC,QAAI,EAAE,GAAF,MAAW,EAAE,GAAF,CAAf,EAAuB;AACrB;AACA;AACD,KAHD,MAGO,IAAI,EAAE,GAAF,IAAS,EAAE,GAAF,CAAb,EAAqB;AAC1B,aAAO,IAAP,CAAY,EAAE,KAAF,CAAZ;AACD,KAFM,MAEA;AACL,aAAO,IAAP,CAAY,EAAE,KAAF,CAAZ;AACD;AACF;;AAED,MAAI,MAAM,EAAE,MAAZ,EACE,SAAS,OAAO,MAAP,CAAc,EAAE,KAAF,CAAQ,GAAR,CAAd,CAAT;AACF,MAAI,MAAM,EAAE,MAAZ,EACE,SAAS,OAAO,MAAP,CAAc,EAAE,KAAF,CAAQ,GAAR,CAAd,CAAT;AACF,SAAO;AACL,aAAS,MADJ;AAEL,WAAO;AAFF,GAAP;AAID;;AAED;AACA;AACO,SAAS,aAAT,CAAuB,EAAvB,EAA2B;AAChC,MAAI,QAAQ,EAAZ;AACA,MAAI,eAAJ;AACA,OAAK,IAAI,IAAT,IAAiB,EAAjB,EAAqB;AACnB,QAAI,GAAG,cAAH,CAAkB,IAAlB,CAAJ,EACE,MAAM,IAAN,CAAW,IAAX;AACF,QAAI,QAAO,GAAG,IAAH,CAAP,MAAqB,QAArB,IAAiC,OAAO,GAAG,IAAH,EAAS,MAAhB,KAA4B,WAAjE,EAA8E;AAC5E,YAAM,IAAI,KAAJ,CAAU,2BAAV,CAAN;AACD,KAFD,MAEO,IAAI,OAAO,MAAP,KAAmB,WAAnB,IAAkC,WAAW,GAAG,IAAH,EAAS,MAA1D,EAAkE;AACvE,YAAM,IAAI,KAAJ,CAAU,8CAAV,CAAN;AACD;AACD,aAAS,GAAG,IAAH,EAAS,MAAlB;AACD;AACD,MAAI,UAAU,EAAd;AACA,MAAI,aAAJ;AACA,OAAK,IAAI,MAAM,CAAf,EAAkB,MAAM,MAAxB,EAAgC,KAAhC,EAAuC;AACrC,WAAO,EAAP;AACA,SAAK,IAAI,MAAM,CAAf,EAAkB,MAAM,MAAM,MAA9B,EAAsC,KAAtC,EAA6C;AAC3C,WAAK,MAAM,GAAN,CAAL,IAAmB,GAAG,MAAM,GAAN,CAAH,EAAe,GAAf,CAAnB;AACD;AA
CD,YAAQ,IAAR,CAAa,IAAb;AACD;AACD,SAAO,OAAP;AACD;;AAED;;;;;;;IAMa,mB,WAAA,mB;AACX,+BAAY,OAAZ,EAAqB;AAAA;;AACnB,SAAK,QAAL,GAAgB,OAAhB;AACA,SAAK,KAAL,GAAa,EAAb;AACD;;;;uBAEE,S,EAAW,Q,EAAU;AACtB,UAAI,MAAM,KAAK,QAAL,CAAc,EAAd,CAAiB,SAAjB,EAA4B,QAA5B,CAAV;AACA,WAAK,KAAL,CAAW,GAAX,IAAkB,SAAlB;AACA,aAAO,GAAP;AACD;;;wBAEG,S,EAAW,Q,EAAU;AACvB,UAAI,MAAM,KAAK,QAAL,CAAc,GAAd,CAAkB,SAAlB,EAA6B,QAA7B,CAAV;AACA,UAAI,GAAJ,EAAS;AACP,eAAO,KAAK,KAAL,CAAW,GAAX,CAAP;AACD;AACD,aAAO,GAAP;AACD;;;yCAEoB;AAAA;;AACnB,UAAI,eAAe,KAAK,KAAxB;AACA,WAAK,KAAL,GAAa,EAAb;AACA,aAAO,IAAP,CAAY,YAAZ,EAA0B,OAA1B,CAAkC,UAAC,GAAD,EAAS;AACzC,cAAK,QAAL,CAAc,GAAd,CAAkB,aAAa,GAAb,CAAlB,EAAqC,GAArC;AACD,OAFD;AAGD;;;;;;;;;;;;;;;;;;ACpHH;;;;;;;;IAEqB,G;AACnB,eAAY,KAAZ,EAAmB,IAAnB,EAAyB,YAAa,KAAtC,EAA6C;AAAA;;AAC3C,SAAK,MAAL,GAAc,KAAd;AACA,SAAK,KAAL,GAAa,IAAb;AACA,SAAK,MAAL,GAAc,KAAd;AACA,SAAK,OAAL,GAAe,sBAAf;AACD;;;;0BAEK;AACJ,aAAO,KAAK,MAAZ;AACD;;;wBAEG,K,EAAO,YAAa,K,EAAO;AAC7B,UAAI,KAAK,MAAL,KAAgB,KAApB,EAA2B;AACzB;AACA;AACD;AACD,UAAI,WAAW,KAAK,MAApB;AACA,WAAK,MAAL,GAAc,KAAd;AACA;AACA,UAAI,MAAM,EAAV;AACA,UAAI,SAAS,QAAO,KAAP,yCAAO,KAAP,OAAkB,QAA/B,EAAyC;AACvC,aAAK,IAAI,CAAT,IAAc,KAAd,EAAqB;AACnB,cAAI,MAAM,cAAN,CAAqB,CAArB,CAAJ,EACE,IAAI,CAAJ,IAAS,MAAM,CAAN,CAAT;AACH;AACF;AACD,UAAI,QAAJ,GAAe,QAAf;AACA,UAAI,KAAJ,GAAY,KAAZ;AACA,WAAK,OAAL,CAAa,OAAb,CAAqB,QAArB,EAA+B,GAA/B,EAAoC,IAApC;;AAEA;AACA;AACA,UAAI,OAAO,KAAP,IAAgB,OAAO,KAAP,CAAa,aAAjC,EAAgD;AAC9C,eAAO,KAAP,CAAa,aAAb,CACE,mBACG,KAAK,MAAL,CAAY,IAAZ,KAAqB,IAArB,GAA4B,KAAK,MAAL,CAAY,IAAZ,GAAmB,GAA/C,GAAqD,EADxD,IAEE,KAAK,KAHT,EAIE,OAAO,KAAP,KAAkB,WAAlB,GAAgC,IAAhC,GAAuC,KAJzC;AAMD;AACF;;;uBAEE,S,EAAW,Q,EAAU;AACtB,aAAO,KAAK,OAAL,CAAa,EAAb,CAAgB,SAAhB,EAA2B,QAA3B,CAAP;AACD;;;wBAEG,S,EAAW,Q,EAAU;AACvB,aAAO,KAAK,OAAL,CAAa,GAAb,CAAiB,SAAjB,EAA4B,QAA5B,CAAP;AACD;;;;;;kBAjDkB,G", - "file": "generated.js", - "sourceRoot": "", - "sourcesContent": [ - "(function(){function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require==\"function\"&&require;if(!u&&a)return 
a(o,!0);if(i)return i(o,!0);var f=new Error(\"Cannot find module '\"+o+\"'\");throw f.code=\"MODULE_NOT_FOUND\",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require==\"function\"&&require;for(var o=0;o {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Combine the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Close the handle. This clears this handle's contribution to the filter set,\n * and unsubscribes all event listeners.\n */\n close() {\n this._emitter.removeAllListeners();\n this.clear();\n this.setGroup(null);\n }\n\n /**\n * Clear this handle's contribution to the filter set.\n *\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n clear(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.clear(this._id);\n this._onChange(extraInfo);\n }\n\n /**\n * Set this handle's contribution to the filter set. This array should consist\n * of the keys of the rows that _should_ be displayed; any keys that are not\n * present in the array will be considered _filtered out_. Note that multiple\n * `FilterHandle` instances in the group may each contribute an array of keys,\n * and only those keys that appear in _all_ of the arrays make it through the\n * filter.\n *\n * @param {string[]} keys - Empty array, or array of keys. 
To clear the\n * filter, don't pass an empty array; instead, use the\n * {@link FilterHandle#clear} method.\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n set(keys, extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.update(this._id, keys);\n this._onChange(extraInfo);\n }\n\n /**\n * @return {string[]|null} - Either: 1) an array of keys that made it through\n * all of the `FilterHandle` instances, or, 2) `null`, which means no filter\n * is being applied (all data should be displayed).\n */\n get filteredKeys() {\n return this._filterSet ? this._filterSet.value : null;\n }\n\n /**\n * Subscribe to events on this `FilterHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {FilterHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link FilterHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancel event subscriptions created by {@link FilterHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|FilterHandle~listener} listener - Either the callback\n * function previously passed into {@link FilterHandle#on}, or the\n * string that was returned from {@link FilterHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n _onChange(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterVar.set(this._filterSet.value, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * @callback FilterHandle~listener\n * @param {Object} event - An object containing details of the event. 
For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the filter set, or `null` if no filter set is active),\n * `oldValue` (the previous value of the filter set), and `sender` (the\n * `FilterHandle` instance that made the change).\n */\n\n}\n\n/**\n * @event FilterHandle#change\n * @type {object}\n * @property {object} value - The new value of the filter set, or `null`\n * if no filter set is active.\n * @property {object} oldValue - The previous value of the filter set.\n * @property {FilterHandle} sender - The `FilterHandle` instance that\n * changed the value.\n */\n", - "import { diffSortedLists } from \"./util\";\n\nfunction naturalComparator(a, b) {\n if (a === b) {\n return 0;\n } else if (a < b) {\n return -1;\n } else if (a > b) {\n return 1;\n }\n}\n\n/**\n * @private\n */\nexport default class FilterSet {\n constructor() {\n this.reset();\n }\n\n reset() {\n // Key: handle ID, Value: array of selected keys, or null\n this._handles = {};\n // Key: key string, Value: count of handles that include it\n this._keys = {};\n this._value = null;\n this._activeHandles = 0;\n }\n\n get value() {\n return this._value;\n }\n\n update(handleId, keys) {\n if (keys !== null) {\n keys = keys.slice(0); // clone before sorting\n keys.sort(naturalComparator);\n }\n\n let {added, removed} = diffSortedLists(this._handles[handleId], keys);\n this._handles[handleId] = keys;\n\n for (let i = 0; i < added.length; i++) {\n this._keys[added[i]] = (this._keys[added[i]] || 0) + 1;\n }\n for (let i = 0; i < removed.length; i++) {\n this._keys[removed[i]]--;\n }\n\n this._updateValue(keys);\n }\n\n /**\n * @param {string[]} keys Sorted array of strings that indicate\n * a superset of possible keys.\n * @private\n */\n _updateValue(keys = this._allKeys) {\n let handleCount = Object.keys(this._handles).length;\n if (handleCount === 0) {\n this._value = null;\n } else {\n this._value = [];\n for (let i = 0; i < keys.length; i++) {\n let count = 
this._keys[keys[i]];\n if (count === handleCount) {\n this._value.push(keys[i]);\n }\n }\n }\n }\n\n clear(handleId) {\n if (typeof(this._handles[handleId]) === \"undefined\") {\n return;\n }\n\n let keys = this._handles[handleId];\n if (!keys) {\n keys = [];\n }\n\n for (let i = 0; i < keys.length; i++) {\n this._keys[keys[i]]--;\n }\n delete this._handles[handleId];\n\n this._updateValue();\n }\n\n get _allKeys() {\n let allKeys = Object.keys(this._keys);\n allKeys.sort(naturalComparator);\n return allKeys;\n }\n}\n", - "import Var from \"./var\";\n\n// Use a global so that multiple copies of crosstalk.js can be loaded and still\n// have groups behave as singletons across all copies.\nglobal.__crosstalk_groups = global.__crosstalk_groups || {};\nlet groups = global.__crosstalk_groups;\n\nexport default function group(groupName) {\n if (groupName && typeof(groupName) === \"string\") {\n if (!groups.hasOwnProperty(groupName)) {\n groups[groupName] = new Group(groupName);\n }\n return groups[groupName];\n } else if (typeof(groupName) === \"object\" && groupName._vars && groupName.var) {\n // Appears to already be a group object\n return groupName;\n } else if (Array.isArray(groupName) &&\n groupName.length == 1 &&\n typeof(groupName[0]) === \"string\") {\n return group(groupName[0]);\n } else {\n throw new Error(\"Invalid groupName argument\");\n }\n}\n\nclass Group {\n constructor(name) {\n this.name = name;\n this._vars = {};\n }\n\n var(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n if (!this._vars.hasOwnProperty(name))\n this._vars[name] = new Var(this, name);\n return this._vars[name];\n }\n\n has(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n return this._vars.hasOwnProperty(name);\n }\n}\n", - "import group from \"./group\";\nimport { SelectionHandle } from \"./selection\";\nimport { FilterHandle } from \"./filter\";\nimport { bind } from 
\"./input\";\nimport \"./input_selectize\";\nimport \"./input_checkboxgroup\";\nimport \"./input_slider\";\n\nconst defaultGroup = group(\"default\");\n\nfunction var_(name) {\n return defaultGroup.var(name);\n}\n\nfunction has(name) {\n return defaultGroup.has(name);\n}\n\nif (global.Shiny) {\n global.Shiny.addCustomMessageHandler(\"update-client-value\", function(message) {\n if (typeof(message.group) === \"string\") {\n group(message.group).var(message.name).set(message.value);\n } else {\n var_(message.name).set(message.value);\n }\n });\n}\n\nconst crosstalk = {\n group: group,\n var: var_,\n has: has,\n SelectionHandle: SelectionHandle,\n FilterHandle: FilterHandle,\n bind: bind\n};\n\n/**\n * @namespace crosstalk\n */\nexport default crosstalk;\nglobal.crosstalk = crosstalk;\n", - "let $ = global.jQuery;\n\nlet bindings = {};\n\nexport function register(reg) {\n bindings[reg.className] = reg;\n if (global.document && global.document.readyState !== \"complete\") {\n $(() => {\n bind();\n });\n } else if (global.document) {\n setTimeout(bind, 100);\n }\n}\n\nexport function bind() {\n Object.keys(bindings).forEach(function(className) {\n let binding = bindings[className];\n $(\".\" + binding.className).not(\".crosstalk-input-bound\").each(function(i, el) {\n bindInstance(binding, el);\n });\n });\n}\n\n// Escape jQuery identifier\nfunction $escape(val) {\n return val.replace(/([!\"#$%&'()*+,./:;<=>?@[\\\\\\]^`{|}~])/g, \"\\\\$1\");\n}\n\nfunction bindEl(el) {\n let $el = $(el);\n Object.keys(bindings).forEach(function(className) {\n if ($el.hasClass(className) && !$el.hasClass(\"crosstalk-input-bound\")) {\n let binding = bindings[className];\n bindInstance(binding, el);\n }\n });\n}\n\nfunction bindInstance(binding, el) {\n let jsonEl = $(el).find(\"script[type='application/json'][data-for='\" + $escape(el.id) + \"']\");\n let data = JSON.parse(jsonEl[0].innerText);\n\n let instance = binding.factory(el, data);\n $(el).data(\"crosstalk-instance\", 
instance);\n $(el).addClass(\"crosstalk-input-bound\");\n}\n\nif (global.Shiny) {\n let inputBinding = new global.Shiny.InputBinding();\n let $ = global.jQuery;\n $.extend(inputBinding, {\n find: function(scope) {\n return $(scope).find(\".crosstalk-input\");\n },\n initialize: function(el) {\n if (!$(el).hasClass(\"crosstalk-input-bound\")) {\n bindEl(el);\n }\n },\n getId: function(el) {\n return el.id;\n },\n getValue: function(el) {\n\n },\n setValue: function(el, value) {\n\n },\n receiveMessage: function(el, data) {\n\n },\n subscribe: function(el, callback) {\n $(el).data(\"crosstalk-instance\").resume();\n },\n unsubscribe: function(el) {\n $(el).data(\"crosstalk-instance\").suspend();\n }\n });\n global.Shiny.inputBindings.register(inputBinding, \"crosstalk.inputBinding\");\n}\n", - "import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: \"crosstalk-input-checkboxgroup\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n let $el = $(el);\n $el.on(\"change\", \"input[type='checkbox']\", function() {\n let checked = $el.find(\"input[type='checkbox']:checked\");\n if (checked.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n checked.each(function() {\n data.map[this.value].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n", - "import * as input from \"./input\";\nimport * as util from \"./util\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: 
\"crosstalk-input-select\",\n\n factory: function(el, data) {\n /*\n * items: {value: [...], label: [...]}\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n\n let first = [{value: \"\", label: \"(All)\"}];\n let items = util.dataframeToD3(data.items);\n let opts = {\n options: first.concat(items),\n valueField: \"value\",\n labelField: \"label\",\n searchField: \"label\"\n };\n\n let select = $(el).find(\"select\")[0];\n\n let selectize = $(select).selectize(opts)[0].selectize;\n\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n selectize.on(\"change\", function() {\n if (selectize.items.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n selectize.items.forEach(function(group) {\n data.map[group].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n", - "import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\nlet strftime = global.strftime;\n\ninput.register({\n className: \"crosstalk-input-slider\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let opts = {};\n let $el = $(el).find(\"input\");\n let dataType = $el.data(\"data-type\");\n let timeFormat = $el.data(\"time-format\");\n let round = $el.data(\"round\");\n let timeFormatter;\n\n // Set up formatting functions\n if (dataType === \"date\") {\n timeFormatter = strftime.utc();\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n\n } else if (dataType === \"datetime\") {\n let timezone = $el.data(\"timezone\");\n if 
(timezone)\n timeFormatter = strftime.timezone(timezone);\n else\n timeFormatter = strftime;\n\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n } else if (dataType === \"number\") {\n if (typeof round !== \"undefined\")\n opts.prettify = function(num) {\n let factor = Math.pow(10, round);\n return Math.round(num * factor) / factor;\n };\n }\n\n $el.ionRangeSlider(opts);\n\n function getValue() {\n let result = $el.data(\"ionRangeSlider\").result;\n\n // Function for converting numeric value from slider to appropriate type.\n let convert;\n let dataType = $el.data(\"data-type\");\n if (dataType === \"date\") {\n convert = function(val) {\n return formatDateUTC(new Date(+val));\n };\n } else if (dataType === \"datetime\") {\n convert = function(val) {\n // Convert ms to s\n return +val / 1000;\n };\n } else {\n convert = function(val) { return +val; };\n }\n\n if ($el.data(\"ionRangeSlider\").options.type === \"double\") {\n return [convert(result.from), convert(result.to)];\n } else {\n return convert(result.from);\n }\n }\n\n let lastKnownKeys = null;\n\n $el.on(\"change.crosstalkSliderInput\", function(event) {\n if (!$el.data(\"updating\") && !$el.data(\"animating\")) {\n let [from, to] = getValue();\n let keys = [];\n for (let i = 0; i < data.values.length; i++) {\n let val = data.values[i];\n if (val >= from && val <= to) {\n keys.push(data.keys[i]);\n }\n }\n keys.sort();\n ctHandle.set(keys);\n lastKnownKeys = keys;\n }\n });\n\n\n // let $el = $(el);\n // $el.on(\"change\", \"input[type=\"checkbox\"]\", function() {\n // let checked = $el.find(\"input[type=\"checkbox\"]:checked\");\n // if (checked.length === 0) {\n // ctHandle.clear();\n // } else {\n // let keys = {};\n // checked.each(function() {\n // data.map[this.value].forEach(function(key) {\n // keys[key] = true;\n // });\n // });\n // let keyArray = Object.keys(keys);\n // keyArray.sort();\n // ctHandle.set(keyArray);\n // }\n // });\n\n return {\n 
suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n\n\n// Convert a number to a string with leading zeros\nfunction padZeros(n, digits) {\n let str = n.toString();\n while (str.length < digits)\n str = \"0\" + str;\n return str;\n}\n\n// Given a Date object, return a string in yyyy-mm-dd format, using the\n// UTC date. This may be a day off from the date in the local time zone.\nfunction formatDateUTC(date) {\n if (date instanceof Date) {\n return date.getUTCFullYear() + \"-\" +\n padZeros(date.getUTCMonth()+1, 2) + \"-\" +\n padZeros(date.getUTCDate(), 2);\n\n } else {\n return null;\n }\n}\n", - "import Events from \"./events\";\nimport grp from \"./group\";\nimport * as util from \"./util\";\n\n/**\n * Use this class to read and write (and listen for changes to) the selection\n * for a Crosstalk group. This is intended to be used for linked brushing.\n *\n * If two (or more) `SelectionHandle` instances in the same webpage share the\n * same group name, they will share the same state. Setting the selection using\n * one `SelectionHandle` instance will result in the `value` property instantly\n * changing across the others, and `\"change\"` event listeners on all instances\n * (including the one that initiated the sending) will fire.\n *\n * @param {string} [group] - The name of the Crosstalk group, or if none,\n * null or undefined (or any other falsy value). This can be changed later\n * via the [SelectionHandle#setGroup](#setGroup) method.\n * @param {Object} [extraInfo] - An object whose properties will be copied to\n * the event object whenever an event is emitted.\n */\nexport class SelectionHandle {\n\n constructor(group = null, extraInfo = null) {\n this._eventRelay = new Events();\n this._emitter = new util.SubscriptionTracker(this._eventRelay);\n\n // Name of the group we're currently tracking, if any. 
Can change over time.\n this._group = null;\n // The Var we're currently tracking, if any. Can change over time.\n this._var = null;\n // The event handler subscription we currently have on var.on(\"change\").\n this._varOnChangeSub = null;\n\n this._extraInfo = util.extend({ sender: this }, extraInfo);\n\n this.setGroup(group);\n }\n\n /**\n * Changes the Crosstalk group membership of this SelectionHandle. The group\n * being switched away from (if any) will not have its selection value\n * modified as a result of calling `setGroup`, even if this handle was the\n * most recent handle to set the selection of the group.\n *\n * The group being switched to (if any) will also not have its selection value\n * modified as a result of calling `setGroup`. If you want to set the\n * selection value of the new group, call `set` explicitly.\n *\n * @param {string} group - The name of the Crosstalk group, or null (or\n * undefined) to clear the group.\n */\n setGroup(group) {\n // If group is unchanged, do nothing\n if (this._group === group)\n return;\n // Treat null, undefined, and other falsy values the same\n if (!this._group && !group)\n return;\n\n if (this._var) {\n this._var.off(\"change\", this._varOnChangeSub);\n this._var = null;\n this._varOnChangeSub = null;\n }\n\n this._group = group;\n\n if (group) {\n this._var = grp(group).var(\"selection\");\n let sub = this._var.on(\"change\", (e) => {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Retrieves the current selection for the group represented by this\n * `SelectionHandle`.\n *\n * - If no selection is active, then this value will be falsy.\n * - If a selection is active, but no data points are selected, then this\n * value will be an empty array.\n * - If a selection is active, and data points are selected, then the keys\n * of the selected data points will be present in the array.\n */\n get value() {\n return this._var ? 
this._var.get() : null;\n }\n\n /**\n * Combines the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n // Important incidental effect: shallow clone is returned\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {string[]} selectedKeys - Falsy, empty array, or array of keys (see\n * {@link SelectionHandle#value}).\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `SelectionHandle` constructor).\n */\n set(selectedKeys, extraInfo) {\n if (this._var)\n this._var.set(selectedKeys, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any that were passed\n * into the `SelectionHandle` constructor).\n */\n clear(extraInfo) {\n if (this._var)\n this.set(void 0, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Subscribes to events on this `SelectionHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {SelectionHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link SelectionHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return 
this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancels event subscriptions created by {@link SelectionHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|SelectionHandle~listener} listener - Either the callback\n * function previously passed into {@link SelectionHandle#on}, or the\n * string that was returned from {@link SelectionHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n /**\n * Shuts down the `SelectionHandle` object.\n *\n * Removes all event listeners that were added through this handle.\n */\n close() {\n this._emitter.removeAllListeners();\n this.setGroup(null);\n }\n}\n\n/**\n * @callback SelectionHandle~listener\n * @param {Object} event - An object containing details of the event. For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the selection, or `undefined` if no selection is active),\n * `oldValue` (the previous value of the selection), and `sender` (the\n * `SelectionHandle` instance that made the change).\n */\n\n/**\n * @event SelectionHandle#change\n * @type {object}\n * @property {object} value - The new value of the selection, or `undefined`\n * if no selection is active.\n * @property {object} oldValue - The previous value of the selection.\n * @property {SelectionHandle} sender - The `SelectionHandle` instance that\n * changed the value.\n */\n", - "export function extend(target, ...sources) {\n for (let i = 0; i < sources.length; i++) {\n let src = sources[i];\n if (typeof(src) === \"undefined\" || src === null)\n continue;\n\n for (let key in src) {\n if (src.hasOwnProperty(key)) {\n target[key] = src[key];\n }\n }\n }\n return target;\n}\n\nexport function checkSorted(list) {\n for (let i = 1; i < list.length; i++) {\n if (list[i] <= list[i-1]) {\n throw new Error(\"List is not sorted or contains duplicate\");\n }\n }\n}\n\nexport function diffSortedLists(a, b) {\n let i_a = 0;\n let i_b = 
0;\n\n if (!a) a = [];\n if (!b) b = [];\n\n let a_only = [];\n let b_only = [];\n\n checkSorted(a);\n checkSorted(b);\n\n while (i_a < a.length && i_b < b.length) {\n if (a[i_a] === b[i_b]) {\n i_a++;\n i_b++;\n } else if (a[i_a] < b[i_b]) {\n a_only.push(a[i_a++]);\n } else {\n b_only.push(b[i_b++]);\n }\n }\n\n if (i_a < a.length)\n a_only = a_only.concat(a.slice(i_a));\n if (i_b < b.length)\n b_only = b_only.concat(b.slice(i_b));\n return {\n removed: a_only,\n added: b_only\n };\n}\n\n// Convert from wide: { colA: [1,2,3], colB: [4,5,6], ... }\n// to long: [ {colA: 1, colB: 4}, {colA: 2, colB: 5}, ... ]\nexport function dataframeToD3(df) {\n let names = [];\n let length;\n for (let name in df) {\n if (df.hasOwnProperty(name))\n names.push(name);\n if (typeof(df[name]) !== \"object\" || typeof(df[name].length) === \"undefined\") {\n throw new Error(\"All fields must be arrays\");\n } else if (typeof(length) !== \"undefined\" && length !== df[name].length) {\n throw new Error(\"All fields must be arrays of the same length\");\n }\n length = df[name].length;\n }\n let results = [];\n let item;\n for (let row = 0; row < length; row++) {\n item = {};\n for (let col = 0; col < names.length; col++) {\n item[names[col]] = df[names[col]][row];\n }\n results.push(item);\n }\n return results;\n}\n\n/**\n * Keeps track of all event listener additions/removals and lets all active\n * listeners be removed with a single operation.\n *\n * @private\n */\nexport class SubscriptionTracker {\n constructor(emitter) {\n this._emitter = emitter;\n this._subs = {};\n }\n\n on(eventType, listener) {\n let sub = this._emitter.on(eventType, listener);\n this._subs[sub] = eventType;\n return sub;\n }\n\n off(eventType, listener) {\n let sub = this._emitter.off(eventType, listener);\n if (sub) {\n delete this._subs[sub];\n }\n return sub;\n }\n\n removeAllListeners() {\n let current_subs = this._subs;\n this._subs = {};\n Object.keys(current_subs).forEach((sub) => {\n 
this._emitter.off(current_subs[sub], sub);\n });\n }\n}\n", - "import Events from \"./events\";\n\nexport default class Var {\n constructor(group, name, /*optional*/ value) {\n this._group = group;\n this._name = name;\n this._value = value;\n this._events = new Events();\n }\n\n get() {\n return this._value;\n }\n\n set(value, /*optional*/ event) {\n if (this._value === value) {\n // Do nothing; the value hasn't changed\n return;\n }\n let oldValue = this._value;\n this._value = value;\n // Alert JavaScript listeners that the value has changed\n let evt = {};\n if (event && typeof(event) === \"object\") {\n for (let k in event) {\n if (event.hasOwnProperty(k))\n evt[k] = event[k];\n }\n }\n evt.oldValue = oldValue;\n evt.value = value;\n this._events.trigger(\"change\", evt, this);\n\n // TODO: Make this extensible, to let arbitrary back-ends know that\n // something has changed\n if (global.Shiny && global.Shiny.onInputChange) {\n global.Shiny.onInputChange(\n \".clientValue-\" +\n (this._group.name !== null ? this._group.name + \"-\" : \"\") +\n this._name,\n typeof(value) === \"undefined\" ? 
null : value\n );\n }\n }\n\n on(eventType, listener) {\n return this._events.on(eventType, listener);\n }\n\n off(eventType, listener) {\n return this._events.off(eventType, listener);\n }\n}\n" - ] -} \ No newline at end of file diff --git a/static/rmarkdown-libs/crosstalk/js/crosstalk.min.js b/static/rmarkdown-libs/crosstalk/js/crosstalk.min.js deleted file mode 100644 index b7ec0ac9..00000000 --- a/static/rmarkdown-libs/crosstalk/js/crosstalk.min.js +++ /dev/null @@ -1,2 +0,0 @@ -!function o(u,a,l){function s(n,e){if(!a[n]){if(!u[n]){var t="function"==typeof require&&require;if(!e&&t)return t(n,!0);if(f)return f(n,!0);var r=new Error("Cannot find module '"+n+"'");throw r.code="MODULE_NOT_FOUND",r}var i=a[n]={exports:{}};u[n][0].call(i.exports,function(e){var t=u[n][1][e];return s(t||e)},i,i.exports,o,u,a,l)}return a[n].exports}for(var f="function"==typeof require&&require,e=0;e?@[\\\]^`{|}~])/g,"\\$1")+"']"),r=JSON.parse(n[0].innerText),i=e.factory(t,r);o(t).data("crosstalk-instance",i),o(t).addClass("crosstalk-input-bound")}if(t.Shiny){var e=new t.Shiny.InputBinding,u=t.jQuery;u.extend(e,{find:function(e){return u(e).find(".crosstalk-input")},initialize:function(e){var t,n;u(e).hasClass("crosstalk-input-bound")||(n=o(t=e),Object.keys(r).forEach(function(e){n.hasClass(e)&&!n.hasClass("crosstalk-input-bound")&&i(r[e],t)}))},getId:function(e){return e.id},getValue:function(e){},setValue:function(e,t){},receiveMessage:function(e,t){},subscribe:function(e,t){u(e).data("crosstalk-instance").resume()},unsubscribe:function(e){u(e).data("crosstalk-instance").suspend()}}),t.Shiny.inputBindings.register(e,"crosstalk.inputBinding")}}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{}],7:[function(r,e,t){(function(e){"use strict";var t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return 
t.default=e,t}}(r("./input")),n=r("./filter");var a=e.jQuery;t.register({className:"crosstalk-input-checkboxgroup",factory:function(e,r){var i=new n.FilterHandle(r.group),o=void 0,u=a(e);return u.on("change","input[type='checkbox']",function(){var e=u.find("input[type='checkbox']:checked");if(0===e.length)o=null,i.clear();else{var t={};e.each(function(){r.map[this.value].forEach(function(e){t[e]=!0})});var n=Object.keys(t);n.sort(),o=n,i.set(n)}}),{suspend:function(){i.clear()},resume:function(){o&&i.set(o)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6}],8:[function(r,e,t){(function(e){"use strict";var t=n(r("./input")),l=n(r("./util")),s=r("./filter");function n(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}var f=e.jQuery;t.register({className:"crosstalk-input-select",factory:function(e,n){var t=l.dataframeToD3(n.items),r={options:[{value:"",label:"(All)"}].concat(t),valueField:"value",labelField:"label",searchField:"label"},i=f(e).find("select")[0],o=f(i).selectize(r)[0].selectize,u=new s.FilterHandle(n.group),a=void 0;return o.on("change",function(){if(0===o.items.length)a=null,u.clear();else{var t={};o.items.forEach(function(e){n.map[e].forEach(function(e){t[e]=!0})});var e=Object.keys(t);e.sort(),a=e,u.set(e)}}),{suspend:function(){u.clear()},resume:function(){a&&u.set(a)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6,"./util":11}],9:[function(n,e,t){(function(e){"use strict";var d=function(e,t){if(Array.isArray(e))return e;if(Symbol.iterator in Object(e))return function(e,t){var n=[],r=!0,i=!1,o=void 0;try{for(var 
u,a=e[Symbol.iterator]();!(r=(u=a.next()).done)&&(n.push(u.value),!t||n.length!==t);r=!0);}catch(e){i=!0,o=e}finally{try{!r&&a.return&&a.return()}finally{if(i)throw o}}return n}(e,t);throw new TypeError("Invalid attempt to destructure non-iterable instance")},t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}}(n("./input")),a=n("./filter");var v=e.jQuery,p=e.strftime;function y(e,t){for(var n=e.toString();n.length {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Combine the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Close the handle. This clears this handle's contribution to the filter set,\n * and unsubscribes all event listeners.\n */\n close() {\n this._emitter.removeAllListeners();\n this.clear();\n this.setGroup(null);\n }\n\n /**\n * Clear this handle's contribution to the filter set.\n *\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n clear(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.clear(this._id);\n this._onChange(extraInfo);\n }\n\n /**\n * Set this handle's contribution to the filter set. This array should consist\n * of the keys of the rows that _should_ be displayed; any keys that are not\n * present in the array will be considered _filtered out_. 
Note that multiple\n * `FilterHandle` instances in the group may each contribute an array of keys,\n * and only those keys that appear in _all_ of the arrays make it through the\n * filter.\n *\n * @param {string[]} keys - Empty array, or array of keys. To clear the\n * filter, don't pass an empty array; instead, use the\n * {@link FilterHandle#clear} method.\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `FilterHandle` constructor).\n * \n * @fires FilterHandle#change\n */\n set(keys, extraInfo) {\n if (!this._filterSet)\n return;\n this._filterSet.update(this._id, keys);\n this._onChange(extraInfo);\n }\n\n /**\n * @return {string[]|null} - Either: 1) an array of keys that made it through\n * all of the `FilterHandle` instances, or, 2) `null`, which means no filter\n * is being applied (all data should be displayed).\n */\n get filteredKeys() {\n return this._filterSet ? 
this._filterSet.value : null;\n }\n\n /**\n * Subscribe to events on this `FilterHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {FilterHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link FilterHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancel event subscriptions created by {@link FilterHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|FilterHandle~listener} listener - Either the callback\n * function previously passed into {@link FilterHandle#on}, or the\n * string that was returned from {@link FilterHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n _onChange(extraInfo) {\n if (!this._filterSet)\n return;\n this._filterVar.set(this._filterSet.value, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * @callback FilterHandle~listener\n * @param {Object} event - An object containing details of the event. 
For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the filter set, or `null` if no filter set is active),\n * `oldValue` (the previous value of the filter set), and `sender` (the\n * `FilterHandle` instance that made the change).\n */\n\n}\n\n/**\n * @event FilterHandle#change\n * @type {object}\n * @property {object} value - The new value of the filter set, or `null`\n * if no filter set is active.\n * @property {object} oldValue - The previous value of the filter set.\n * @property {FilterHandle} sender - The `FilterHandle` instance that\n * changed the value.\n */\n","import { diffSortedLists } from \"./util\";\n\nfunction naturalComparator(a, b) {\n if (a === b) {\n return 0;\n } else if (a < b) {\n return -1;\n } else if (a > b) {\n return 1;\n }\n}\n\n/**\n * @private\n */\nexport default class FilterSet {\n constructor() {\n this.reset();\n }\n\n reset() {\n // Key: handle ID, Value: array of selected keys, or null\n this._handles = {};\n // Key: key string, Value: count of handles that include it\n this._keys = {};\n this._value = null;\n this._activeHandles = 0;\n }\n\n get value() {\n return this._value;\n }\n\n update(handleId, keys) {\n if (keys !== null) {\n keys = keys.slice(0); // clone before sorting\n keys.sort(naturalComparator);\n }\n\n let {added, removed} = diffSortedLists(this._handles[handleId], keys);\n this._handles[handleId] = keys;\n\n for (let i = 0; i < added.length; i++) {\n this._keys[added[i]] = (this._keys[added[i]] || 0) + 1;\n }\n for (let i = 0; i < removed.length; i++) {\n this._keys[removed[i]]--;\n }\n\n this._updateValue(keys);\n }\n\n /**\n * @param {string[]} keys Sorted array of strings that indicate\n * a superset of possible keys.\n * @private\n */\n _updateValue(keys = this._allKeys) {\n let handleCount = Object.keys(this._handles).length;\n if (handleCount === 0) {\n this._value = null;\n } else {\n this._value = [];\n for (let i = 0; i < keys.length; i++) {\n let count = 
this._keys[keys[i]];\n if (count === handleCount) {\n this._value.push(keys[i]);\n }\n }\n }\n }\n\n clear(handleId) {\n if (typeof(this._handles[handleId]) === \"undefined\") {\n return;\n }\n\n let keys = this._handles[handleId];\n if (!keys) {\n keys = [];\n }\n\n for (let i = 0; i < keys.length; i++) {\n this._keys[keys[i]]--;\n }\n delete this._handles[handleId];\n\n this._updateValue();\n }\n\n get _allKeys() {\n let allKeys = Object.keys(this._keys);\n allKeys.sort(naturalComparator);\n return allKeys;\n }\n}\n","import Var from \"./var\";\n\n// Use a global so that multiple copies of crosstalk.js can be loaded and still\n// have groups behave as singletons across all copies.\nglobal.__crosstalk_groups = global.__crosstalk_groups || {};\nlet groups = global.__crosstalk_groups;\n\nexport default function group(groupName) {\n if (groupName && typeof(groupName) === \"string\") {\n if (!groups.hasOwnProperty(groupName)) {\n groups[groupName] = new Group(groupName);\n }\n return groups[groupName];\n } else if (typeof(groupName) === \"object\" && groupName._vars && groupName.var) {\n // Appears to already be a group object\n return groupName;\n } else if (Array.isArray(groupName) &&\n groupName.length == 1 &&\n typeof(groupName[0]) === \"string\") {\n return group(groupName[0]);\n } else {\n throw new Error(\"Invalid groupName argument\");\n }\n}\n\nclass Group {\n constructor(name) {\n this.name = name;\n this._vars = {};\n }\n\n var(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n if (!this._vars.hasOwnProperty(name))\n this._vars[name] = new Var(this, name);\n return this._vars[name];\n }\n\n has(name) {\n if (!name || typeof(name) !== \"string\") {\n throw new Error(\"Invalid var name\");\n }\n\n return this._vars.hasOwnProperty(name);\n }\n}\n","import group from \"./group\";\nimport { SelectionHandle } from \"./selection\";\nimport { FilterHandle } from \"./filter\";\nimport { bind } from 
\"./input\";\nimport \"./input_selectize\";\nimport \"./input_checkboxgroup\";\nimport \"./input_slider\";\n\nconst defaultGroup = group(\"default\");\n\nfunction var_(name) {\n return defaultGroup.var(name);\n}\n\nfunction has(name) {\n return defaultGroup.has(name);\n}\n\nif (global.Shiny) {\n global.Shiny.addCustomMessageHandler(\"update-client-value\", function(message) {\n if (typeof(message.group) === \"string\") {\n group(message.group).var(message.name).set(message.value);\n } else {\n var_(message.name).set(message.value);\n }\n });\n}\n\nconst crosstalk = {\n group: group,\n var: var_,\n has: has,\n SelectionHandle: SelectionHandle,\n FilterHandle: FilterHandle,\n bind: bind\n};\n\n/**\n * @namespace crosstalk\n */\nexport default crosstalk;\nglobal.crosstalk = crosstalk;\n","let $ = global.jQuery;\n\nlet bindings = {};\n\nexport function register(reg) {\n bindings[reg.className] = reg;\n if (global.document && global.document.readyState !== \"complete\") {\n $(() => {\n bind();\n });\n } else if (global.document) {\n setTimeout(bind, 100);\n }\n}\n\nexport function bind() {\n Object.keys(bindings).forEach(function(className) {\n let binding = bindings[className];\n $(\".\" + binding.className).not(\".crosstalk-input-bound\").each(function(i, el) {\n bindInstance(binding, el);\n });\n });\n}\n\n// Escape jQuery identifier\nfunction $escape(val) {\n return val.replace(/([!\"#$%&'()*+,./:;<=>?@[\\\\\\]^`{|}~])/g, \"\\\\$1\");\n}\n\nfunction bindEl(el) {\n let $el = $(el);\n Object.keys(bindings).forEach(function(className) {\n if ($el.hasClass(className) && !$el.hasClass(\"crosstalk-input-bound\")) {\n let binding = bindings[className];\n bindInstance(binding, el);\n }\n });\n}\n\nfunction bindInstance(binding, el) {\n let jsonEl = $(el).find(\"script[type='application/json'][data-for='\" + $escape(el.id) + \"']\");\n let data = JSON.parse(jsonEl[0].innerText);\n\n let instance = binding.factory(el, data);\n $(el).data(\"crosstalk-instance\", instance);\n 
$(el).addClass(\"crosstalk-input-bound\");\n}\n\nif (global.Shiny) {\n let inputBinding = new global.Shiny.InputBinding();\n let $ = global.jQuery;\n $.extend(inputBinding, {\n find: function(scope) {\n return $(scope).find(\".crosstalk-input\");\n },\n initialize: function(el) {\n if (!$(el).hasClass(\"crosstalk-input-bound\")) {\n bindEl(el);\n }\n },\n getId: function(el) {\n return el.id;\n },\n getValue: function(el) {\n\n },\n setValue: function(el, value) {\n\n },\n receiveMessage: function(el, data) {\n\n },\n subscribe: function(el, callback) {\n $(el).data(\"crosstalk-instance\").resume();\n },\n unsubscribe: function(el) {\n $(el).data(\"crosstalk-instance\").suspend();\n }\n });\n global.Shiny.inputBindings.register(inputBinding, \"crosstalk.inputBinding\");\n}\n","import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: \"crosstalk-input-checkboxgroup\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n let $el = $(el);\n $el.on(\"change\", \"input[type='checkbox']\", function() {\n let checked = $el.find(\"input[type='checkbox']:checked\");\n if (checked.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n checked.each(function() {\n data.map[this.value].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n","import * as input from \"./input\";\nimport * as util from \"./util\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\n\ninput.register({\n className: \"crosstalk-input-select\",\n\n 
factory: function(el, data) {\n /*\n * items: {value: [...], label: [...]}\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n\n let first = [{value: \"\", label: \"(All)\"}];\n let items = util.dataframeToD3(data.items);\n let opts = {\n options: first.concat(items),\n valueField: \"value\",\n labelField: \"label\",\n searchField: \"label\"\n };\n\n let select = $(el).find(\"select\")[0];\n\n let selectize = $(select).selectize(opts)[0].selectize;\n\n let ctHandle = new FilterHandle(data.group);\n\n let lastKnownKeys;\n selectize.on(\"change\", function() {\n if (selectize.items.length === 0) {\n lastKnownKeys = null;\n ctHandle.clear();\n } else {\n let keys = {};\n selectize.items.forEach(function(group) {\n data.map[group].forEach(function(key) {\n keys[key] = true;\n });\n });\n let keyArray = Object.keys(keys);\n keyArray.sort();\n lastKnownKeys = keyArray;\n ctHandle.set(keyArray);\n }\n });\n\n return {\n suspend: function() {\n ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n","import * as input from \"./input\";\nimport { FilterHandle } from \"./filter\";\n\nlet $ = global.jQuery;\nlet strftime = global.strftime;\n\ninput.register({\n className: \"crosstalk-input-slider\",\n\n factory: function(el, data) {\n /*\n * map: {\"groupA\": [\"keyA\", \"keyB\", ...], ...}\n * group: \"ct-groupname\"\n */\n let ctHandle = new FilterHandle(data.group);\n\n let opts = {};\n let $el = $(el).find(\"input\");\n let dataType = $el.data(\"data-type\");\n let timeFormat = $el.data(\"time-format\");\n let round = $el.data(\"round\");\n let timeFormatter;\n\n // Set up formatting functions\n if (dataType === \"date\") {\n timeFormatter = strftime.utc();\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n\n } else if (dataType === \"datetime\") {\n let timezone = $el.data(\"timezone\");\n if (timezone)\n timeFormatter = 
strftime.timezone(timezone);\n else\n timeFormatter = strftime;\n\n opts.prettify = function(num) {\n return timeFormatter(timeFormat, new Date(num));\n };\n } else if (dataType === \"number\") {\n if (typeof round !== \"undefined\")\n opts.prettify = function(num) {\n let factor = Math.pow(10, round);\n return Math.round(num * factor) / factor;\n };\n }\n\n $el.ionRangeSlider(opts);\n\n function getValue() {\n let result = $el.data(\"ionRangeSlider\").result;\n\n // Function for converting numeric value from slider to appropriate type.\n let convert;\n let dataType = $el.data(\"data-type\");\n if (dataType === \"date\") {\n convert = function(val) {\n return formatDateUTC(new Date(+val));\n };\n } else if (dataType === \"datetime\") {\n convert = function(val) {\n // Convert ms to s\n return +val / 1000;\n };\n } else {\n convert = function(val) { return +val; };\n }\n\n if ($el.data(\"ionRangeSlider\").options.type === \"double\") {\n return [convert(result.from), convert(result.to)];\n } else {\n return convert(result.from);\n }\n }\n\n let lastKnownKeys = null;\n\n $el.on(\"change.crosstalkSliderInput\", function(event) {\n if (!$el.data(\"updating\") && !$el.data(\"animating\")) {\n let [from, to] = getValue();\n let keys = [];\n for (let i = 0; i < data.values.length; i++) {\n let val = data.values[i];\n if (val >= from && val <= to) {\n keys.push(data.keys[i]);\n }\n }\n keys.sort();\n ctHandle.set(keys);\n lastKnownKeys = keys;\n }\n });\n\n\n // let $el = $(el);\n // $el.on(\"change\", \"input[type=\"checkbox\"]\", function() {\n // let checked = $el.find(\"input[type=\"checkbox\"]:checked\");\n // if (checked.length === 0) {\n // ctHandle.clear();\n // } else {\n // let keys = {};\n // checked.each(function() {\n // data.map[this.value].forEach(function(key) {\n // keys[key] = true;\n // });\n // });\n // let keyArray = Object.keys(keys);\n // keyArray.sort();\n // ctHandle.set(keyArray);\n // }\n // });\n\n return {\n suspend: function() {\n 
ctHandle.clear();\n },\n resume: function() {\n if (lastKnownKeys)\n ctHandle.set(lastKnownKeys);\n }\n };\n }\n});\n\n\n// Convert a number to a string with leading zeros\nfunction padZeros(n, digits) {\n let str = n.toString();\n while (str.length < digits)\n str = \"0\" + str;\n return str;\n}\n\n// Given a Date object, return a string in yyyy-mm-dd format, using the\n// UTC date. This may be a day off from the date in the local time zone.\nfunction formatDateUTC(date) {\n if (date instanceof Date) {\n return date.getUTCFullYear() + \"-\" +\n padZeros(date.getUTCMonth()+1, 2) + \"-\" +\n padZeros(date.getUTCDate(), 2);\n\n } else {\n return null;\n }\n}\n","import Events from \"./events\";\nimport grp from \"./group\";\nimport * as util from \"./util\";\n\n/**\n * Use this class to read and write (and listen for changes to) the selection\n * for a Crosstalk group. This is intended to be used for linked brushing.\n *\n * If two (or more) `SelectionHandle` instances in the same webpage share the\n * same group name, they will share the same state. Setting the selection using\n * one `SelectionHandle` instance will result in the `value` property instantly\n * changing across the others, and `\"change\"` event listeners on all instances\n * (including the one that initiated the sending) will fire.\n *\n * @param {string} [group] - The name of the Crosstalk group, or if none,\n * null or undefined (or any other falsy value). This can be changed later\n * via the [SelectionHandle#setGroup](#setGroup) method.\n * @param {Object} [extraInfo] - An object whose properties will be copied to\n * the event object whenever an event is emitted.\n */\nexport class SelectionHandle {\n\n constructor(group = null, extraInfo = null) {\n this._eventRelay = new Events();\n this._emitter = new util.SubscriptionTracker(this._eventRelay);\n\n // Name of the group we're currently tracking, if any. Can change over time.\n this._group = null;\n // The Var we're currently tracking, if any. 
Can change over time.\n this._var = null;\n // The event handler subscription we currently have on var.on(\"change\").\n this._varOnChangeSub = null;\n\n this._extraInfo = util.extend({ sender: this }, extraInfo);\n\n this.setGroup(group);\n }\n\n /**\n * Changes the Crosstalk group membership of this SelectionHandle. The group\n * being switched away from (if any) will not have its selection value\n * modified as a result of calling `setGroup`, even if this handle was the\n * most recent handle to set the selection of the group.\n *\n * The group being switched to (if any) will also not have its selection value\n * modified as a result of calling `setGroup`. If you want to set the\n * selection value of the new group, call `set` explicitly.\n *\n * @param {string} group - The name of the Crosstalk group, or null (or\n * undefined) to clear the group.\n */\n setGroup(group) {\n // If group is unchanged, do nothing\n if (this._group === group)\n return;\n // Treat null, undefined, and other falsy values the same\n if (!this._group && !group)\n return;\n\n if (this._var) {\n this._var.off(\"change\", this._varOnChangeSub);\n this._var = null;\n this._varOnChangeSub = null;\n }\n\n this._group = group;\n\n if (group) {\n this._var = grp(group).var(\"selection\");\n let sub = this._var.on(\"change\", (e) => {\n this._eventRelay.trigger(\"change\", e, this);\n });\n this._varOnChangeSub = sub;\n }\n }\n\n /**\n * Retrieves the current selection for the group represented by this\n * `SelectionHandle`.\n *\n * - If no selection is active, then this value will be falsy.\n * - If a selection is active, but no data points are selected, then this\n * value will be an empty array.\n * - If a selection is active, and data points are selected, then the keys\n * of the selected data points will be present in the array.\n */\n get value() {\n return this._var ? 
this._var.get() : null;\n }\n\n /**\n * Combines the given `extraInfo` (if any) with the handle's default\n * `_extraInfo` (if any).\n * @private\n */\n _mergeExtraInfo(extraInfo) {\n // Important incidental effect: shallow clone is returned\n return util.extend({},\n this._extraInfo ? this._extraInfo : null,\n extraInfo ? extraInfo : null);\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {string[]} selectedKeys - Falsy, empty array, or array of keys (see\n * {@link SelectionHandle#value}).\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any options that were\n * passed into the `SelectionHandle` constructor).\n */\n set(selectedKeys, extraInfo) {\n if (this._var)\n this._var.set(selectedKeys, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Overwrites the current selection for the group, and raises the `\"change\"`\n * event among all of the group's '`SelectionHandle` instances (including\n * this one).\n *\n * @fires SelectionHandle#change\n * @param {Object} [extraInfo] - Extra properties to be included on the event\n * object that's passed to listeners (in addition to any that were passed\n * into the `SelectionHandle` constructor).\n */\n clear(extraInfo) {\n if (this._var)\n this.set(void 0, this._mergeExtraInfo(extraInfo));\n }\n\n /**\n * Subscribes to events on this `SelectionHandle`.\n *\n * @param {string} eventType - Indicates the type of events to listen to.\n * Currently, only `\"change\"` is supported.\n * @param {SelectionHandle~listener} listener - The callback function that\n * will be invoked when the event occurs.\n * @return {string} - A token to pass to {@link SelectionHandle#off} to cancel\n * this subscription.\n */\n on(eventType, listener) {\n return 
this._emitter.on(eventType, listener);\n }\n\n /**\n * Cancels event subscriptions created by {@link SelectionHandle#on}.\n *\n * @param {string} eventType - The type of event to unsubscribe.\n * @param {string|SelectionHandle~listener} listener - Either the callback\n * function previously passed into {@link SelectionHandle#on}, or the\n * string that was returned from {@link SelectionHandle#on}.\n */\n off(eventType, listener) {\n return this._emitter.off(eventType, listener);\n }\n\n /**\n * Shuts down the `SelectionHandle` object.\n *\n * Removes all event listeners that were added through this handle.\n */\n close() {\n this._emitter.removeAllListeners();\n this.setGroup(null);\n }\n}\n\n/**\n * @callback SelectionHandle~listener\n * @param {Object} event - An object containing details of the event. For\n * `\"change\"` events, this includes the properties `value` (the new\n * value of the selection, or `undefined` if no selection is active),\n * `oldValue` (the previous value of the selection), and `sender` (the\n * `SelectionHandle` instance that made the change).\n */\n\n/**\n * @event SelectionHandle#change\n * @type {object}\n * @property {object} value - The new value of the selection, or `undefined`\n * if no selection is active.\n * @property {object} oldValue - The previous value of the selection.\n * @property {SelectionHandle} sender - The `SelectionHandle` instance that\n * changed the value.\n */\n","export function extend(target, ...sources) {\n for (let i = 0; i < sources.length; i++) {\n let src = sources[i];\n if (typeof(src) === \"undefined\" || src === null)\n continue;\n\n for (let key in src) {\n if (src.hasOwnProperty(key)) {\n target[key] = src[key];\n }\n }\n }\n return target;\n}\n\nexport function checkSorted(list) {\n for (let i = 1; i < list.length; i++) {\n if (list[i] <= list[i-1]) {\n throw new Error(\"List is not sorted or contains duplicate\");\n }\n }\n}\n\nexport function diffSortedLists(a, b) {\n let i_a = 0;\n let i_b = 
0;\n\n if (!a) a = [];\n if (!b) b = [];\n\n let a_only = [];\n let b_only = [];\n\n checkSorted(a);\n checkSorted(b);\n\n while (i_a < a.length && i_b < b.length) {\n if (a[i_a] === b[i_b]) {\n i_a++;\n i_b++;\n } else if (a[i_a] < b[i_b]) {\n a_only.push(a[i_a++]);\n } else {\n b_only.push(b[i_b++]);\n }\n }\n\n if (i_a < a.length)\n a_only = a_only.concat(a.slice(i_a));\n if (i_b < b.length)\n b_only = b_only.concat(b.slice(i_b));\n return {\n removed: a_only,\n added: b_only\n };\n}\n\n// Convert from wide: { colA: [1,2,3], colB: [4,5,6], ... }\n// to long: [ {colA: 1, colB: 4}, {colA: 2, colB: 5}, ... ]\nexport function dataframeToD3(df) {\n let names = [];\n let length;\n for (let name in df) {\n if (df.hasOwnProperty(name))\n names.push(name);\n if (typeof(df[name]) !== \"object\" || typeof(df[name].length) === \"undefined\") {\n throw new Error(\"All fields must be arrays\");\n } else if (typeof(length) !== \"undefined\" && length !== df[name].length) {\n throw new Error(\"All fields must be arrays of the same length\");\n }\n length = df[name].length;\n }\n let results = [];\n let item;\n for (let row = 0; row < length; row++) {\n item = {};\n for (let col = 0; col < names.length; col++) {\n item[names[col]] = df[names[col]][row];\n }\n results.push(item);\n }\n return results;\n}\n\n/**\n * Keeps track of all event listener additions/removals and lets all active\n * listeners be removed with a single operation.\n *\n * @private\n */\nexport class SubscriptionTracker {\n constructor(emitter) {\n this._emitter = emitter;\n this._subs = {};\n }\n\n on(eventType, listener) {\n let sub = this._emitter.on(eventType, listener);\n this._subs[sub] = eventType;\n return sub;\n }\n\n off(eventType, listener) {\n let sub = this._emitter.off(eventType, listener);\n if (sub) {\n delete this._subs[sub];\n }\n return sub;\n }\n\n removeAllListeners() {\n let current_subs = this._subs;\n this._subs = {};\n Object.keys(current_subs).forEach((sub) => {\n 
this._emitter.off(current_subs[sub], sub);\n });\n }\n}\n","import Events from \"./events\";\n\nexport default class Var {\n constructor(group, name, /*optional*/ value) {\n this._group = group;\n this._name = name;\n this._value = value;\n this._events = new Events();\n }\n\n get() {\n return this._value;\n }\n\n set(value, /*optional*/ event) {\n if (this._value === value) {\n // Do nothing; the value hasn't changed\n return;\n }\n let oldValue = this._value;\n this._value = value;\n // Alert JavaScript listeners that the value has changed\n let evt = {};\n if (event && typeof(event) === \"object\") {\n for (let k in event) {\n if (event.hasOwnProperty(k))\n evt[k] = event[k];\n }\n }\n evt.oldValue = oldValue;\n evt.value = value;\n this._events.trigger(\"change\", evt, this);\n\n // TODO: Make this extensible, to let arbitrary back-ends know that\n // something has changed\n if (global.Shiny && global.Shiny.onInputChange) {\n global.Shiny.onInputChange(\n \".clientValue-\" +\n (this._group.name !== null ? this._group.name + \"-\" : \"\") +\n this._name,\n typeof(value) === \"undefined\" ? null : value\n );\n }\n }\n\n on(eventType, listener) {\n return this._events.on(eventType, listener);\n }\n\n off(eventType, listener) {\n return this._events.off(eventType, listener);\n }\n}\n"]} \ No newline at end of file diff --git a/static/rmarkdown-libs/datatables-binding/datatables.js b/static/rmarkdown-libs/datatables-binding/datatables.js deleted file mode 100644 index a21d5692..00000000 --- a/static/rmarkdown-libs/datatables-binding/datatables.js +++ /dev/null @@ -1,1411 +0,0 @@ -(function() { - -// some helper functions: using a global object DTWidget so that it can be used -// in JS() code, e.g. datatable(options = list(foo = JS('code'))); unlike R's -// dynamic scoping, when 'code' is eval()'ed, JavaScript does not know objects -// from the "parent frame", e.g. 
JS('DTWidget') will not work unless it was made -// a global object -var DTWidget = {}; - -// 123456666.7890 -> 123,456,666.7890 -var markInterval = function(d, digits, interval, mark, decMark, precision) { - x = precision ? d.toPrecision(digits) : d.toFixed(digits); - if (!/^-?[\d.]+$/.test(x)) return x; - var xv = x.split('.'); - if (xv.length > 2) return x; // should have at most one decimal point - xv[0] = xv[0].replace(new RegExp('\\B(?=(\\d{' + interval + '})+(?!\\d))', 'g'), mark); - return xv.join(decMark); -}; - -DTWidget.formatCurrency = function(data, currency, digits, interval, mark, decMark, before) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - var res = markInterval(d, digits, interval, mark, decMark); - res = before ? (/^-/.test(res) ? '-' + currency + res.replace(/^-/, '') : currency + res) : - res + currency; - return res; -}; - -DTWidget.formatString = function(data, prefix, suffix) { - var d = data; - if (d === null) return ''; - return prefix + d + suffix; -}; - -DTWidget.formatPercentage = function(data, digits, interval, mark, decMark) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - return markInterval(d * 100, digits, interval, mark, decMark) + '%'; -}; - -DTWidget.formatRound = function(data, digits, interval, mark, decMark) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - return markInterval(d, digits, interval, mark, decMark); -}; - -DTWidget.formatSignif = function(data, digits, interval, mark, decMark) { - var d = parseFloat(data); - if (isNaN(d)) return ''; - return markInterval(d, digits, interval, mark, decMark, true); -}; - -DTWidget.formatDate = function(data, method, params) { - var d = data; - if (d === null) return ''; - // (new Date('2015-10-28')).toDateString() may return 2015-10-27 because the - // actual time created could be like 'Tue Oct 27 2015 19:00:00 GMT-0500 (CDT)', - // i.e. 
the date-only string is treated as UTC time instead of local time - if ((method === 'toDateString' || method === 'toLocaleDateString') && /^\d{4,}\D\d{2}\D\d{2}$/.test(d)) { - d = d.split(/\D/); - d = new Date(d[0], d[1] - 1, d[2]); - } else { - d = new Date(d); - } - return d[method].apply(d, params); -}; - -window.DTWidget = DTWidget; - -var transposeArray2D = function(a) { - return a.length === 0 ? a : HTMLWidgets.transposeArray2D(a); -}; - -var crosstalkPluginsInstalled = false; - -function maybeInstallCrosstalkPlugins() { - if (crosstalkPluginsInstalled) - return; - crosstalkPluginsInstalled = true; - - $.fn.dataTable.ext.afnFiltering.push( - function(oSettings, aData, iDataIndex) { - var ctfilter = oSettings.nTable.ctfilter; - if (ctfilter && !ctfilter[iDataIndex]) - return false; - - var ctselect = oSettings.nTable.ctselect; - if (ctselect && !ctselect[iDataIndex]) - return false; - - return true; - } - ); -} - -HTMLWidgets.widget({ - name: "datatables", - type: "output", - renderOnNullValue: true, - initialize: function(el, width, height) { - $(el).html(' '); - return { - data: null, - ctfilterHandle: new crosstalk.FilterHandle(), - ctfilterSubscription: null, - ctselectHandle: new crosstalk.SelectionHandle(), - ctselectSubscription: null - }; - }, - renderValue: function(el, data, instance) { - if (el.offsetWidth === 0 || el.offsetHeight === 0) { - instance.data = data; - return; - } - instance.data = null; - var $el = $(el); - $el.empty(); - - if (data === null) { - $el.append(' '); - // clear previous Shiny inputs (if any) - for (var i in instance.clearInputs) instance.clearInputs[i](); - instance.clearInputs = {}; - return; - } - - var crosstalkOptions = data.crosstalkOptions; - if (!crosstalkOptions) crosstalkOptions = { - 'key': null, 'group': null - }; - if (crosstalkOptions.group) { - maybeInstallCrosstalkPlugins(); - instance.ctfilterHandle.setGroup(crosstalkOptions.group); - instance.ctselectHandle.setGroup(crosstalkOptions.group); - } - - // If 
we are in a flexdashboard scroll layout then we: - // (a) Always want to use pagination (otherwise we'll have - // a "double scroll bar" effect on the phone); and - // (b) Never want to fill the container (we want the pagination - // level to determine the size of the container) - if (window.FlexDashboard && !window.FlexDashboard.isFillPage()) { - data.options.bPaginate = true; - data.fillContainer = false; - } - - // if we are in the viewer then we always want to fillContainer and - // and autoHideNavigation (unless the user has explicitly set these) - if (window.HTMLWidgets.viewerMode) { - if (!data.hasOwnProperty("fillContainer")) - data.fillContainer = true; - if (!data.hasOwnProperty("autoHideNavigation")) - data.autoHideNavigation = true; - } - - // propagate fillContainer to instance (so we have it in resize) - instance.fillContainer = data.fillContainer; - - var cells = data.data; - - if (cells instanceof Array) cells = transposeArray2D(cells); - - $el.append(data.container); - var $table = $el.find('table'); - if (data.class) $table.addClass(data.class); - if (data.caption) $table.prepend(data.caption); - - if (!data.selection) data.selection = { - mode: 'none', selected: null, target: 'row', selectable: null - }; - if (HTMLWidgets.shinyMode && data.selection.mode !== 'none' && - data.selection.target === 'row+column') { - if ($table.children('tfoot').length === 0) { - $table.append($('')); - $table.find('thead tr').clone().appendTo($table.find('tfoot')); - } - } - - // column filters - var filterRow; - switch (data.filter) { - case 'top': - $table.children('thead').append(data.filterHTML); - filterRow = $table.find('thead tr:last td'); - break; - case 'bottom': - if ($table.children('tfoot').length === 0) { - $table.append($('')); - } - $table.children('tfoot').prepend(data.filterHTML); - filterRow = $table.find('tfoot tr:first td'); - break; - } - - var options = { searchDelay: 1000 }; - if (cells !== null) $.extend(options, { - data: cells - }); - - // 
options for fillContainer - var bootstrapActive = typeof($.fn.popover) != 'undefined'; - if (instance.fillContainer) { - - // force scrollX/scrollY and turn off autoWidth - options.scrollX = true; - options.scrollY = "100px"; // can be any value, we'll adjust below - - // if we aren't paginating then move around the info/filter controls - // to save space at the bottom and rephrase the info callback - if (data.options.bPaginate === false) { - - // we know how to do this cleanly for bootstrap, not so much - // for other themes/layouts - if (bootstrapActive) { - options.dom = "<'row'<'col-sm-4'i><'col-sm-8'f>>" + - "<'row'<'col-sm-12'tr>>"; - } - - options.fnInfoCallback = function(oSettings, iStart, iEnd, - iMax, iTotal, sPre) { - return Number(iTotal).toLocaleString() + " records"; - }; - } - } - - // auto hide navigation if requested - if (data.autoHideNavigation === true) { - if (bootstrapActive && data.options.bPaginate !== false) { - // strip all nav if length >= cells - if ((cells instanceof Array) && data.options.iDisplayLength >= cells.length) - options.dom = "<'row'<'col-sm-12'tr>>"; - // alternatively lean things out for flexdashboard mobile portrait - else if (window.FlexDashboard && window.FlexDashboard.isMobilePhone()) - options.dom = "<'row'<'col-sm-12'f>>" + - "<'row'<'col-sm-12'tr>>" + - "<'row'<'col-sm-12'p>>"; - } - } - - $.extend(true, options, data.options || {}); - - var searchCols = options.searchCols; - if (searchCols) { - searchCols = searchCols.map(function(x) { - return x === null ? '' : x.search; - }); - // FIXME: this means I don't respect the escapeRegex setting - delete options.searchCols; - } - - // server-side processing? 
- var server = options.serverSide === true; - - // use the dataSrc function to pre-process JSON data returned from R - var DT_rows_all = [], DT_rows_current = []; - if (server && HTMLWidgets.shinyMode && typeof options.ajax === 'object' && - /^session\/[\da-z]+\/dataobj/.test(options.ajax.url) && !options.ajax.dataSrc) { - options.ajax.dataSrc = function(json) { - DT_rows_all = $.makeArray(json.DT_rows_all); - DT_rows_current = $.makeArray(json.DT_rows_current); - var data = json.data; - if (!colReorderEnabled()) return data; - var table = $table.DataTable(), order = table.colReorder.order(), flag = true, i, j, row; - for (i = 0; i < order.length; ++i) if (order[i] !== i) flag = false; - if (flag) return data; - for (i = 0; i < data.length; ++i) { - row = data[i].slice(); - for (j = 0; j < order.length; ++j) data[i][j] = row[order[j]]; - } - return data; - }; - } - - var thiz = this; - if (instance.fillContainer) $table.on('init.dt', function(e) { - thiz.fillAvailableHeight(el, $(el).innerHeight()); - }); - // If the page contains serveral datatables and one of which enables colReorder, - // the table.colReorder.order() function will exist but throws error when called. - // So it seems like the only way to know if colReorder is enabled or not is to - // check the options. 
- var colReorderEnabled = function() { return "colReorder" in options; }; - var table = $table.DataTable(options); - $el.data('datatable', table); - - // Unregister previous Crosstalk event subscriptions, if they exist - if (instance.ctfilterSubscription) { - instance.ctfilterHandle.off("change", instance.ctfilterSubscription); - instance.ctfilterSubscription = null; - } - if (instance.ctselectSubscription) { - instance.ctselectHandle.off("change", instance.ctselectSubscription); - instance.ctselectSubscription = null; - } - - if (!crosstalkOptions.group) { - $table[0].ctfilter = null; - $table[0].ctselect = null; - } else { - var key = crosstalkOptions.key; - function keysToMatches(keys) { - if (!keys) { - return null; - } else { - var selectedKeys = {}; - for (var i = 0; i < keys.length; i++) { - selectedKeys[keys[i]] = true; - } - var matches = {}; - for (var j = 0; j < key.length; j++) { - if (selectedKeys[key[j]]) - matches[j] = true; - } - return matches; - } - } - - function applyCrosstalkFilter(e) { - $table[0].ctfilter = keysToMatches(e.value); - table.draw(); - } - instance.ctfilterSubscription = instance.ctfilterHandle.on("change", applyCrosstalkFilter); - applyCrosstalkFilter({value: instance.ctfilterHandle.filteredKeys}); - - function applyCrosstalkSelection(e) { - if (e.sender !== instance.ctselectHandle) { - table - .rows('.' 
+ selClass, {search: 'applied'}) - .nodes() - .to$() - .removeClass(selClass); - if (selectedRows) - changeInput('rows_selected', selectedRows(), void 0, true); - } - - if (e.sender !== instance.ctselectHandle && e.value && e.value.length) { - var matches = keysToMatches(e.value); - - // persistent selection with plotly (& leaflet) - var ctOpts = crosstalk.var("plotlyCrosstalkOpts").get() || {}; - if (ctOpts.persistent === true) { - var matches = $.extend(matches, $table[0].ctselect); - } - - $table[0].ctselect = matches; - table.draw(); - } else { - if ($table[0].ctselect) { - $table[0].ctselect = null; - table.draw(); - } - } - } - instance.ctselectSubscription = instance.ctselectHandle.on("change", applyCrosstalkSelection); - // TODO: This next line doesn't seem to work when renderDataTable is used - applyCrosstalkSelection({value: instance.ctselectHandle.value}); - } - - var inArray = function(val, array) { - return $.inArray(val, $.makeArray(array)) > -1; - }; - - // encode + to %2B when searching in the table on server side, because - // shiny::parseQueryString() treats + as spaces, and DataTables does not - // encode + to %2B (or % to %25) when sending the request - var encode_plus = function(x) { - return server ? 
x.replace(/%/g, '%25').replace(/\+/g, '%2B') : x; - }; - - // search the i-th column - var searchColumn = function(i, value) { - var regex = false, ci = true; - if (options.search) { - regex = options.search.regex, - ci = options.search.caseInsensitive !== false; - } - return table.column(i).search(encode_plus(value), regex, !regex, ci); - }; - - if (data.filter !== 'none') { - - filterRow.each(function(i, td) { - - var $td = $(td), type = $td.data('type'), filter; - var $input = $td.children('div').first().children('input'); - $input.prop('disabled', !table.settings()[0].aoColumns[i].bSearchable || type === 'disabled'); - $input.on('input blur', function() { - $input.next('span').toggle(Boolean($input.val())); - }); - // Bootstrap sets pointer-events to none and we won't be able to click - // the clear button - $input.next('span').css('pointer-events', 'auto').hide().click(function() { - $(this).hide().prev('input').val('').trigger('input').focus(); - }); - var searchCol; // search string for this column - if (searchCols && searchCols[i]) { - searchCol = searchCols[i]; - $input.val(searchCol).trigger('input'); - } - var $x = $td.children('div').last(); - - // remove the overflow: hidden attribute of the scrollHead - // (otherwise the scrolling table body obscures the filters) - // The workaround and the discussion from - // https://github.com/rstudio/DT/issues/554#issuecomment-518007347 - // Otherwise the filter selection will not be anchored to the values - // when the columns number is many and scrollX is enabled. 
- var scrollHead = $(el).find('.dataTables_scrollHead,.dataTables_scrollFoot'); - var cssOverflowHead = scrollHead.css('overflow'); - var scrollBody = $(el).find('.dataTables_scrollBody'); - var cssOverflowBody = scrollBody.css('overflow'); - var scrollTable = $(el).find('.dataTables_scroll'); - var cssOverflowTable = scrollTable.css('overflow'); - if (cssOverflowHead === 'hidden') { - $x.on('show hide', function(e) { - if (e.type === 'show') { - scrollHead.css('overflow', 'visible'); - scrollBody.css('overflow', 'visible'); - scrollTable.css('overflow-x', 'scroll'); - } else { - scrollHead.css('overflow', cssOverflowHead); - scrollBody.css('overflow', cssOverflowBody); - scrollTable.css('overflow-x', cssOverflowTable); - } - }); - $x.css('z-index', 25); - } - - if (inArray(type, ['factor', 'logical'])) { - $input.on({ - click: function() { - $input.parent().hide(); $x.show().trigger('show'); filter[0].selectize.focus(); - }, - input: function() { - if ($input.val() === '') filter[0].selectize.setValue([]); - } - }); - var $input2 = $x.children('select'); - filter = $input2.selectize({ - options: $input2.data('options').map(function(v, i) { - return ({text: v, value: v}); - }), - plugins: ['remove_button'], - hideSelected: true, - onChange: function(value) { - if (value === null) value = []; // compatibility with jQuery 3.0 - $input.val(value.length ? JSON.stringify(value) : ''); - if (value.length) $input.trigger('input'); - $input.attr('title', $input.val()); - if (server) { - table.column(i).search(value.length ? 
encode_plus(JSON.stringify(value)) : '').draw(); - return; - } - // turn off filter if nothing selected - $td.data('filter', value.length > 0); - table.draw(); // redraw table, and filters will be applied - } - }); - if (searchCol) filter[0].selectize.setValue(JSON.parse(searchCol)); - filter[0].selectize.on('blur', function() { - $x.hide().trigger('hide'); $input.parent().show(); $input.trigger('blur'); - }); - filter.next('div').css('margin-bottom', 'auto'); - } else if (type === 'character') { - var fun = function() { - searchColumn(i, $input.val()).draw(); - }; - if (server) { - fun = $.fn.dataTable.util.throttle(fun, options.searchDelay); - } - $input.on('input', fun); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - var $x0 = $x; - $x = $x0.children('div').first(); - $x0.css({ - 'background-color': '#fff', - 'border': '1px #ddd solid', - 'border-radius': '4px', - 'padding': '20px 20px 10px 20px' - }); - var $spans = $x0.children('span').css({ - 'margin-top': '10px', - 'white-space': 'nowrap' - }); - var $span1 = $spans.first(), $span2 = $spans.last(); - var r1 = +$x.data('min'), r2 = +$x.data('max'); - // when the numbers are too small or have many decimal places, the - // slider may have numeric precision problems (#150) - var scale = Math.pow(10, Math.max(0, +$x.data('scale') || 0)); - r1 = Math.round(r1 * scale); r2 = Math.round(r2 * scale); - var scaleBack = function(x, scale) { - if (scale === 1) return x; - var d = Math.round(Math.log(scale) / Math.log(10)); - // to avoid problems like 3.423/100 -> 0.034230000000000003 - return (x / scale).toFixed(d); - }; - var slider_min = function() { - return filter.noUiSlider('options').range.min; - }; - var slider_max = function() { - return filter.noUiSlider('options').range.max; - }; - $input.on({ - focus: function() { - $x0.show().trigger('show'); - // first, make sure the slider div leaves at least 20px between - // the two (slider value) span's - $x0.width(Math.max(160, 
$span1.outerWidth() + $span2.outerWidth() + 20)); - // then, if the input is really wide, make the slider the same - // width as the input - if ($x0.outerWidth() < $input.outerWidth()) { - $x0.outerWidth($input.outerWidth()); - } - // make sure the slider div does not reach beyond the right margin - if ($(window).width() < $x0.offset().left + $x0.width()) { - $x0.offset({ - 'left': $input.offset().left + $input.outerWidth() - $x0.outerWidth() - }); - } - }, - blur: function() { - $x0.hide().trigger('hide'); - }, - input: function() { - if ($input.val() === '') filter.val([slider_min(), slider_max()]); - }, - change: function() { - var v = $input.val().replace(/\s/g, ''); - if (v === '') return; - v = v.split('...'); - if (v.length !== 2) { - $input.parent().addClass('has-error'); - return; - } - if (v[0] === '') v[0] = slider_min(); - if (v[1] === '') v[1] = slider_max(); - $input.parent().removeClass('has-error'); - // treat date as UTC time at midnight - var strTime = function(x) { - var s = type === 'date' ? 'T00:00:00Z' : ''; - var t = new Date(x + s).getTime(); - // add 10 minutes to date since it does not hurt the date, and - // it helps avoid the tricky floating point arithmetic problems, - // e.g. sometimes the date may be a few milliseconds earlier - // than the midnight due to precision problems in noUiSlider - return type === 'date' ? t + 3600000 : t; - }; - if (inArray(type, ['date', 'time'])) { - v[0] = strTime(v[0]); - v[1] = strTime(v[1]); - } - if (v[0] != slider_min()) v[0] *= scale; - if (v[1] != slider_max()) v[1] *= scale; - filter.val(v); - } - }); - var formatDate = function(d, isoFmt) { - d = scaleBack(d, scale); - if (type === 'number') return d; - if (type === 'integer') return parseInt(d); - var x = new Date(+d); - var fmt = ('filterDateFmt' in data) ? 
data.filterDateFmt[i] : undefined; - if (fmt !== undefined && isoFmt === false) return x[fmt.method].apply(x, fmt.params); - if (type === 'date') { - var pad0 = function(x) { - return ('0' + x).substr(-2, 2); - }; - return x.getUTCFullYear() + '-' + pad0(1 + x.getUTCMonth()) - + '-' + pad0(x.getUTCDate()); - } else { - return x.toISOString(); - } - }; - var opts = type === 'date' ? { step: 60 * 60 * 1000 } : - type === 'integer' ? { step: 1 } : {}; - filter = $x.noUiSlider($.extend({ - start: [r1, r2], - range: {min: r1, max: r2}, - connect: true - }, opts)); - if (scale > 1) (function() { - var t1 = r1, t2 = r2; - var val = filter.val(); - while (val[0] > r1 || val[1] < r2) { - if (val[0] > r1) { - t1 -= val[0] - r1; - } - if (val[1] < r2) { - t2 += r2 - val[1]; - } - filter = $x.noUiSlider($.extend({ - start: [t1, t2], - range: {min: t1, max: t2}, - connect: true - }, opts), true); - val = filter.val(); - } - r1 = t1; r2 = t2; - })(); - var updateSliderText = function(v1, v2) { - $span1.text(formatDate(v1, false)); $span2.text(formatDate(v2, false)); - }; - updateSliderText(r1, r2); - var updateSlider = function(e) { - var val = filter.val(); - // turn off filter if in full range - $td.data('filter', val[0] > slider_min() || val[1] < slider_max()); - var v1 = formatDate(val[0]), v2 = formatDate(val[1]), ival; - if ($td.data('filter')) { - ival = v1 + ' ... ' + v2; - $input.attr('title', ival).val(ival).trigger('input'); - } else { - $input.attr('title', '').val(''); - } - updateSliderText(val[0], val[1]); - if (e.type === 'slide') return; // no searching when sliding only - if (server) { - table.column(i).search($td.data('filter') ? 
ival : '').draw(); - return; - } - table.draw(); - }; - filter.on({ - set: updateSlider, - slide: updateSlider - }); - } - - // server-side processing will be handled by R (or whatever server - // language you use); the following code is only needed for client-side - // processing - if (server) { - // if a search string has been pre-set, search now - if (searchCol) searchColumn(i, searchCol).draw(); - return; - } - - var customFilter = function(settings, data, dataIndex) { - // there is no way to attach a search function to a specific table, - // and we need to make sure a global search function is not applied to - // all tables (i.e. a range filter in a previous table should not be - // applied to the current table); we use the settings object to - // determine if we want to perform searching on the current table, - // since settings.sTableId will be different to different tables - if (table.settings()[0] !== settings) return true; - // no filter on this column or no need to filter this column - if (typeof filter === 'undefined' || !$td.data('filter')) return true; - - var r = filter.val(), v, r0, r1; - var i_data = function(i) { - if (!colReorderEnabled()) return i; - var order = table.colReorder.order(), k; - for (k = 0; k < order.length; ++k) if (order[k] === i) return k; - return i; // in theory it will never be here... - } - v = data[i_data(i)]; - if (type === 'number' || type === 'integer') { - v = parseFloat(v); - // how to handle NaN? 
currently exclude these rows - if (isNaN(v)) return(false); - r0 = parseFloat(scaleBack(r[0], scale)) - r1 = parseFloat(scaleBack(r[1], scale)); - if (v >= r0 && v <= r1) return true; - } else if (type === 'date' || type === 'time') { - v = new Date(v); - r0 = new Date(r[0] / scale); r1 = new Date(r[1] / scale); - if (v >= r0 && v <= r1) return true; - } else if (type === 'factor') { - if (r.length === 0 || inArray(v, r)) return true; - } else if (type === 'logical') { - if (r.length === 0) return true; - if (inArray(v === '' ? 'na' : v, r)) return true; - } - return false; - }; - - $.fn.dataTable.ext.search.push(customFilter); - - // search for the preset search strings if it is non-empty - if (searchCol) { - if (inArray(type, ['factor', 'logical'])) { - filter[0].selectize.setValue(JSON.parse(searchCol)); - } else if (type === 'character') { - $input.trigger('input'); - } else if (inArray(type, ['number', 'integer', 'date', 'time'])) { - $input.trigger('change'); - } - } - - }); - - } - - // highlight search keywords - var highlight = function() { - var body = $(table.table().body()); - // removing the old highlighting first - body.unhighlight(); - - // don't highlight the "not found" row, so we get the rows using the api - if (table.rows({ filter: 'applied' }).data().length === 0) return; - // highlight global search keywords - body.highlight($.trim(table.search()).split(/\s+/)); - // then highlight keywords from individual column filters - if (filterRow) filterRow.each(function(i, td) { - var $td = $(td), type = $td.data('type'); - if (type !== 'character') return; - var $input = $td.children('div').first().children('input'); - var column = table.column(i).nodes().to$(), - val = $.trim($input.val()); - if (type !== 'character' || val === '') return; - column.highlight(val.split(/\s+/)); - }); - }; - - if (options.searchHighlight) { - table - .on('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth', highlight) - .on('destroy', function() { - // remove 
event handler - table.off('draw.dt.dth column-visibility.dt.dth column-reorder.dt.dth'); - }); - - // Set the option for escaping regex characters in our search string. This will be used - // for all future matching. - jQuery.fn.highlight.options.escapeRegex = (!options.search || !options.search.regex); - - // initial highlight for state saved conditions and initial states - highlight(); - } - - // run the callback function on the table instance - if (typeof data.callback === 'function') data.callback(table); - - // double click to edit the cell, row, column, or all cells - if (data.editable) table.on('dblclick.dt', 'tbody td', function(e) { - // only bring up the editor when the cell itself is dbclicked, and ignore - // other dbclick events bubbled up (e.g. from the ) - if (e.target !== this) return; - var target = [], immediate = false; - switch (data.editable.target) { - case 'cell': - target = [this]; - immediate = true; // edit will take effect immediately - break; - case 'row': - target = table.cells(table.cell(this).index().row, '*').nodes(); - break; - case 'column': - target = table.cells('*', table.cell(this).index().column).nodes(); - break; - case 'all': - target = table.cells().nodes(); - break; - default: - throw 'The editable parameter must be "cell", "row", "column", or "all"'; - } - var disableCols = data.editable.disable ? 
data.editable.disable.columns : null; - for (var i = 0; i < target.length; i++) { - (function(cell, current) { - var $cell = $(cell), html = $cell.html(); - var _cell = table.cell(cell), value = _cell.data(); - var $input = $(''), changed = false; - if (!immediate) { - $cell.data('input', $input).data('html', html); - $input.attr('title', 'Hit Ctrl+Enter to finish editing, or Esc to cancel'); - } - $input.val(value); - if (inArray(_cell.index().column, disableCols)) { - $input.attr('readonly', '').css('filter', 'invert(25%)'); - } - $cell.empty().append($input); - if (cell === current) $input.focus(); - $input.css('width', '100%'); - - if (immediate) $input.on('change', function() { - changed = true; - var valueNew = $input.val(); - if (valueNew != value) { - _cell.data(valueNew); - if (HTMLWidgets.shinyMode) { - changeInput('cell_edit', [cellInfo(cell)], 'DT.cellInfo', null, {priority: "event"}); - } - // for server-side processing, users have to call replaceData() to update the table - if (!server) table.draw(false); - } else { - $cell.html(html); - } - $input.remove(); - }).on('blur', function() { - if (!changed) $input.trigger('change'); - }).on('keyup', function(e) { - // hit Escape to cancel editing - if (e.keyCode === 27) $input.trigger('blur'); - }); - - // bulk edit (row, column, or all) - if (!immediate) $input.on('keyup', function(e) { - var removeInput = function($cell, restore) { - $cell.data('input').remove(); - if (restore) $cell.html($cell.data('html')); - } - if (e.keyCode === 27) { - for (var i = 0; i < target.length; i++) { - removeInput($(target[i]), true); - } - } else if (e.keyCode === 13 && e.ctrlKey) { - // Ctrl + Enter - var cell, $cell, _cell, cellData = []; - for (var i = 0; i < target.length; i++) { - cell = target[i]; $cell = $(cell); _cell = table.cell(cell); - _cell.data($cell.data('input').val()); - HTMLWidgets.shinyMode && cellData.push(cellInfo(cell)); - removeInput($cell, false); - } - if (HTMLWidgets.shinyMode) { - 
changeInput('cell_edit', cellData, 'DT.cellInfo', null, {priority: "event"}); - } - if (!server) table.draw(false); - } - }); - })(target[i], this); - } - }); - - // interaction with shiny - if (!HTMLWidgets.shinyMode && !crosstalkOptions.group) return; - - var methods = {}; - var shinyData = {}; - - methods.updateCaption = function(caption) { - if (!caption) return; - $table.children('caption').replaceWith(caption); - } - - // register clear functions to remove input values when the table is removed - instance.clearInputs = {}; - - var changeInput = function(id, value, type, noCrosstalk, opts) { - var event = id; - id = el.id + '_' + id; - if (type) id = id + ':' + type; - // do not update if the new value is the same as old value - if (event !== 'cell_edit' && !/_clicked$/.test(event) && shinyData.hasOwnProperty(id) && shinyData[id] === JSON.stringify(value)) - return; - shinyData[id] = JSON.stringify(value); - if (HTMLWidgets.shinyMode && Shiny.setInputValue) { - Shiny.setInputValue(id, value, opts); - if (!instance.clearInputs[id]) instance.clearInputs[id] = function() { - Shiny.setInputValue(id, null); - } - } - - // HACK - if (event === "rows_selected" && !noCrosstalk) { - if (crosstalkOptions.group) { - var keys = crosstalkOptions.key; - var selectedKeys = null; - if (value) { - selectedKeys = []; - for (var i = 0; i < value.length; i++) { - // The value array's contents use 1-based row numbers, so we must - // convert to 0-based before indexing into the keys array. - selectedKeys.push(keys[value[i] - 1]); - } - } - instance.ctselectHandle.set(selectedKeys); - } - } - }; - - var addOne = function(x) { - return x.map(function(i) { return 1 + i; }); - }; - - var unique = function(x) { - var ux = []; - $.each(x, function(i, el){ - if ($.inArray(el, ux) === -1) ux.push(el); - }); - return ux; - } - - // change the row index of a cell - var tweakCellIndex = function(cell) { - var info = cell.index(); - // some cell may not be valid. 
e.g, #759 - // when using the RowGroup extension, datatables will - // generate the row label and the cells are not part of - // the data thus contain no row/col info - if (info === undefined) - return {row: null, col: null}; - if (server) { - info.row = DT_rows_current[info.row]; - } else { - info.row += 1; - } - return {row: info.row, col: info.column}; - } - - // a flag to indicates if select extension is initialized or not - var flagSelectExt = table.settings()[0]._select !== undefined; - // the Select extension should only be used in the client mode and - // when the selection.mode is set to none - if (data.selection.mode === 'none' && !server && flagSelectExt) { - var updateRowsSelected = function() { - var rows = table.rows({selected: true}); - var selected = []; - $.each(rows.indexes().toArray(), function(i, v) { - selected.push(v + 1); - }); - changeInput('rows_selected', selected); - } - var updateColsSelected = function() { - var columns = table.columns({selected: true}); - changeInput('columns_selected', columns.indexes().toArray()); - } - var updateCellsSelected = function() { - var cells = table.cells({selected: true}); - var selected = []; - cells.every(function() { - var row = this.index().row; - var col = this.index().column; - selected = selected.concat([[row + 1, col]]); - }); - changeInput('cells_selected', transposeArray2D(selected), 'shiny.matrix'); - } - table.on('select deselect', function(e, dt, type, indexes) { - updateRowsSelected(); - updateColsSelected(); - updateCellsSelected(); - }) - } - - var selMode = data.selection.mode, selTarget = data.selection.target; - var selDisable = data.selection.selectable === false; - if (inArray(selMode, ['single', 'multiple'])) { - var selClass = inArray(data.style, ['bootstrap', 'bootstrap4']) ? 
'active' : 'selected'; - // selected1: row indices; selected2: column indices - var initSel = function(x) { - if (x === null || typeof x === 'boolean' || selTarget === 'cell') { - return {rows: [], cols: []}; - } else if (selTarget === 'row') { - return {rows: $.makeArray(x), cols: []}; - } else if (selTarget === 'column') { - return {rows: [], cols: $.makeArray(x)}; - } else if (selTarget === 'row+column') { - return {rows: $.makeArray(x.rows), cols: $.makeArray(x.cols)}; - } - } - var selected = data.selection.selected; - var selected1 = initSel(selected).rows, selected2 = initSel(selected).cols; - // selectable should contain either all positive or all non-positive values, not both - // positive values indicate "selectable" while non-positive values means "nonselectable" - // the assertion is performed on R side. (only column indicides could be zero which indicates - // the row name) - var selectable = data.selection.selectable; - var selectable1 = initSel(selectable).rows, selectable2 = initSel(selectable).cols; - - // After users reorder the rows or filter the table, we cannot use the table index - // directly. Instead, we need this function to find out the rows between the two clicks. - // If user filter the table again between the start click and the end click, the behavior - // would be undefined, but it should not be a problem. - var shiftSelRowsIndex = function(start, end) { - var indexes = server ? DT_rows_all : table.rows({ search: 'applied' }).indexes().toArray(); - start = indexes.indexOf(start); end = indexes.indexOf(end); - // if start is larger than end, we need to swap - if (start > end) { - var tmp = end; end = start; start = tmp; - } - return indexes.slice(start, end + 1); - } - - var serverRowIndex = function(clientRowIndex) { - return server ? 
DT_rows_current[clientRowIndex] : clientRowIndex + 1; - } - - // row, column, or cell selection - var lastClickedRow; - if (inArray(selTarget, ['row', 'row+column'])) { - // Get the current selected rows. It will also - // update the selected1's value based on the current row selection state - // Note we can't put this function inside selectRows() directly, - // the reason is method.selectRows() will override selected1's value but this - // function will add rows to selected1 (keep the existing selection), which is - // inconsistent with column and cell selection. - var selectedRows = function() { - var rows = table.rows('.' + selClass); - var idx = rows.indexes().toArray(); - if (!server) { - selected1 = addOne(idx); - return selected1; - } - idx = idx.map(function(i) { - return DT_rows_current[i]; - }); - selected1 = selMode === 'multiple' ? unique(selected1.concat(idx)) : idx; - return selected1; - } - // Change selected1's value based on selectable1, then refresh the row state - var onlyKeepSelectableRows = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected1 = []; - return; - } - if (selectable1.length === 0) return; - var nonselectable = selectable1[0] <= 0; - if (nonselectable) { - // should make selectable1 positive - selected1 = $(selected1).not(selectable1.map(function(i) { return -i; })).get(); - } else { - selected1 = $(selected1).filter(selectable1).get(); - } - } - // Change selected1's value based on selectable1, then - // refresh the row selection state according to values in selected1 - var selectRows = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableRows(); - table.$('tr.' 
+ selClass).removeClass(selClass); - if (selected1.length === 0) return; - if (server) { - table.rows({page: 'current'}).every(function() { - if (inArray(DT_rows_current[this.index()], selected1)) { - $(this.node()).addClass(selClass); - } - }); - } else { - var selected0 = selected1.map(function(i) { return i - 1; }); - $(table.rows(selected0).nodes()).addClass(selClass); - } - } - table.on('mousedown.dt', 'tbody tr', function(e) { - var $this = $(this), thisRow = table.row(this); - if (selMode === 'multiple') { - if (e.shiftKey && lastClickedRow !== undefined) { - // select or de-select depends on the last clicked row's status - var flagSel = !$this.hasClass(selClass); - var crtClickedRow = serverRowIndex(thisRow.index()); - if (server) { - var rowsIndex = shiftSelRowsIndex(lastClickedRow, crtClickedRow); - // update current page's selClass - rowsIndex.map(function(i) { - var rowIndex = DT_rows_current.indexOf(i); - if (rowIndex >= 0) { - var row = table.row(rowIndex).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - } - }); - // update selected1 - if (flagSel) { - selected1 = unique(selected1.concat(rowsIndex)); - } else { - selected1 = selected1.filter(function(index) { - return !inArray(index, rowsIndex); - }); - } - } else { - // js starts from 0 - shiftSelRowsIndex(lastClickedRow - 1, crtClickedRow - 1).map(function(value) { - var row = table.row(value).nodes().to$(); - var flagRowSel = !row.hasClass(selClass); - if (flagSel === flagRowSel) row.toggleClass(selClass); - }); - } - e.preventDefault(); - } else { - $this.toggleClass(selClass); - } - } else { - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - } else { - table.$('tr.' 
+ selClass).removeClass(selClass); - $this.addClass(selClass); - } - } - if (server && !$this.hasClass(selClass)) { - var id = DT_rows_current[thisRow.index()]; - // remove id from selected1 since its class .selected has been removed - if (inArray(id, selected1)) selected1.splice($.inArray(id, selected1), 1); - } - selectedRows(); // update selected1's value based on selClass - selectRows(false); // only keep the selectable rows - changeInput('rows_selected', selected1); - changeInput('row_last_clicked', serverRowIndex(thisRow.index()), null, null, {priority: 'event'}); - lastClickedRow = serverRowIndex(thisRow.index()); - }); - selectRows(false); // in case users have specified pre-selected rows - // restore selected rows after the table is redrawn (e.g. sort/search/page); - // client-side tables will preserve the selections automatically; for - // server-side tables, we have to *real* row indices are in `selected1` - changeInput('rows_selected', selected1); - if (server) table.on('draw.dt', function(e) { selectRows(false); }); - methods.selectRows = function(selected, ignoreSelectable) { - selected1 = $.makeArray(selected); - selectRows(ignoreSelectable); - changeInput('rows_selected', selected1); - } - } - - if (inArray(selTarget, ['column', 'row+column'])) { - if (selTarget === 'row+column') { - $(table.columns().footer()).css('cursor', 'pointer'); - } - // update selected2's value based on selectable2 - var onlyKeepSelectableCols = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected2 = []; - return; - } - if (selectable2.length === 0) return; - var nonselectable = selectable2[0] <= 0; - if (nonselectable) { - // need to make selectable2 positive - selected2 = $(selected2).not(selectable2.map(function(i) { return -i; })).get(); - } else { - selected2 = $(selected2).filter(selectable2).get(); - } - } - // update selected2 and then - // refresh the col selection state according to values in selected2 - var 
selectCols = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCols(); - // if selected2 is not a valide index (e.g., larger than the column number) - // table.columns(selected2) will fail and result in a blank table - // this is different from the table.rows(), where the out-of-range indexes - // doesn't affect at all - selected2 = $(selected2).filter(table.columns().indexes()).get(); - table.columns().nodes().flatten().to$().removeClass(selClass); - if (selected2.length > 0) - table.columns(selected2).nodes().flatten().to$().addClass(selClass); - } - var callback = function() { - var colIdx = selTarget === 'column' ? table.cell(this).index().column : - $.inArray(this, table.columns().footer()), - thisCol = $(table.column(colIdx).nodes()); - if (colIdx === -1) return; - if (thisCol.hasClass(selClass)) { - thisCol.removeClass(selClass); - selected2.splice($.inArray(colIdx, selected2), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - thisCol.addClass(selClass); - selected2 = selMode === 'single' ? 
[colIdx] : unique(selected2.concat([colIdx])); - } - selectCols(false); // update selected2 based on selectable - changeInput('columns_selected', selected2); - } - if (selTarget === 'column') { - $(table.table().body()).on('click.dt', 'td', callback); - } else { - $(table.table().footer()).on('click.dt', 'tr th', callback); - } - selectCols(false); // in case users have specified pre-selected columns - changeInput('columns_selected', selected2); - if (server) table.on('draw.dt', function(e) { selectCols(false); }); - methods.selectColumns = function(selected, ignoreSelectable) { - selected2 = $.makeArray(selected); - selectCols(ignoreSelectable); - changeInput('columns_selected', selected2); - } - } - - if (selTarget === 'cell') { - var selected3 = [], selectable3 = []; - if (selected !== null) selected3 = selected; - if (selectable !== null && typeof selectable !== 'boolean') selectable3 = selectable; - var findIndex = function(ij, sel) { - for (var i = 0; i < sel.length; i++) { - if (ij[0] === sel[i][0] && ij[1] === sel[i][1]) return i; - } - return -1; - } - // Change selected3's value based on selectable3, then refresh the cell state - var onlyKeepSelectableCells = function() { - if (selDisable) { // users can't select; useful when only want backend select - selected3 = []; - return; - } - if (selectable3.length === 0) return; - var nonselectable = selectable3[0][0] <= 0; - var out = []; - if (nonselectable) { - selected3.map(function(ij) { - // should make selectable3 positive - if (findIndex([-ij[0], -ij[1]], selectable3) === -1) { out.push(ij); } - }); - } else { - selected3.map(function(ij) { - if (findIndex(ij, selectable3) > -1) { out.push(ij); } - }); - } - selected3 = out; - } - // Change selected3's value based on selectable3, then - // refresh the cell selection state according to values in selected3 - var selectCells = function(ignoreSelectable) { - if (!ignoreSelectable) onlyKeepSelectableCells(); - table.$('td.' 
+ selClass).removeClass(selClass); - if (selected3.length === 0) return; - if (server) { - table.cells({page: 'current'}).every(function() { - var info = tweakCellIndex(this); - if (findIndex([info.row, info.col], selected3) > -1) - $(this.node()).addClass(selClass); - }); - } else { - selected3.map(function(ij) { - $(table.cell(ij[0] - 1, ij[1]).node()).addClass(selClass); - }); - } - }; - table.on('click.dt', 'tbody td', function() { - var $this = $(this), info = tweakCellIndex(table.cell(this)); - if ($this.hasClass(selClass)) { - $this.removeClass(selClass); - selected3.splice(findIndex([info.row, info.col], selected3), 1); - } else { - if (selMode === 'single') $(table.cells().nodes()).removeClass(selClass); - $this.addClass(selClass); - selected3 = selMode === 'single' ? [[info.row, info.col]] : - unique(selected3.concat([[info.row, info.col]])); - } - selectCells(false); // must call this to update selected3 based on selectable3 - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - }); - selectCells(false); // in case users have specified pre-selected columns - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - - if (server) table.on('draw.dt', function(e) { selectCells(false); }); - methods.selectCells = function(selected, ignoreSelectable) { - selected3 = selected ? selected : []; - selectCells(ignoreSelectable); - changeInput('cells_selected', transposeArray2D(selected3), 'shiny.matrix'); - } - } - } - - // expose some table info to Shiny - var updateTableInfo = function(e, settings) { - // TODO: is anyone interested in the page info? - // changeInput('page_info', table.page.info()); - var updateRowInfo = function(id, modifier) { - var idx; - if (server) { - idx = modifier.page === 'current' ? 
DT_rows_current : DT_rows_all; - } else { - var rows = table.rows($.extend({ - search: 'applied', - page: 'all' - }, modifier)); - idx = addOne(rows.indexes().toArray()); - } - changeInput('rows' + '_' + id, idx); - }; - updateRowInfo('current', {page: 'current'}); - updateRowInfo('all', {}); - } - table.on('draw.dt', updateTableInfo); - updateTableInfo(); - - // state info - table.on('draw.dt column-visibility.dt', function() { - changeInput('state', table.state()); - }); - changeInput('state', table.state()); - - // search info - var updateSearchInfo = function() { - changeInput('search', table.search()); - if (filterRow) changeInput('search_columns', filterRow.toArray().map(function(td) { - return $(td).find('input').first().val(); - })); - } - table.on('draw.dt', updateSearchInfo); - updateSearchInfo(); - - var cellInfo = function(thiz) { - var info = tweakCellIndex(table.cell(thiz)); - info.value = table.cell(thiz).data(); - return info; - } - // the current cell clicked on - table.on('click.dt', 'tbody td', function() { - changeInput('cell_clicked', cellInfo(this), null, null, {priority: 'event'}); - }) - changeInput('cell_clicked', {}); - - // do not trigger table selection when clicking on links unless they have classes - table.on('click.dt', 'tbody td a', function(e) { - if (this.className === '') e.stopPropagation(); - }); - - methods.addRow = function(data, rowname) { - var data0 = table.row(0).data(), n = data0.length, d = n - data.length; - if (d === 1) { - data = rowname.concat(data) - } else if (d !== 0) { - console.log(data); - console.log(data0); - throw 'New data must be of the same length as current data (' + n + ')'; - }; - table.row.add(data).draw(); - } - - methods.updateSearch = function(keywords) { - if (keywords.global !== null) - $(table.table().container()).find('input[type=search]').first() - .val(keywords.global).trigger('input'); - var columns = keywords.columns; - if (!filterRow || columns === null) return; - 
filterRow.toArray().map(function(td, i) { - var v = typeof columns === 'string' ? columns : columns[i]; - if (typeof v === 'undefined') { - console.log('The search keyword for column ' + i + ' is undefined') - return; - } - $(td).find('input').first().val(v); - searchColumn(i, v); - }); - table.draw(); - } - - methods.hideCols = function(hide, reset) { - if (reset) table.columns().visible(true, false); - table.columns(hide).visible(false); - } - - methods.showCols = function(show, reset) { - if (reset) table.columns().visible(false, false); - table.columns(show).visible(true); - } - - methods.colReorder = function(order, origOrder) { - table.colReorder.order(order, origOrder); - } - - methods.selectPage = function(page) { - if (table.page.info().pages < page || page < 1) { - throw 'Selected page is out of range'; - }; - table.page(page - 1).draw(false); - } - - methods.reloadData = function(resetPaging, clearSelection) { - // empty selections first if necessary - if (methods.selectRows && inArray('row', clearSelection)) methods.selectRows([]); - if (methods.selectColumns && inArray('column', clearSelection)) methods.selectColumns([]); - if (methods.selectCells && inArray('cell', clearSelection)) methods.selectCells([]); - table.ajax.reload(null, resetPaging); - } - - table.shinyMethods = methods; - }, - resize: function(el, width, height, instance) { - if (instance.data) this.renderValue(el, instance.data, instance); - - // dynamically adjust height if fillContainer = TRUE - if (instance.fillContainer) - this.fillAvailableHeight(el, height); - - this.adjustWidth(el); - }, - - // dynamically set the scroll body to fill available height - // (used with fillContainer = TRUE) - fillAvailableHeight: function(el, availableHeight) { - - // see how much of the table is occupied by header/footer elements - // and use that to compute a target scroll body height - var dtWrapper = $(el).find('div.dataTables_wrapper'); - var dtScrollBody = 
$(el).find($('div.dataTables_scrollBody')); - var framingHeight = dtWrapper.innerHeight() - dtScrollBody.innerHeight(); - var scrollBodyHeight = availableHeight - framingHeight; - - // set the height - dtScrollBody.height(scrollBodyHeight + 'px'); - }, - - // adjust the width of columns; remove the hard-coded widths on table and the - // scroll header when scrollX/Y are enabled - adjustWidth: function(el) { - var $el = $(el), table = $el.data('datatable'); - if (table) table.columns.adjust(); - $el.find('.dataTables_scrollHeadInner').css('width', '') - .children('table').css('margin-left', ''); - } -}); - - if (!HTMLWidgets.shinyMode) return; - - Shiny.addCustomMessageHandler('datatable-calls', function(data) { - var id = data.id; - var el = document.getElementById(id); - var table = el ? $(el).data('datatable') : null; - if (!table) { - console.log("Couldn't find table with id " + id); - return; - } - - var methods = table.shinyMethods, call = data.call; - if (methods[call.method]) { - methods[call.method].apply(table, call.args); - } else { - console.log("Unknown method " + call.method); - } - }); - -})(); diff --git a/static/rmarkdown-libs/datatables-css/datatables-crosstalk.css b/static/rmarkdown-libs/datatables-css/datatables-crosstalk.css deleted file mode 100644 index fb5bae84..00000000 --- a/static/rmarkdown-libs/datatables-css/datatables-crosstalk.css +++ /dev/null @@ -1,23 +0,0 @@ -.dt-crosstalk-fade { - opacity: 0.2; -} - -html body div.DTS div.dataTables_scrollBody { - background: none; -} - - -/* -Fix https://github.com/rstudio/DT/issues/563 -If the `table.display` is set to "block" (e.g., pkgdown), the browser will display -datatable objects strangely. The search panel and the page buttons will still be -in full-width but the table body will be "compact" and shorter. -In therory, having this attributes will affect `dom="t"` -with `display: block` users. But in reality, there should be no one. 
-We may remove the below lines in the future if the upstream agree to have this there. -See https://github.com/DataTables/DataTablesSrc/issues/160 -*/ - -table.dataTable { - display: table; -} diff --git a/static/rmarkdown-libs/dt-core/css/jquery.dataTables.extra.css b/static/rmarkdown-libs/dt-core/css/jquery.dataTables.extra.css deleted file mode 100644 index b2dd141f..00000000 --- a/static/rmarkdown-libs/dt-core/css/jquery.dataTables.extra.css +++ /dev/null @@ -1,28 +0,0 @@ -/* Selected rows/cells */ -table.dataTable tr.selected td, table.dataTable td.selected { - background-color: #b0bed9 !important; -} -/* In case of scrollX/Y or FixedHeader */ -.dataTables_scrollBody .dataTables_sizing { - visibility: hidden; -} - -/* The datatables' theme CSS file doesn't define -the color but with white background. It leads to an issue that -when the HTML's body color is set to 'white', the user can't -see the text since the background is white. One case happens in the -RStudio's IDE when inline viewing the DT table inside an Rmd file, -if the IDE theme is set to "Cobalt". - -See https://github.com/rstudio/DT/issues/447 for more info - -This fixes should have little side-effects because all the other elements -of the default theme use the #333 font color. - -TODO: The upstream may use relative colors for both the table background -and the color. It means the table can display well without this patch -then. At that time, we need to remove the below CSS attributes. 
-*/ -div.datatables { - color: #333; -} diff --git a/static/rmarkdown-libs/dt-core/css/jquery.dataTables.min.css b/static/rmarkdown-libs/dt-core/css/jquery.dataTables.min.css deleted file mode 100644 index 71ae98a4..00000000 --- a/static/rmarkdown-libs/dt-core/css/jquery.dataTables.min.css +++ /dev/null @@ -1 +0,0 @@ -table.dataTable{width:100%;margin:0 auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable thead th,table.dataTable thead td{padding:10px 18px;border-bottom:1px solid #111}table.dataTable thead th:active,table.dataTable thead td:active{outline:none}table.dataTable tfoot th,table.dataTable tfoot td{padding:10px 18px 6px 18px;border-top:1px solid #111}table.dataTable thead .sorting,table.dataTable thead .sorting_asc,table.dataTable thead .sorting_desc,table.dataTable thead .sorting_asc_disabled,table.dataTable thead .sorting_desc_disabled{cursor:pointer;*cursor:hand;background-repeat:no-repeat;background-position:center right}table.dataTable thead .sorting{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAQAAADYWf5HAAAAkElEQVQoz7XQMQ5AQBCF4dWQSJxC5wwax1Cq1e7BAdxD5SL+Tq/QCM1oNiJidwox0355mXnG/DrEtIQ6azioNZQxI0ykPhTQIwhCR+BmBYtlK7kLJYwWCcJA9M4qdrZrd8pPjZWPtOqdRQy320YSV17OatFC4euts6z39GYMKRPCTKY9UnPQ6P+GtMRfGtPnBCiqhAeJPmkqAAAAAElFTkSuQmCC)}table.dataTable thead .sorting_asc{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAYAAAByUDbMAAAAZ0lEQVQ4y2NgGLKgquEuFxBPAGI2ahhWCsS/gDibUoO0gPgxEP8H4ttArEyuQYxAPBdqEAxPBImTY5gjEL9DM+wTENuQahAvEO9DMwiGdwAxOymGJQLxTyD+jgWDxCMZRsEoGAVoAADeemwtPcZI2wAAAABJRU5ErkJggg==)}table.dataTable thead .sorting_desc{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAYAAAByUDbMAAAAZUlEQVQ4y2NgGAWjYBSggaqGu5FA/BOIv2PBIPFEUgxjB+IdQPwfC94HxLykus4GiD+hGfQOiB3J8SojEE9EM2wuSJzcsFMG4ttQgx4DsRalkZENxL+AuJQaMcsGxBOAmGvopk8AVz1sLZgg0bsAAAAASUVORK5CYII=)}table.dataTable thead 
.sorting_asc_disabled{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAQAAADYWf5HAAAAW0lEQVQoz2NgoCm4w3Vnwh02wspK7/y6k01Ikdadx3f+37l9RxmfIsY7c4GKQHDiHUbcyhzvvIMq+3THBpci3jv7oIpAcMcdduzKEu/8vPMdDn/eiWQYBYMKAAC3ykIEuYQJUgAAAABJRU5ErkJggg==)}table.dataTable thead .sorting_desc_disabled{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABMAAAATCAQAAADYWf5HAAAAWUlEQVQoz2NgGAWDCtyJvPPzznc4/HknEbsy9js77vyHw313eHGZZ3PnE1TRuzuOuK1lvDMRqmzuHUZ87lO+cxuo6PEdLUIeyb7z604pYf+y3Zlwh4u2YQoAc7ZCBHH4jigAAAAASUVORK5CYII=)}table.dataTable tbody tr{background-color:#ffffff}table.dataTable tbody tr.selected{background-color:#B0BED9}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border tbody th,table.dataTable.row-border tbody td,table.dataTable.display tbody th,table.dataTable.display tbody td{border-top:1px solid #ddd}table.dataTable.row-border tbody tr:first-child th,table.dataTable.row-border tbody tr:first-child td,table.dataTable.display tbody tr:first-child th,table.dataTable.display tbody tr:first-child td{border-top:none}table.dataTable.cell-border tbody th,table.dataTable.cell-border tbody td{border-top:1px solid #ddd;border-right:1px solid #ddd}table.dataTable.cell-border tbody tr th:first-child,table.dataTable.cell-border tbody tr td:first-child{border-left:1px solid #ddd}table.dataTable.cell-border tbody tr:first-child th,table.dataTable.cell-border tbody tr:first-child td{border-top:none}table.dataTable.stripe tbody tr.odd,table.dataTable.display tbody tr.odd{background-color:#f9f9f9}table.dataTable.stripe tbody tr.odd.selected,table.dataTable.display tbody tr.odd.selected{background-color:#acbad4}table.dataTable.hover tbody tr:hover,table.dataTable.display tbody tr:hover{background-color:#f6f6f6}table.dataTable.hover tbody tr:hover.selected,table.dataTable.display tbody tr:hover.selected{background-color:#aab7d1}table.dataTable.order-column tbody tr>.sorting_1,table.dataTable.order-column 
tbody tr>.sorting_2,table.dataTable.order-column tbody tr>.sorting_3,table.dataTable.display tbody tr>.sorting_1,table.dataTable.display tbody tr>.sorting_2,table.dataTable.display tbody tr>.sorting_3{background-color:#fafafa}table.dataTable.order-column tbody tr.selected>.sorting_1,table.dataTable.order-column tbody tr.selected>.sorting_2,table.dataTable.order-column tbody tr.selected>.sorting_3,table.dataTable.display tbody tr.selected>.sorting_1,table.dataTable.display tbody tr.selected>.sorting_2,table.dataTable.display tbody tr.selected>.sorting_3{background-color:#acbad5}table.dataTable.display tbody tr.odd>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd>.sorting_1{background-color:#f1f1f1}table.dataTable.display tbody tr.odd>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd>.sorting_2{background-color:#f3f3f3}table.dataTable.display tbody tr.odd>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd>.sorting_3{background-color:whitesmoke}table.dataTable.display tbody tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_1{background-color:#a6b4cd}table.dataTable.display tbody tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_2{background-color:#a8b5cf}table.dataTable.display tbody tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_3{background-color:#a9b7d1}table.dataTable.display tbody tr.even>.sorting_1,table.dataTable.order-column.stripe tbody tr.even>.sorting_1{background-color:#fafafa}table.dataTable.display tbody tr.even>.sorting_2,table.dataTable.order-column.stripe tbody tr.even>.sorting_2{background-color:#fcfcfc}table.dataTable.display tbody tr.even>.sorting_3,table.dataTable.order-column.stripe tbody tr.even>.sorting_3{background-color:#fefefe}table.dataTable.display tbody tr.even.selected>.sorting_1,table.dataTable.order-column.stripe tbody 
tr.even.selected>.sorting_1{background-color:#acbad5}table.dataTable.display tbody tr.even.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_2{background-color:#aebcd6}table.dataTable.display tbody tr.even.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_3{background-color:#afbdd8}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{background-color:#eaeaea}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{background-color:#ececec}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{background-color:#efefef}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{background-color:#a2aec7}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{background-color:#a3b0c9}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{background-color:#a5b2cb}table.dataTable.no-footer{border-bottom:1px solid #111}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable.compact thead th,table.dataTable.compact thead td{padding:4px 17px 4px 4px}table.dataTable.compact tfoot th,table.dataTable.compact tfoot td{padding:4px}table.dataTable.compact tbody th,table.dataTable.compact tbody td{padding:4px}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable 
td.dt-nowrap{white-space:nowrap}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable,table.dataTable th,table.dataTable td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both;*zoom:1;zoom:1}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{margin-left:0.5em}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:0.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:0.25em}.dataTables_wrapper .dataTables_paginate 
.paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:0.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;*cursor:hand;color:#333 !important;border:1px solid transparent;border-radius:2px}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:#333 !important;border:1px solid #979797;background-color:white;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #fff), color-stop(100%, #dcdcdc));background:-webkit-linear-gradient(top, #fff 0%, #dcdcdc 100%);background:-moz-linear-gradient(top, #fff 0%, #dcdcdc 100%);background:-ms-linear-gradient(top, #fff 0%, #dcdcdc 100%);background:-o-linear-gradient(top, #fff 0%, #dcdcdc 100%);background:linear-gradient(to bottom, #fff 0%, #dcdcdc 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate .paginate_button:hover{color:white !important;border:1px solid #111;background-color:#585858;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, #111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#2b2b2b;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 
0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_processing{position:absolute;top:50%;left:50%;width:100%;height:40px;margin-left:-50%;margin-top:-25px;padding-top:20px;text-align:center;font-size:1.2em;background-color:white;background:-webkit-gradient(linear, left top, right top, color-stop(0%, rgba(255,255,255,0)), color-stop(25%, rgba(255,255,255,0.9)), color-stop(75%, rgba(255,255,255,0.9)), color-stop(100%, rgba(255,255,255,0)));background:-webkit-linear-gradient(left, rgba(255,255,255,0) 0%, rgba(255,255,255,0.9) 25%, rgba(255,255,255,0.9) 75%, rgba(255,255,255,0) 100%);background:-moz-linear-gradient(left, rgba(255,255,255,0) 0%, rgba(255,255,255,0.9) 25%, rgba(255,255,255,0.9) 75%, rgba(255,255,255,0) 100%);background:-ms-linear-gradient(left, rgba(255,255,255,0) 0%, rgba(255,255,255,0.9) 25%, rgba(255,255,255,0.9) 75%, rgba(255,255,255,0) 100%);background:-o-linear-gradient(left, rgba(255,255,255,0) 0%, rgba(255,255,255,0.9) 25%, rgba(255,255,255,0.9) 75%, rgba(255,255,255,0) 100%);background:linear-gradient(to right, rgba(255,255,255,0) 0%, rgba(255,255,255,0.9) 25%, rgba(255,255,255,0.9) 75%, rgba(255,255,255,0) 100%)}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:#333}.dataTables_wrapper .dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{*margin-top:-1px;-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll 
div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid #111}.dataTables_wrapper.no-footer div.dataTables_scrollHead table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:0.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:0.5em}} diff --git a/static/rmarkdown-libs/dt-core/js/jquery.dataTables.min.js b/static/rmarkdown-libs/dt-core/js/jquery.dataTables.min.js deleted file mode 100644 index d297f256..00000000 --- a/static/rmarkdown-libs/dt-core/js/jquery.dataTables.min.js +++ /dev/null @@ -1,180 +0,0 @@ -/*! - Copyright 2008-2019 SpryMedia Ltd. 
- - This source file is free software, available under the following license: - MIT license - http://datatables.net/license - - This source file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the license files for details. - - For details please refer to: http://www.datatables.net - DataTables 1.10.20 - ©2008-2019 SpryMedia Ltd - datatables.net/license -*/ -var $jscomp=$jscomp||{};$jscomp.scope={};$jscomp.findInternal=function(f,z,y){f instanceof String&&(f=String(f));for(var p=f.length,H=0;H").css({position:"fixed",top:0,left:-1*f(z).scrollLeft(),height:1,width:1, -overflow:"hidden"}).append(f("
    ").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(f("
    ").css({width:"100%",height:10}))).appendTo("body"),d=c.children(),e=d.children();b.barWidth=d[0].offsetWidth-d[0].clientWidth;b.bScrollOversize=100===e[0].offsetWidth&&100!==d[0].clientWidth;b.bScrollbarLeft=1!==Math.round(e.offset().left);b.bBounding=c[0].getBoundingClientRect().width?!0:!1;c.remove()}f.extend(a.oBrowser,q.__browser);a.oScroll.iBarWidth=q.__browser.barWidth} -function mb(a,b,c,d,e,h){var g=!1;if(c!==p){var k=c;g=!0}for(;d!==e;)a.hasOwnProperty(d)&&(k=g?b(k,a[d],d,a):a[d],g=!0,d+=h);return k}function Ia(a,b){var c=q.defaults.column,d=a.aoColumns.length;c=f.extend({},q.models.oColumn,c,{nTh:b?b:y.createElement("th"),sTitle:c.sTitle?c.sTitle:b?b.innerHTML:"",aDataSort:c.aDataSort?c.aDataSort:[d],mData:c.mData?c.mData:d,idx:d});a.aoColumns.push(c);c=a.aoPreSearchCols;c[d]=f.extend({},q.models.oSearch,c[d]);ma(a,d,f(b).data())}function ma(a,b,c){b=a.aoColumns[b]; -var d=a.oClasses,e=f(b.nTh);if(!b.sWidthOrig){b.sWidthOrig=e.attr("width")||null;var h=(e.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/);h&&(b.sWidthOrig=h[1])}c!==p&&null!==c&&(kb(c),L(q.defaults.column,c,!0),c.mDataProp===p||c.mData||(c.mData=c.mDataProp),c.sType&&(b._sManualType=c.sType),c.className&&!c.sClass&&(c.sClass=c.className),c.sClass&&e.addClass(c.sClass),f.extend(b,c),M(b,c,"sWidth","sWidthOrig"),c.iDataSort!==p&&(b.aDataSort=[c.iDataSort]),M(b,c,"aDataSort"));var g=b.mData,k=U(g), -l=b.mRender?U(b.mRender):null;c=function(a){return"string"===typeof a&&-1!==a.indexOf("@")};b._bAttrSrc=f.isPlainObject(g)&&(c(g.sort)||c(g.type)||c(g.filter));b._setter=null;b.fnGetData=function(a,b,c){var d=k(a,b,p,c);return l&&b?l(d,b,a,c):d};b.fnSetData=function(a,b,c){return Q(g)(a,b,c)};"number"!==typeof g&&(a._rowReadObject=!0);a.oFeatures.bSort||(b.bSortable=!1,e.addClass(d.sSortableNone));a=-1!==f.inArray("asc",b.asSorting);c=-1!==f.inArray("desc",b.asSorting);b.bSortable&&(a||c)?a&&!c?(b.sSortingClass= 
-d.sSortableAsc,b.sSortingClassJUI=d.sSortJUIAscAllowed):!a&&c?(b.sSortingClass=d.sSortableDesc,b.sSortingClassJUI=d.sSortJUIDescAllowed):(b.sSortingClass=d.sSortable,b.sSortingClassJUI=d.sSortJUI):(b.sSortingClass=d.sSortableNone,b.sSortingClassJUI="")}function aa(a){if(!1!==a.oFeatures.bAutoWidth){var b=a.aoColumns;Ja(a);for(var c=0,d=b.length;cn[m])d(k.length+ -n[m],l);else if("string"===typeof n[m]){var w=0;for(g=k.length;wb&&a[e]--; -1!=d&&c===p&&a.splice(d,1)}function ea(a,b,c,d){var e=a.aoData[b],h,g=function(c,d){for(;c.childNodes.length;)c.removeChild(c.firstChild);c.innerHTML=I(a,b,d,"display")};if("dom"!==c&&(c&&"auto"!==c||"dom"!==e.src)){var k=e.anCells;if(k)if(d!==p)g(k[d],d);else for(c=0,h=k.length;c").appendTo(d));var l=0;for(b=k.length;ltr").attr("role","row");f(d).find(">tr>th, >tr>td").addClass(g.sHeaderTH);f(e).find(">tr>th, >tr>td").addClass(g.sFooterTH);if(null!==e)for(a=a.aoFooter[0],l=0,b=a.length;l=a.fnRecordsDisplay()?0:g,a.iInitDisplayStart=-1);g=a._iDisplayStart;var n=a.fnDisplayEnd();if(a.bDeferLoading)a.bDeferLoading=!1,a.iDraw++,K(a,!1);else if(!k)a.iDraw++;else if(!a.bDestroying&&!qb(a))return;if(0!==l.length)for(h=k?a.aoData.length:n,k=k?0:g;k",{"class":e?d[0]:""}).append(f("",{valign:"top",colSpan:W(a),"class":a.oClasses.sRowEmpty}).html(c))[0];A(a,"aoHeaderCallback","header",[f(a.nTHead).children("tr")[0], -Oa(a),g,n,l]);A(a,"aoFooterCallback","footer",[f(a.nTFoot).children("tr")[0],Oa(a),g,n,l]);d=f(a.nTBody);d.children().detach();d.append(f(b));A(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function V(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&rb(a);d?ia(a,a.oPreviousSearch):a.aiDisplay=a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;S(a);a._drawHold=!1}function sb(a){var b=a.oClasses,c=f(a.nTable);c=f("
    ").insertBefore(c);var d=a.oFeatures,e= -f("
    ",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=a.nTable.nextSibling;for(var h=a.sDom.split(""),g,k,l,n,m,p,u=0;u")[0];n=h[u+1];if("'"==n||'"'==n){m="";for(p=2;h[u+p]!=n;)m+=h[u+p],p++;"H"==m?m=b.sJUIHeader:"F"==m&&(m=b.sJUIFooter);-1!=m.indexOf(".")?(n=m.split("."),l.id=n[0].substr(1,n[0].length-1),l.className=n[1]):"#"==m.charAt(0)?l.id=m.substr(1, -m.length-1):l.className=m;u+=p}e.append(l);e=f(l)}else if(">"==k)e=e.parent();else if("l"==k&&d.bPaginate&&d.bLengthChange)g=tb(a);else if("f"==k&&d.bFilter)g=ub(a);else if("r"==k&&d.bProcessing)g=vb(a);else if("t"==k)g=wb(a);else if("i"==k&&d.bInfo)g=xb(a);else if("p"==k&&d.bPaginate)g=yb(a);else if(0!==q.ext.feature.length)for(l=q.ext.feature,p=0,n=l.length;p',k=d.sSearch;k=k.match(/_INPUT_/)?k.replace("_INPUT_",g):k+g;b=f("
    ",{id:h.f?null:c+"_filter","class":b.sFilter}).append(f("
    ").addClass(b.sLength);a.aanFeatures.l||(l[0].id=c+"_length");l.children().append(a.oLanguage.sLengthMenu.replace("_MENU_", -e[0].outerHTML));f("select",l).val(a._iDisplayLength).on("change.DT",function(b){Va(a,f(this).val());S(a)});f(a.nTable).on("length.dt.DT",function(b,c,d){a===c&&f("select",l).val(d)});return l[0]}function yb(a){var b=a.sPaginationType,c=q.ext.pager[b],d="function"===typeof c,e=function(a){S(a)};b=f("
    ").addClass(a.oClasses.sPaging+b)[0];var h=a.aanFeatures;d||c.fnInit(a,b,e);h.p||(b.id=a.sTableId+"_paginate",a.aoDrawCallback.push({fn:function(a){if(d){var b=a._iDisplayStart,g=a._iDisplayLength, -f=a.fnRecordsDisplay(),m=-1===g;b=m?0:Math.ceil(b/g);g=m?1:Math.ceil(f/g);f=c(b,g);var p;m=0;for(p=h.p.length;mh&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==b?d+e",{id:a.aanFeatures.r?null:a.sTableId+"_processing","class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).insertBefore(a.nTable)[0]}function K(a,b){a.oFeatures.bProcessing&&f(a.aanFeatures.r).css("display",b?"block":"none");A(a,null,"processing",[a,b])}function wb(a){var b=f(a.nTable);b.attr("role","grid");var c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX,e=c.sY, -h=a.oClasses,g=b.children("caption"),k=g.length?g[0]._captionSide:null,l=f(b[0].cloneNode(!1)),n=f(b[0].cloneNode(!1)),m=b.children("tfoot");m.length||(m=null);l=f("
    ",{"class":h.sScrollWrapper}).append(f("
    ",{"class":h.sScrollHead}).css({overflow:"hidden",position:"relative",border:0,width:d?d?B(d):null:"100%"}).append(f("
    ",{"class":h.sScrollHeadInner}).css({"box-sizing":"content-box",width:c.sXInner||"100%"}).append(l.removeAttr("id").css("margin-left",0).append("top"===k?g:null).append(b.children("thead"))))).append(f("
    ", -{"class":h.sScrollBody}).css({position:"relative",overflow:"auto",width:d?B(d):null}).append(b));m&&l.append(f("
    ",{"class":h.sScrollFoot}).css({overflow:"hidden",border:0,width:d?d?B(d):null:"100%"}).append(f("
    ",{"class":h.sScrollFootInner}).append(n.removeAttr("id").css("margin-left",0).append("bottom"===k?g:null).append(b.children("tfoot")))));b=l.children();var p=b[0];h=b[1];var u=m?b[2]:null;if(d)f(h).on("scroll.DT",function(a){a=this.scrollLeft;p.scrollLeft=a;m&&(u.scrollLeft=a)}); -f(h).css(e&&c.bCollapse?"max-height":"height",e);a.nScrollHead=p;a.nScrollBody=h;a.nScrollFoot=u;a.aoDrawCallback.push({fn:na,sName:"scrolling"});return l[0]}function na(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY;b=b.iBarWidth;var h=f(a.nScrollHead),g=h[0].style,k=h.children("div"),l=k[0].style,n=k.children("table");k=a.nScrollBody;var m=f(k),w=k.style,u=f(a.nScrollFoot).children("div"),q=u.children("table"),t=f(a.nTHead),r=f(a.nTable),v=r[0],za=v.style,T=a.nTFoot?f(a.nTFoot):null,A=a.oBrowser, -x=A.bScrollOversize,ac=J(a.aoColumns,"nTh"),Ya=[],y=[],z=[],C=[],G,H=function(a){a=a.style;a.paddingTop="0";a.paddingBottom="0";a.borderTopWidth="0";a.borderBottomWidth="0";a.height=0};var D=k.scrollHeight>k.clientHeight;if(a.scrollBarVis!==D&&a.scrollBarVis!==p)a.scrollBarVis=D,aa(a);else{a.scrollBarVis=D;r.children("thead, tfoot").remove();if(T){var E=T.clone().prependTo(r);var F=T.find("tr");E=E.find("tr")}var I=t.clone().prependTo(r);t=t.find("tr");D=I.find("tr");I.find("th, td").removeAttr("tabindex"); -c||(w.width="100%",h[0].style.width="100%");f.each(ua(a,I),function(b,c){G=ba(a,b);c.style.width=a.aoColumns[G].sWidth});T&&N(function(a){a.style.width=""},E);h=r.outerWidth();""===c?(za.width="100%",x&&(r.find("tbody").height()>k.offsetHeight||"scroll"==m.css("overflow-y"))&&(za.width=B(r.outerWidth()-b)),h=r.outerWidth()):""!==d&&(za.width=B(d),h=r.outerWidth());N(H,D);N(function(a){z.push(a.innerHTML);Ya.push(B(f(a).css("width")))},D);N(function(a,b){-1!==f.inArray(a,ac)&&(a.style.width=Ya[b])}, -t);f(D).height(0);T&&(N(H,E),N(function(a){C.push(a.innerHTML);y.push(B(f(a).css("width")))},E),N(function(a,b){a.style.width=y[b]},F),f(E).height(0));N(function(a,b){a.innerHTML='
    '+z[b]+"
    ";a.childNodes[0].style.height="0";a.childNodes[0].style.overflow="hidden";a.style.width=Ya[b]},D);T&&N(function(a,b){a.innerHTML='
    '+C[b]+"
    ";a.childNodes[0].style.height="0";a.childNodes[0].style.overflow="hidden";a.style.width=y[b]},E);r.outerWidth()< -h?(F=k.scrollHeight>k.offsetHeight||"scroll"==m.css("overflow-y")?h+b:h,x&&(k.scrollHeight>k.offsetHeight||"scroll"==m.css("overflow-y"))&&(za.width=B(F-b)),""!==c&&""===d||O(a,1,"Possible column misalignment",6)):F="100%";w.width=B(F);g.width=B(F);T&&(a.nScrollFoot.style.width=B(F));!e&&x&&(w.height=B(v.offsetHeight+b));c=r.outerWidth();n[0].style.width=B(c);l.width=B(c);d=r.height()>k.clientHeight||"scroll"==m.css("overflow-y");e="padding"+(A.bScrollbarLeft?"Left":"Right");l[e]=d?b+"px":"0px";T&& -(q[0].style.width=B(c),u[0].style.width=B(c),u[0].style[e]=d?b+"px":"0px");r.children("colgroup").insertBefore(r.children("thead"));m.trigger("scroll");!a.bSorted&&!a.bFiltered||a._drawHold||(k.scrollTop=0)}}function N(a,b,c){for(var d=0,e=0,h=b.length,g,k;e").appendTo(k.find("tbody"));k.find("thead, tfoot").remove(); -k.append(f(a.nTHead).clone()).append(f(a.nTFoot).clone());k.find("tfoot th, tfoot td").css("width","");n=ua(a,k.find("thead")[0]);for(q=0;q").css({width:r.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(a.aoData.length)for(q=0;q").css(h|| -e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(k).appendTo(p);h&&g?k.width(g):h?(k.css("width","auto"),k.removeAttr("width"),k.width()").css("width",B(a)).appendTo(b||y.body);b=a[0].offsetWidth;a.remove();return b}function Kb(a,b){var c=Lb(a,b);if(0>c)return null;var d=a.aoData[c];return d.nTr?d.anCells[b]:f("").html(I(a,c,b,"display"))[0]}function Lb(a,b){for(var c,d=-1,e=-1,h=0,g=a.aoData.length;hd&&(d=c.length,e=h);return e} -function B(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function Y(a){var b=[],c=a.aoColumns;var d=a.aaSortingFixed;var e=f.isPlainObject(d);var h=[];var 
g=function(a){a.length&&!f.isArray(a[0])?h.push(a):f.merge(h,a)};f.isArray(d)&&g(d);e&&d.pre&&g(d.pre);g(a.aaSorting);e&&d.post&&g(d.post);for(a=0;an?1:0; -if(0!==m)return"asc"===l.dir?m:-m}m=c[a];n=c[b];return mn?1:0}):g.sort(function(a,b){var h,g=k.length,f=e[a]._aSortData,l=e[b]._aSortData;for(h=0;hp?1:0})}a.bSorted=!0}function Nb(a){var b=a.aoColumns,c=Y(a);a=a.oLanguage.oAria;for(var d=0,e=b.length;d/g,"");var f=h.nTh;f.removeAttribute("aria-sort"); -h.bSortable&&(0e?e+1:3))}e=0;for(h=d.length;ee?e+1:3))}a.aLastSort=d}function Mb(a,b){var c=a.aoColumns[b],d=q.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ca(a,b)));for(var h,g=q.ext.type.order[c.sType+"-pre"],k=0,f=a.aoData.length;k=h.length?[0,c[1]]:c)}));b.search!==p&&f.extend(a.oPreviousSearch, -Gb(b.search));if(b.columns)for(d=0,e=b.columns.length;d=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function Ra(a,b){a=a.renderer;var c=q.ext.renderer[b];return f.isPlainObject(a)&&a[b]?c[a[b]]||c._:"string"===typeof a?c[a]||c._:c._}function D(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function ka(a,b){var c=Pb.numbers_length,d=Math.floor(c/2);b<=c?a=Z(0,b):a<=d?(a=Z(0,c-2),a.push("ellipsis"),a.push(b-1)):(a>=b-1-d?a=Z(b-(c-2),b):(a=Z(a-d+2,a+d-1),a.push("ellipsis"), -a.push(b-1)),a.splice(0,0,"ellipsis"),a.splice(0,0,0));a.DT_el="span";return a}function Ha(a){f.each({num:function(b){return Da(b,a)},"num-fmt":function(b){return Da(b,a,bb)},"html-num":function(b){return Da(b,a,Ea)},"html-num-fmt":function(b){return Da(b,a,Ea,bb)}},function(b,c){C.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(C.type.search[b+a]=C.type.search.html)})}function Qb(a){return function(){var b=[Ca(this[q.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return q.ext.internal[a].apply(this, -b)}}var q=function(a){this.$=function(a,b){return this.api(!0).$(a,b)};this._=function(a,b){return this.api(!0).rows(a,b).data()};this.api=function(a){return a?new 
v(Ca(this[C.iApiIndex])):new v(this)};this.fnAddData=function(a,b){var c=this.api(!0);a=f.isArray(a)&&(f.isArray(a[0])||f.isPlainObject(a[0]))?c.rows.add(a):c.row.add(a);(b===p||b)&&c.draw();return a.flatten().toArray()};this.fnAdjustColumnSizing=function(a){var b=this.api(!0).columns.adjust(),c=b.settings()[0],d=c.oScroll;a===p||a?b.draw(!1): -(""!==d.sX||""!==d.sY)&&na(c)};this.fnClearTable=function(a){var b=this.api(!0).clear();(a===p||a)&&b.draw()};this.fnClose=function(a){this.api(!0).row(a).child.hide()};this.fnDeleteRow=function(a,b,c){var d=this.api(!0);a=d.rows(a);var e=a.settings()[0],h=e.aoData[a[0][0]];a.remove();b&&b.call(this,e,h);(c===p||c)&&d.draw();return h};this.fnDestroy=function(a){this.api(!0).destroy(a)};this.fnDraw=function(a){this.api(!0).draw(a)};this.fnFilter=function(a,b,c,d,e,f){e=this.api(!0);null===b||b===p? -e.search(a,c,d,f):e.column(b).search(a,c,d,f);e.draw()};this.fnGetData=function(a,b){var c=this.api(!0);if(a!==p){var d=a.nodeName?a.nodeName.toLowerCase():"";return b!==p||"td"==d||"th"==d?c.cell(a,b).data():c.row(a).data()||null}return c.data().toArray()};this.fnGetNodes=function(a){var b=this.api(!0);return a!==p?b.row(a).node():b.rows().nodes().flatten().toArray()};this.fnGetPosition=function(a){var b=this.api(!0),c=a.nodeName.toUpperCase();return"TR"==c?b.row(a).index():"TD"==c||"TH"==c?(a=b.cell(a).index(), -[a.row,a.columnVisible,a.column]):null};this.fnIsOpen=function(a){return this.api(!0).row(a).child.isShown()};this.fnOpen=function(a,b,c){return this.api(!0).row(a).child(b,c).show().child()[0]};this.fnPageChange=function(a,b){a=this.api(!0).page(a);(b===p||b)&&a.draw(!1)};this.fnSetColumnVis=function(a,b,c){a=this.api(!0).column(a).visible(b);(c===p||c)&&a.columns.adjust().draw()};this.fnSettings=function(){return Ca(this[C.iApiIndex])};this.fnSort=function(a){this.api(!0).order(a).draw()};this.fnSortListener= -function(a,b,c){this.api(!0).order.listener(a,b,c)};this.fnUpdate=function(a,b,c,d,e){var 
h=this.api(!0);c===p||null===c?h.row(b).data(a):h.cell(b,c).data(a);(e===p||e)&&h.columns.adjust();(d===p||d)&&h.draw();return 0};this.fnVersionCheck=C.fnVersionCheck;var b=this,c=a===p,d=this.length;c&&(a={});this.oApi=this.internal=C.internal;for(var e in q.ext.internal)e&&(this[e]=Qb(e));this.each(function(){var e={},g=1").appendTo(w));r.nTHead=b[0];b=w.children("tbody");0===b.length&&(b=f("").appendTo(w));r.nTBody=b[0];b=w.children("tfoot");0===b.length&&0").appendTo(w));0===b.length||0===b.children().length?w.addClass(x.sNoFooter):0/g,cc=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,dc=/(\/|\.|\*|\+|\?|\||\(|\)|\[|\]|\{|\}|\\|\$|\^|\-)/g,bb=/[',$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,P=function(a){return a&&!0!==a&&"-"!==a?!1: -!0},Sb=function(a){var b=parseInt(a,10);return!isNaN(b)&&isFinite(a)?b:null},Tb=function(a,b){cb[b]||(cb[b]=new RegExp(Ua(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(cb[b],"."):a},db=function(a,b,c){var d="string"===typeof a;if(P(a))return!0;b&&d&&(a=Tb(a,b));c&&d&&(a=a.replace(bb,""));return!isNaN(parseFloat(a))&&isFinite(a)},Ub=function(a,b,c){return P(a)?!0:P(a)||"string"===typeof a?db(a.replace(Ea,""),b,c)?!0:null:null},J=function(a,b,c){var d=[],e=0,h=a.length;if(c!== -p)for(;ea.length)){var b=a.slice().sort();for(var c=b[0],d=1, -e=b.length;d")[0],$b=ya.textContent!==p,bc=/<.*?>/g,Sa=q.util.throttle,Wb=[],G=Array.prototype,ec=function(a){var b,c=q.settings,d=f.map(c,function(a,b){return a.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase()){var e=f.inArray(a,d);return-1!==e?[c[e]]:null}if(a&&"function"===typeof a.settings)return a.settings().toArray();"string"===typeof a?b=f(a):a instanceof f&&(b=a)}else return[];if(b)return b.map(function(a){e=f.inArray(this, -d);return-1!==e?c[e]:null}).toArray()};var v=function(a,b){if(!(this instanceof v))return new v(a,b);var 
c=[],d=function(a){(a=ec(a))&&c.push.apply(c,a)};if(f.isArray(a))for(var e=0,h=a.length;ea?new v(b[a],this[a]):null},filter:function(a){var b=[];if(G.filter)b=G.filter.call(this,a,this);else for(var c=0,d=this.length;c").addClass(c),f("td",d).addClass(c).html(b)[0].colSpan=W(a),e.push(d[0]))};h(c,d);b._details&&b._details.detach();b._details=f(e);b._detailsShow&&b._details.insertAfter(b.nTr)},hb=function(a,b){var c=a.context;c.length&&(a=c[0].aoData[b!==p?b:a[0]])&&a._details&&(a._details.remove(),a._detailsShow=p,a._details=p)},Yb=function(a,b){var c=a.context;c.length&&a.length&&(a=c[0].aoData[a[0]],a._details&&((a._detailsShow=b)?a._details.insertAfter(a.nTr): -a._details.detach(),ic(c[0])))},ic=function(a){var b=new v(a),c=a.aoData;b.off("draw.dt.DT_details column-visibility.dt.DT_details destroy.dt.DT_details");0g){var m=f.map(d,function(a,b){return a.bVisible?b:null});return[m[m.length+g]]}return[ba(a,g)];case "name":return f.map(e,function(a,b){return a===n[1]?b:null});default:return[]}if(b.nodeName&&b._DT_CellIndex)return[b._DT_CellIndex.column];g=f(h).filter(b).map(function(){return f.inArray(this, -h)}).toArray();if(g.length||!b.nodeName)return g;g=f(b).closest("*[data-dt-column]");return g.length?[g.data("dt-column")]:[]},a,c)};t("columns()",function(a,b){a===p?a="":f.isPlainObject(a)&&(b=a,a="");b=fb(b);var c=this.iterator("table",function(c){return kc(c,a,b)},1);c.selector.cols=a;c.selector.opts=b;return c});x("columns().header()","column().header()",function(a,b){return this.iterator("column",function(a,b){return a.aoColumns[b].nTh},1)});x("columns().footer()","column().footer()",function(a, -b){return this.iterator("column",function(a,b){return a.aoColumns[b].nTf},1)});x("columns().data()","column().data()",function(){return this.iterator("column-rows",Zb,1)});x("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].mData},1)});x("columns().cache()","column().cache()",function(a){return 
this.iterator("column-rows",function(b,c,d,e,f){return la(b.aoData,f,"search"===a?"_aFilterData":"_aSortData",c)},1)});x("columns().nodes()", -"column().nodes()",function(){return this.iterator("column-rows",function(a,b,c,d,e){return la(a.aoData,e,"anCells",b)},1)});x("columns().visible()","column().visible()",function(a,b){var c=this,d=this.iterator("column",function(b,c){if(a===p)return b.aoColumns[c].bVisible;var d=b.aoColumns,e=d[c],h=b.aoData,n;if(a!==p&&e.bVisible!==a){if(a){var m=f.inArray(!0,J(d,"bVisible"),c+1);d=0;for(n=h.length;dd;return!0};q.isDataTable=q.fnIsDataTable=function(a){var b=f(a).get(0),c=!1;if(a instanceof -q.Api)return!0;f.each(q.settings,function(a,e){a=e.nScrollHead?f("table",e.nScrollHead)[0]:null;var d=e.nScrollFoot?f("table",e.nScrollFoot)[0]:null;if(e.nTable===b||a===b||d===b)c=!0});return c};q.tables=q.fnTables=function(a){var b=!1;f.isPlainObject(a)&&(b=a.api,a=a.visible);var c=f.map(q.settings,function(b){if(!a||a&&f(b.nTable).is(":visible"))return b.nTable});return b?new v(c):c};q.camelToHungarian=L;t("$()",function(a,b){b=this.rows(b).nodes();b=f(b);return f([].concat(b.filter(a).toArray(), -b.find(a).toArray()))});f.each(["on","one","off"],function(a,b){t(b+"()",function(){var a=Array.prototype.slice.call(arguments);a[0]=f.map(a[0].split(/\s/),function(a){return a.match(/\.dt\b/)?a:a+".dt"}).join(" ");var d=f(this.tables().nodes());d[b].apply(d,a);return this})});t("clear()",function(){return this.iterator("table",function(a){qa(a)})});t("settings()",function(){return new v(this.context,this.context)});t("init()",function(){var a=this.context;return a.length?a[0].oInit:null});t("data()", -function(){return this.iterator("table",function(a){return J(a.aoData,"_aData")}).flatten()});t("destroy()",function(a){a=a||!1;return this.iterator("table",function(b){var c=b.nTableWrapper.parentNode,d=b.oClasses,e=b.nTable,h=b.nTBody,g=b.nTHead,k=b.nTFoot,l=f(e);h=f(h);var n=f(b.nTableWrapper),m=f.map(b.aoData,function(a){return 
a.nTr}),p;b.bDestroying=!0;A(b,"aoDestroyCallback","destroy",[b]);a||(new v(b)).columns().visible(!0);n.off(".DT").find(":not(tbody *)").off(".DT");f(z).off(".DT-"+b.sInstance); -e!=g.parentNode&&(l.children("thead").detach(),l.append(g));k&&e!=k.parentNode&&(l.children("tfoot").detach(),l.append(k));b.aaSorting=[];b.aaSortingFixed=[];Aa(b);f(m).removeClass(b.asStripeClasses.join(" "));f("th, td",g).removeClass(d.sSortable+" "+d.sSortableAsc+" "+d.sSortableDesc+" "+d.sSortableNone);h.children().detach();h.append(m);g=a?"remove":"detach";l[g]();n[g]();!a&&c&&(c.insertBefore(e,b.nTableReinsertBefore),l.css("width",b.sDestroyWidth).removeClass(d.sTable),(p=b.asDestroyStripes.length)&& -h.children().each(function(a){f(this).addClass(b.asDestroyStripes[a%p])}));c=f.inArray(b,q.settings);-1!==c&&q.settings.splice(c,1)})});f.each(["column","row","cell"],function(a,b){t(b+"s().every()",function(a){var c=this.selector.opts,e=this;return this.iterator(b,function(d,f,k,l,n){a.call(e[b](f,"cell"===b?k:c,"cell"===b?c:p),f,k,l,n)})})});t("i18n()",function(a,b,c){var d=this.context[0];a=U(a)(d.oLanguage);a===p&&(a=b);c!==p&&f.isPlainObject(a)&&(a=a[c]!==p?a[c]:a._);return a.replace("%d",c)}); -q.version="1.10.20";q.settings=[];q.models={};q.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0};q.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,idx:-1};q.models.oColumn={idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,sClass:null,sContentPadding:null, 
-sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null,sWidth:null,sWidthOrig:null};q.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,25,50,100],aoColumns:null,aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,bSort:!0,bSortMulti:!0,bSortCellsTop:!1, -bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g,this.oLanguage.sThousands)},fnHeaderCallback:null,fnInfoCallback:null,fnInitComplete:null,fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+a.sInstance+"_"+location.pathname))}catch(b){}}, -fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},fnStateSaveParams:null,iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},oPaginate:{sFirst:"First",sLast:"Last", -sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries",sInfoFiltered:"(filtered from _MAX_ total entries)",sInfoPostFix:"",sDecimal:"",sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"Processing...",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records 
found"},oSearch:f.extend({},q.models.oSearch),sAjaxDataProp:"data", -sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};H(q.defaults);q.defaults.column={aDataSort:null,iDataSort:-1,asSorting:["asc","desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};H(q.defaults.column);q.models.oSettings= -{oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null,bStateSave:null},oScroll:{bCollapse:null,iBarWidth:0,sX:null,sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],aoFooter:[],oPreviousSearch:{}, -aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[],aoRowCreatedCallback:[],aoPreDrawCallback:[],aoInitComplete:[],aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",iStateDuration:0, 
-aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,bAjaxDataGet:!0,jqXHR:null,json:p,oAjaxData:p,fnServerData:null,aoServerParams:[],sServerMethod:null,fnFormatNumber:null,aLengthMenu:null,iDraw:0,bDrawing:!1,iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==D(this)?1*this._iRecordsTotal: -this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==D(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+a,d=this.aiDisplay.length,e=this.oFeatures,f=e.bPaginate;return e.bServerSide?!1===f||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!f||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};q.ext=C={buttons:{}, -classes:{},builder:"-source-",errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[],search:{},order:{}},_unique:0,fnVersionCheck:q.fnVersionCheck,iApiIndex:0,oJUIClasses:{},sVersion:q.version};f.extend(C,{afnFiltering:C.search,aTypes:C.type.detect,ofnSearch:C.type.search,oSort:C.type.order,afnSortData:C.order,aoFeatures:C.feature,oApi:C.internal,oStdClasses:C.classes,oPagination:C.pager}); -f.extend(q.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty",sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter",sInfo:"dataTables_info",sPaging:"dataTables_paginate 
paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_asc_disabled", -sSortableDesc:"sorting_desc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner",sScrollBody:"dataTables_scrollBody",sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"", -sJUIHeader:"",sJUIFooter:""});var Pb=q.ext.pager;f.extend(Pb,{simple:function(a,b){return["previous","next"]},full:function(a,b){return["first","previous","next","last"]},numbers:function(a,b){return[ka(a,b)]},simple_numbers:function(a,b){return["previous",ka(a,b),"next"]},full_numbers:function(a,b){return["first","previous",ka(a,b),"next","last"]},first_last_numbers:function(a,b){return["first",ka(a,b),"last"]},_numbers:ka,numbers_length:7});f.extend(!0,q.ext.renderer,{pageButton:{_:function(a,b, -c,d,e,h){var g=a.oClasses,k=a.oLanguage.oPaginate,l=a.oLanguage.oAria.paginate||{},n,m,q=0,t=function(b,d){var p,r=g.sPageButtonDisabled,u=function(b){Xa(a,b.data.action,!0)};var w=0;for(p=d.length;w").appendTo(b);t(x,v)}else{n=null;m=v;x=a.iTabIndex;switch(v){case "ellipsis":b.append('');break;case "first":n=k.sFirst;0===e&&(x=-1,m+=" "+r);break;case "previous":n=k.sPrevious;0===e&&(x=-1,m+= -" "+r);break;case "next":n=k.sNext;e===h-1&&(x=-1,m+=" "+r);break;case "last":n=k.sLast;e===h-1&&(x=-1,m+=" "+r);break;default:n=v+1,m=e===v?g.sPageButtonActive:""}null!==n&&(x=f("",{"class":g.sPageButton+" "+m,"aria-controls":a.sTableId,"aria-label":l[v],"data-dt-idx":q,tabindex:x,id:0===c&&"string"===typeof v?a.sTableId+"_"+v:null}).html(n).appendTo(b),$a(x,{action:v},u),q++)}}};try{var 
v=f(b).find(y.activeElement).data("dt-idx")}catch(mc){}t(f(b).empty(),d);v!==p&&f(b).find("[data-dt-idx="+ -v+"]").focus()}}});f.extend(q.ext.type.detect,[function(a,b){b=b.oLanguage.sDecimal;return db(a,b)?"num"+b:null},function(a,b){if(a&&!(a instanceof Date)&&!cc.test(a))return null;b=Date.parse(a);return null!==b&&!isNaN(b)||P(a)?"date":null},function(a,b){b=b.oLanguage.sDecimal;return db(a,b,!0)?"num-fmt"+b:null},function(a,b){b=b.oLanguage.sDecimal;return Ub(a,b)?"html-num"+b:null},function(a,b){b=b.oLanguage.sDecimal;return Ub(a,b,!0)?"html-num-fmt"+b:null},function(a,b){return P(a)||"string"=== -typeof a&&-1!==a.indexOf("<")?"html":null}]);f.extend(q.ext.type.search,{html:function(a){return P(a)?a:"string"===typeof a?a.replace(Rb," ").replace(Ea,""):""},string:function(a){return P(a)?a:"string"===typeof a?a.replace(Rb," "):a}});var Da=function(a,b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=Tb(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};f.extend(C.type.order,{"date-pre":function(a){a=Date.parse(a);return isNaN(a)?-Infinity:a},"html-pre":function(a){return P(a)? -"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return P(a)?"":"string"===typeof a?a.toLowerCase():a.toString?a.toString():""},"string-asc":function(a,b){return ab?1:0},"string-desc":function(a,b){return ab?-1:0}});Ha("");f.extend(!0,q.ext.renderer,{header:{_:function(a,b,c,d){f(a.nTable).on("order.dt.DT",function(e,f,g,k){a===f&&(e=c.idx,b.removeClass(c.sSortingClass+" "+d.sSortAsc+" "+d.sSortDesc).addClass("asc"==k[e]?d.sSortAsc:"desc"==k[e]?d.sSortDesc: -c.sSortingClass))})},jqueryui:function(a,b,c,d){f("
    ").addClass(d.sSortJUIWrapper).append(b.contents()).append(f("").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);f(a.nTable).on("order.dt.DT",function(e,f,g,k){a===f&&(e=c.idx,b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass("asc"==k[e]?d.sSortAsc:"desc"==k[e]?d.sSortDesc:c.sSortingClass),b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass("asc"== -k[e]?d.sSortJUIAsc:"desc"==k[e]?d.sSortJUIDesc:c.sSortingClassJUI))})}}});var ib=function(a){return"string"===typeof a?a.replace(//g,">").replace(/"/g,"""):a};q.render={number:function(a,b,c,d,e){return{display:function(f){if("number"!==typeof f&&"string"!==typeof f)return f;var g=0>f?"-":"",h=parseFloat(f);if(isNaN(h))return ib(f);h=h.toFixed(c);f=Math.abs(h);h=parseInt(f,10);f=c?b+(f-h).toFixed(c).substring(2):"";return g+(d||"")+h.toString().replace(/\B(?=(\d{3})+(?!\d))/g, -a)+f+(e||"")}}},text:function(){return{display:ib,filter:ib}}};f.extend(q.ext.internal,{_fnExternApiFunc:Qb,_fnBuildAjax:va,_fnAjaxUpdate:qb,_fnAjaxParameters:zb,_fnAjaxUpdateDraw:Ab,_fnAjaxDataSrc:wa,_fnAddColumn:Ia,_fnColumnOptions:ma,_fnAdjustColumnSizing:aa,_fnVisibleToColumnIndex:ba,_fnColumnIndexToVisible:ca,_fnVisbleColumns:W,_fnGetColumns:oa,_fnColumnTypes:Ka,_fnApplyColumnDefs:nb,_fnHungarianMap:H,_fnCamelToHungarian:L,_fnLanguageCompat:Ga,_fnBrowserDetect:lb,_fnAddData:R,_fnAddTr:pa,_fnNodeToDataIndex:function(a, -b){return b._DT_RowIndex!==p?b._DT_RowIndex:null},_fnNodeToColumnIndex:function(a,b,c){return f.inArray(c,a.aoData[b].anCells)},_fnGetCellData:I,_fnSetCellData:ob,_fnSplitObjNotation:Na,_fnGetObjectDataFn:U,_fnSetObjectDataFn:Q,_fnGetDataMaster:Oa,_fnClearTable:qa,_fnDeleteIndex:ra,_fnInvalidate:ea,_fnGetRowElements:Ma,_fnCreateTr:La,_fnBuildHead:pb,_fnDrawHead:ha,_fnDraw:S,_fnReDraw:V,_fnAddOptionsHtml:sb,_fnDetectHeader:fa,_fnGetUniqueThs:ua,_fnFeatureHtmlFilter:ub,_fnFilterComplete:ia,_fnFilterCustom:Db, 
-_fnFilterColumn:Cb,_fnFilter:Bb,_fnFilterCreateSearch:Ta,_fnEscapeRegex:Ua,_fnFilterData:Eb,_fnFeatureHtmlInfo:xb,_fnUpdateInfo:Hb,_fnInfoMacros:Ib,_fnInitialise:ja,_fnInitComplete:xa,_fnLengthChange:Va,_fnFeatureHtmlLength:tb,_fnFeatureHtmlPaginate:yb,_fnPageChange:Xa,_fnFeatureHtmlProcessing:vb,_fnProcessingDisplay:K,_fnFeatureHtmlTable:wb,_fnScrollDraw:na,_fnApplyToChildren:N,_fnCalculateColumnWidths:Ja,_fnThrottle:Sa,_fnConvertToWidth:Jb,_fnGetWidestNode:Kb,_fnGetMaxLenString:Lb,_fnStringToCss:B, -_fnSortFlatten:Y,_fnSort:rb,_fnSortAria:Nb,_fnSortListener:Za,_fnSortAttachListener:Qa,_fnSortingClasses:Aa,_fnSortData:Mb,_fnSaveState:Ba,_fnLoadState:Ob,_fnSettingsFromNode:Ca,_fnLog:O,_fnMap:M,_fnBindAction:$a,_fnCallbackReg:E,_fnCallbackFire:A,_fnLengthOverflow:Wa,_fnRenderer:Ra,_fnDataSource:D,_fnRowAttributes:Pa,_fnExtend:ab,_fnCalculateEnd:function(){}});f.fn.dataTable=q;q.$=f;f.fn.dataTableSettings=q.settings;f.fn.dataTableExt=q.ext;f.fn.DataTable=function(a){return f(this).dataTable(a).api()}; -f.each(q,function(a,b){f.fn.DataTable[a]=b});return f.fn.dataTable}); diff --git a/static/rmarkdown-libs/header-attrs/header-attrs.js b/static/rmarkdown-libs/header-attrs/header-attrs.js deleted file mode 100644 index dd57d92e..00000000 --- a/static/rmarkdown-libs/header-attrs/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/static/rmarkdown-libs/htmlwidgets/htmlwidgets.js b/static/rmarkdown-libs/htmlwidgets/htmlwidgets.js deleted file mode 100644 index 6f3d672d..00000000 --- a/static/rmarkdown-libs/htmlwidgets/htmlwidgets.js +++ /dev/null @@ -1,903 +0,0 @@ -(function() { - // If window.HTMLWidgets is already defined, then use it; otherwise create a - // new object. This allows preceding code to set options that affect the - // initialization process (though none currently exist). - window.HTMLWidgets = window.HTMLWidgets || {}; - - // See if we're running in a viewer pane. If not, we're in a web browser. - var viewerMode = window.HTMLWidgets.viewerMode = - /\bviewer_pane=1\b/.test(window.location); - - // See if we're running in Shiny mode. If not, it's a static document. - // Note that static widgets can appear in both Shiny and static modes, but - // obviously, Shiny widgets can only appear in Shiny apps/documents. - var shinyMode = window.HTMLWidgets.shinyMode = - typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings; - - // We can't count on jQuery being available, so we implement our own - // version if necessary. - function querySelectorAll(scope, selector) { - if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) { - return scope.find(selector); - } - if (scope.querySelectorAll) { - return scope.querySelectorAll(selector); - } - } - - function asArray(value) { - if (value === null) - return []; - if ($.isArray(value)) - return value; - return [value]; - } - - // Implement jQuery's extend - function extend(target /*, ... 
*/) { - if (arguments.length == 1) { - return target; - } - for (var i = 1; i < arguments.length; i++) { - var source = arguments[i]; - for (var prop in source) { - if (source.hasOwnProperty(prop)) { - target[prop] = source[prop]; - } - } - } - return target; - } - - // IE8 doesn't support Array.forEach. - function forEach(values, callback, thisArg) { - if (values.forEach) { - values.forEach(callback, thisArg); - } else { - for (var i = 0; i < values.length; i++) { - callback.call(thisArg, values[i], i, values); - } - } - } - - // Replaces the specified method with the return value of funcSource. - // - // Note that funcSource should not BE the new method, it should be a function - // that RETURNS the new method. funcSource receives a single argument that is - // the overridden method, it can be called from the new method. The overridden - // method can be called like a regular function, it has the target permanently - // bound to it so "this" will work correctly. - function overrideMethod(target, methodName, funcSource) { - var superFunc = target[methodName] || function() {}; - var superFuncBound = function() { - return superFunc.apply(target, arguments); - }; - target[methodName] = funcSource(superFuncBound); - } - - // Add a method to delegator that, when invoked, calls - // delegatee.methodName. If there is no such method on - // the delegatee, but there was one on delegator before - // delegateMethod was called, then the original version - // is invoked instead. - // For example: - // - // var a = { - // method1: function() { console.log('a1'); } - // method2: function() { console.log('a2'); } - // }; - // var b = { - // method1: function() { console.log('b1'); } - // }; - // delegateMethod(a, b, "method1"); - // delegateMethod(a, b, "method2"); - // a.method1(); - // a.method2(); - // - // The output would be "b1", "a2". 
- function delegateMethod(delegator, delegatee, methodName) { - var inherited = delegator[methodName]; - delegator[methodName] = function() { - var target = delegatee; - var method = delegatee[methodName]; - - // The method doesn't exist on the delegatee. Instead, - // call the method on the delegator, if it exists. - if (!method) { - target = delegator; - method = inherited; - } - - if (method) { - return method.apply(target, arguments); - } - }; - } - - // Implement a vague facsimilie of jQuery's data method - function elementData(el, name, value) { - if (arguments.length == 2) { - return el["htmlwidget_data_" + name]; - } else if (arguments.length == 3) { - el["htmlwidget_data_" + name] = value; - return el; - } else { - throw new Error("Wrong number of arguments for elementData: " + - arguments.length); - } - } - - // http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex - function escapeRegExp(str) { - return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); - } - - function hasClass(el, className) { - var re = new RegExp("\\b" + escapeRegExp(className) + "\\b"); - return re.test(el.className); - } - - // elements - array (or array-like object) of HTML elements - // className - class name to test for - // include - if true, only return elements with given className; - // if false, only return elements *without* given className - function filterByClass(elements, className, include) { - var results = []; - for (var i = 0; i < elements.length; i++) { - if (hasClass(elements[i], className) == include) - results.push(elements[i]); - } - return results; - } - - function on(obj, eventName, func) { - if (obj.addEventListener) { - obj.addEventListener(eventName, func, false); - } else if (obj.attachEvent) { - obj.attachEvent(eventName, func); - } - } - - function off(obj, eventName, func) { - if (obj.removeEventListener) - obj.removeEventListener(eventName, func, false); - else if (obj.detachEvent) { - obj.detachEvent(eventName, 
func); - } - } - - // Translate array of values to top/right/bottom/left, as usual with - // the "padding" CSS property - // https://developer.mozilla.org/en-US/docs/Web/CSS/padding - function unpackPadding(value) { - if (typeof(value) === "number") - value = [value]; - if (value.length === 1) { - return {top: value[0], right: value[0], bottom: value[0], left: value[0]}; - } - if (value.length === 2) { - return {top: value[0], right: value[1], bottom: value[0], left: value[1]}; - } - if (value.length === 3) { - return {top: value[0], right: value[1], bottom: value[2], left: value[1]}; - } - if (value.length === 4) { - return {top: value[0], right: value[1], bottom: value[2], left: value[3]}; - } - } - - // Convert an unpacked padding object to a CSS value - function paddingToCss(paddingObj) { - return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px"; - } - - // Makes a number suitable for CSS - function px(x) { - if (typeof(x) === "number") - return x + "px"; - else - return x; - } - - // Retrieves runtime widget sizing information for an element. - // The return value is either null, or an object with fill, padding, - // defaultWidth, defaultHeight fields. - function sizingPolicy(el) { - var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']"); - if (!sizingEl) - return null; - var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}"); - if (viewerMode) { - return sp.viewer; - } else { - return sp.browser; - } - } - - // @param tasks Array of strings (or falsy value, in which case no-op). - // Each element must be a valid JavaScript expression that yields a - // function. 
Or, can be an array of objects with "code" and "data" - // properties; in this case, the "code" property should be a string - // of JS that's an expr that yields a function, and "data" should be - // an object that will be added as an additional argument when that - // function is called. - // @param target The object that will be "this" for each function - // execution. - // @param args Array of arguments to be passed to the functions. (The - // same arguments will be passed to all functions.) - function evalAndRun(tasks, target, args) { - if (tasks) { - forEach(tasks, function(task) { - var theseArgs = args; - if (typeof(task) === "object") { - theseArgs = theseArgs.concat([task.data]); - task = task.code; - } - var taskFunc = tryEval(task); - if (typeof(taskFunc) !== "function") { - throw new Error("Task must be a function! Source:\n" + task); - } - taskFunc.apply(target, theseArgs); - }); - } - } - - // Attempt eval() both with and without enclosing in parentheses. - // Note that enclosing coerces a function declaration into - // an expression that eval() can parse - // (otherwise, a SyntaxError is thrown) - function tryEval(code) { - var result = null; - try { - result = eval(code); - } catch(error) { - if (!error instanceof SyntaxError) { - throw error; - } - try { - result = eval("(" + code + ")"); - } catch(e) { - if (e instanceof SyntaxError) { - throw error; - } else { - throw e; - } - } - } - return result; - } - - function initSizing(el) { - var sizing = sizingPolicy(el); - if (!sizing) - return; - - var cel = document.getElementById("htmlwidget_container"); - if (!cel) - return; - - if (typeof(sizing.padding) !== "undefined") { - document.body.style.margin = "0"; - document.body.style.padding = paddingToCss(unpackPadding(sizing.padding)); - } - - if (sizing.fill) { - document.body.style.overflow = "hidden"; - document.body.style.width = "100%"; - document.body.style.height = "100%"; - document.documentElement.style.width = "100%"; - 
document.documentElement.style.height = "100%"; - if (cel) { - cel.style.position = "absolute"; - var pad = unpackPadding(sizing.padding); - cel.style.top = pad.top + "px"; - cel.style.right = pad.right + "px"; - cel.style.bottom = pad.bottom + "px"; - cel.style.left = pad.left + "px"; - el.style.width = "100%"; - el.style.height = "100%"; - } - - return { - getWidth: function() { return cel.offsetWidth; }, - getHeight: function() { return cel.offsetHeight; } - }; - - } else { - el.style.width = px(sizing.width); - el.style.height = px(sizing.height); - - return { - getWidth: function() { return el.offsetWidth; }, - getHeight: function() { return el.offsetHeight; } - }; - } - } - - // Default implementations for methods - var defaults = { - find: function(scope) { - return querySelectorAll(scope, "." + this.name); - }, - renderError: function(el, err) { - var $el = $(el); - - this.clearError(el); - - // Add all these error classes, as Shiny does - var errClass = "shiny-output-error"; - if (err.type !== null) { - // use the classes of the error condition as CSS class names - errClass = errClass + " " + $.map(asArray(err.type), function(type) { - return errClass + "-" + type; - }).join(" "); - } - errClass = errClass + " htmlwidgets-error"; - - // Is el inline or block? If inline or inline-block, just display:none it - // and add an inline error. - var display = $el.css("display"); - $el.data("restore-display-mode", display); - - if (display === "inline" || display === "inline-block") { - $el.hide(); - if (err.message !== "") { - var errorSpan = $("").addClass(errClass); - errorSpan.text(err.message); - $el.after(errorSpan); - } - } else if (display === "block") { - // If block, add an error just after the el, set visibility:none on the - // el, and position the error to be on top of the el. - // Mark it with a unique ID and CSS class so we can remove it later. - $el.css("visibility", "hidden"); - if (err.message !== "") { - var errorDiv = $("
    ").addClass(errClass).css("position", "absolute") - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - // setting width can push out the page size, forcing otherwise - // unnecessary scrollbars to appear and making it impossible for - // the element to shrink; so use max-width instead - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - errorDiv.text(err.message); - $el.after(errorDiv); - - // Really dumb way to keep the size/position of the error in sync with - // the parent element as the window is resized or whatever. - var intId = setInterval(function() { - if (!errorDiv[0].parentElement) { - clearInterval(intId); - return; - } - errorDiv - .css("top", el.offsetTop) - .css("left", el.offsetLeft) - .css("maxWidth", el.offsetWidth) - .css("height", el.offsetHeight); - }, 500); - } - } - }, - clearError: function(el) { - var $el = $(el); - var display = $el.data("restore-display-mode"); - $el.data("restore-display-mode", null); - - if (display === "inline" || display === "inline-block") { - if (display) - $el.css("display", display); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } else if (display === "block"){ - $el.css("visibility", "inherit"); - $(el.nextSibling).filter(".htmlwidgets-error").remove(); - } - }, - sizing: {} - }; - - // Called by widget bindings to register a new type of widget. The definition - // object can contain the following properties: - // - name (required) - A string indicating the binding name, which will be - // used by default as the CSS classname to look for. - // - initialize (optional) - A function(el) that will be called once per - // widget element; if a value is returned, it will be passed as the third - // value to renderValue. - // - renderValue (required) - A function(el, data, initValue) that will be - // called with data. 
Static contexts will cause this to be called once per - // element; Shiny apps will cause this to be called multiple times per - // element, as the data changes. - window.HTMLWidgets.widget = function(definition) { - if (!definition.name) { - throw new Error("Widget must have a name"); - } - if (!definition.type) { - throw new Error("Widget must have a type"); - } - // Currently we only support output widgets - if (definition.type !== "output") { - throw new Error("Unrecognized widget type '" + definition.type + "'"); - } - // TODO: Verify that .name is a valid CSS classname - - // Support new-style instance-bound definitions. Old-style class-bound - // definitions have one widget "object" per widget per type/class of - // widget; the renderValue and resize methods on such widget objects - // take el and instance arguments, because the widget object can't - // store them. New-style instance-bound definitions have one widget - // object per widget instance; the definition that's passed in doesn't - // provide renderValue or resize methods at all, just the single method - // factory(el, width, height) - // which returns an object that has renderValue(x) and resize(w, h). - // This enables a far more natural programming style for the widget - // author, who can store per-instance state using either OO-style - // instance fields or functional-style closure variables (I guess this - // is in contrast to what can only be called C-style pseudo-OO which is - // what we required before). - if (definition.factory) { - definition = createLegacyDefinitionAdapter(definition); - } - - if (!definition.renderValue) { - throw new Error("Widget must have a renderValue function"); - } - - // For static rendering (non-Shiny), use a simple widget registration - // scheme. We also use this scheme for Shiny apps/documents that also - // contain static widgets. 
- window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || []; - // Merge defaults into the definition; don't mutate the original definition. - var staticBinding = extend({}, defaults, definition); - overrideMethod(staticBinding, "find", function(superfunc) { - return function(scope) { - var results = superfunc(scope); - // Filter out Shiny outputs, we only want the static kind - return filterByClass(results, "html-widget-output", false); - }; - }); - window.HTMLWidgets.widgets.push(staticBinding); - - if (shinyMode) { - // Shiny is running. Register the definition with an output binding. - // The definition itself will not be the output binding, instead - // we will make an output binding object that delegates to the - // definition. This is because we foolishly used the same method - // name (renderValue) for htmlwidgets definition and Shiny bindings - // but they actually have quite different semantics (the Shiny - // bindings receive data that includes lots of metadata that it - // strips off before calling htmlwidgets renderValue). We can't - // just ignore the difference because in some widgets it's helpful - // to call this.renderValue() from inside of resize(), and if - // we're not delegating, then that call will go to the Shiny - // version instead of the htmlwidgets version. - - // Merge defaults with definition, without mutating either. - var bindingDef = extend({}, defaults, definition); - - // This object will be our actual Shiny binding. - var shinyBinding = new Shiny.OutputBinding(); - - // With a few exceptions, we'll want to simply use the bindingDef's - // version of methods if they are available, otherwise fall back to - // Shiny's defaults. NOTE: If Shiny's output bindings gain additional - // methods in the future, and we want them to be overrideable by - // HTMLWidget binding definitions, then we'll need to add them to this - // list. 
- delegateMethod(shinyBinding, bindingDef, "getId"); - delegateMethod(shinyBinding, bindingDef, "onValueChange"); - delegateMethod(shinyBinding, bindingDef, "onValueError"); - delegateMethod(shinyBinding, bindingDef, "renderError"); - delegateMethod(shinyBinding, bindingDef, "clearError"); - delegateMethod(shinyBinding, bindingDef, "showProgress"); - - // The find, renderValue, and resize are handled differently, because we - // want to actually decorate the behavior of the bindingDef methods. - - shinyBinding.find = function(scope) { - var results = bindingDef.find(scope); - - // Only return elements that are Shiny outputs, not static ones - var dynamicResults = results.filter(".html-widget-output"); - - // It's possible that whatever caused Shiny to think there might be - // new dynamic outputs, also caused there to be new static outputs. - // Since there might be lots of different htmlwidgets bindings, we - // schedule execution for later--no need to staticRender multiple - // times. - if (results.length !== dynamicResults.length) - scheduleStaticRender(); - - return dynamicResults; - }; - - // Wrap renderValue to handle initialization, which unfortunately isn't - // supported natively by Shiny at the time of this writing. 
- - shinyBinding.renderValue = function(el, data) { - Shiny.renderDependencies(data.deps); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var i = 0; data.evals && i < data.evals.length; i++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]); - } - if (!bindingDef.renderOnNullValue) { - if (data.x === null) { - el.style.visibility = "hidden"; - return; - } else { - el.style.visibility = "inherit"; - } - } - if (!elementData(el, "initialized")) { - initSizing(el); - - elementData(el, "initialized", true); - if (bindingDef.initialize) { - var result = bindingDef.initialize(el, el.offsetWidth, - el.offsetHeight); - elementData(el, "init_result", result); - } - } - bindingDef.renderValue(el, data.x, elementData(el, "init_result")); - evalAndRun(data.jsHooks.render, elementData(el, "init_result"), [el, data.x]); - }; - - // Only override resize if bindingDef implements it - if (bindingDef.resize) { - shinyBinding.resize = function(el, width, height) { - // Shiny can call resize before initialize/renderValue have been - // called, which doesn't make sense for widgets. 
- if (elementData(el, "initialized")) { - bindingDef.resize(el, width, height, elementData(el, "init_result")); - } - }; - } - - Shiny.outputBindings.register(shinyBinding, bindingDef.name); - } - }; - - var scheduleStaticRenderTimerId = null; - function scheduleStaticRender() { - if (!scheduleStaticRenderTimerId) { - scheduleStaticRenderTimerId = setTimeout(function() { - scheduleStaticRenderTimerId = null; - window.HTMLWidgets.staticRender(); - }, 1); - } - } - - // Render static widgets after the document finishes loading - // Statically render all elements that are of this widget's class - window.HTMLWidgets.staticRender = function() { - var bindings = window.HTMLWidgets.widgets || []; - forEach(bindings, function(binding) { - var matches = binding.find(document.documentElement); - forEach(matches, function(el) { - var sizeObj = initSizing(el, binding); - - if (hasClass(el, "html-widget-static-bound")) - return; - el.className = el.className + " html-widget-static-bound"; - - var initResult; - if (binding.initialize) { - initResult = binding.initialize(el, - sizeObj ? sizeObj.getWidth() : el.offsetWidth, - sizeObj ? sizeObj.getHeight() : el.offsetHeight - ); - elementData(el, "init_result", initResult); - } - - if (binding.resize) { - var lastSize = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - var resizeHandler = function(e) { - var size = { - w: sizeObj ? sizeObj.getWidth() : el.offsetWidth, - h: sizeObj ? sizeObj.getHeight() : el.offsetHeight - }; - if (size.w === 0 && size.h === 0) - return; - if (size.w === lastSize.w && size.h === lastSize.h) - return; - lastSize = size; - binding.resize(el, size.w, size.h, initResult); - }; - - on(window, "resize", resizeHandler); - - // This is needed for cases where we're running in a Shiny - // app, but the widget itself is not a Shiny output, but - // rather a simple static widget. 
One example of this is - // an rmarkdown document that has runtime:shiny and widget - // that isn't in a render function. Shiny only knows to - // call resize handlers for Shiny outputs, not for static - // widgets, so we do it ourselves. - if (window.jQuery) { - window.jQuery(document).on( - "shown.htmlwidgets shown.bs.tab.htmlwidgets shown.bs.collapse.htmlwidgets", - resizeHandler - ); - window.jQuery(document).on( - "hidden.htmlwidgets hidden.bs.tab.htmlwidgets hidden.bs.collapse.htmlwidgets", - resizeHandler - ); - } - - // This is needed for the specific case of ioslides, which - // flips slides between display:none and display:block. - // Ideally we would not have to have ioslide-specific code - // here, but rather have ioslides raise a generic event, - // but the rmarkdown package just went to CRAN so the - // window to getting that fixed may be long. - if (window.addEventListener) { - // It's OK to limit this to window.addEventListener - // browsers because ioslides itself only supports - // such browsers. 
- on(document, "slideenter", resizeHandler); - on(document, "slideleave", resizeHandler); - } - } - - var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']"); - if (scriptData) { - var data = JSON.parse(scriptData.textContent || scriptData.text); - // Resolve strings marked as javascript literals to objects - if (!(data.evals instanceof Array)) data.evals = [data.evals]; - for (var k = 0; data.evals && k < data.evals.length; k++) { - window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]); - } - binding.renderValue(el, data.x, initResult); - evalAndRun(data.jsHooks.render, initResult, [el, data.x]); - } - }); - }); - - invokePostRenderHandlers(); - } - - - function has_jQuery3() { - if (!window.jQuery) { - return false; - } - var $version = window.jQuery.fn.jquery; - var $major_version = parseInt($version.split(".")[0]); - return $major_version >= 3; - } - - /* - / Shiny 1.4 bumped jQuery from 1.x to 3.x which means jQuery's - / on-ready handler (i.e., $(fn)) is now asyncronous (i.e., it now - / really means $(setTimeout(fn)). - / https://jquery.com/upgrade-guide/3.0/#breaking-change-document-ready-handlers-are-now-asynchronous - / - / Since Shiny uses $() to schedule initShiny, shiny>=1.4 calls initShiny - / one tick later than it did before, which means staticRender() is - / called renderValue() earlier than (advanced) widget authors might be expecting. - / https://github.com/rstudio/shiny/issues/2630 - / - / For a concrete example, leaflet has some methods (e.g., updateBounds) - / which reference Shiny methods registered in initShiny (e.g., setInputValue). 
- / Since leaflet is privy to this life-cycle, it knows to use setTimeout() to - / delay execution of those methods (until Shiny methods are ready) - / https://github.com/rstudio/leaflet/blob/18ec981/javascript/src/index.js#L266-L268 - / - / Ideally widget authors wouldn't need to use this setTimeout() hack that - / leaflet uses to call Shiny methods on a staticRender(). In the long run, - / the logic initShiny should be broken up so that method registration happens - / right away, but binding happens later. - */ - function maybeStaticRenderLater() { - if (shinyMode && has_jQuery3()) { - window.jQuery(window.HTMLWidgets.staticRender); - } else { - window.HTMLWidgets.staticRender(); - } - } - - if (document.addEventListener) { - document.addEventListener("DOMContentLoaded", function() { - document.removeEventListener("DOMContentLoaded", arguments.callee, false); - maybeStaticRenderLater(); - }, false); - } else if (document.attachEvent) { - document.attachEvent("onreadystatechange", function() { - if (document.readyState === "complete") { - document.detachEvent("onreadystatechange", arguments.callee); - maybeStaticRenderLater(); - } - }); - } - - - window.HTMLWidgets.getAttachmentUrl = function(depname, key) { - // If no key, default to the first item - if (typeof(key) === "undefined") - key = 1; - - var link = document.getElementById(depname + "-" + key + "-attachment"); - if (!link) { - throw new Error("Attachment " + depname + "/" + key + " not found in document"); - } - return link.getAttribute("href"); - }; - - window.HTMLWidgets.dataframeToD3 = function(df) { - var names = []; - var length; - for (var name in df) { - if (df.hasOwnProperty(name)) - names.push(name); - if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") { - throw new Error("All fields must be arrays"); - } else if (typeof(length) !== "undefined" && length !== df[name].length) { - throw new Error("All fields must be arrays of the same length"); - } - length = 
df[name].length; - } - var results = []; - var item; - for (var row = 0; row < length; row++) { - item = {}; - for (var col = 0; col < names.length; col++) { - item[names[col]] = df[names[col]][row]; - } - results.push(item); - } - return results; - }; - - window.HTMLWidgets.transposeArray2D = function(array) { - if (array.length === 0) return array; - var newArray = array[0].map(function(col, i) { - return array.map(function(row) { - return row[i] - }) - }); - return newArray; - }; - // Split value at splitChar, but allow splitChar to be escaped - // using escapeChar. Any other characters escaped by escapeChar - // will be included as usual (including escapeChar itself). - function splitWithEscape(value, splitChar, escapeChar) { - var results = []; - var escapeMode = false; - var currentResult = ""; - for (var pos = 0; pos < value.length; pos++) { - if (!escapeMode) { - if (value[pos] === splitChar) { - results.push(currentResult); - currentResult = ""; - } else if (value[pos] === escapeChar) { - escapeMode = true; - } else { - currentResult += value[pos]; - } - } else { - currentResult += value[pos]; - escapeMode = false; - } - } - if (currentResult !== "") { - results.push(currentResult); - } - return results; - } - // Function authored by Yihui/JJ Allaire - window.HTMLWidgets.evaluateStringMember = function(o, member) { - var parts = splitWithEscape(member, '.', '\\'); - for (var i = 0, l = parts.length; i < l; i++) { - var part = parts[i]; - // part may be a character or 'numeric' member name - if (o !== null && typeof o === "object" && part in o) { - if (i == (l - 1)) { // if we are at the end of the line then evalulate - if (typeof o[part] === "string") - o[part] = tryEval(o[part]); - } else { // otherwise continue to next embedded object - o = o[part]; - } - } - } - }; - - // Retrieve the HTMLWidget instance (i.e. the return value of an - // HTMLWidget binding's initialize() or factory() function) - // associated with an element, or null if none. 
- window.HTMLWidgets.getInstance = function(el) { - return elementData(el, "init_result"); - }; - - // Finds the first element in the scope that matches the selector, - // and returns the HTMLWidget instance (i.e. the return value of - // an HTMLWidget binding's initialize() or factory() function) - // associated with that element, if any. If no element matches the - // selector, or the first matching element has no HTMLWidget - // instance associated with it, then null is returned. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.find = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var el = scope.querySelector(selector); - if (el === null) { - return null; - } else { - return window.HTMLWidgets.getInstance(el); - } - }; - - // Finds all elements in the scope that match the selector, and - // returns the HTMLWidget instances (i.e. the return values of - // an HTMLWidget binding's initialize() or factory() function) - // associated with the elements, in an array. If elements that - // match the selector don't have an associated HTMLWidget - // instance, the returned array will contain nulls. - // - // The scope argument is optional, and defaults to window.document. - window.HTMLWidgets.findAll = function(scope, selector) { - if (arguments.length == 1) { - selector = scope; - scope = document; - } - - var nodes = scope.querySelectorAll(selector); - var results = []; - for (var i = 0; i < nodes.length; i++) { - results.push(window.HTMLWidgets.getInstance(nodes[i])); - } - return results; - }; - - var postRenderHandlers = []; - function invokePostRenderHandlers() { - while (postRenderHandlers.length) { - var handler = postRenderHandlers.shift(); - if (handler) { - handler(); - } - } - } - - // Register the given callback function to be invoked after the - // next time static widgets are rendered. 
- window.HTMLWidgets.addPostRenderHandler = function(callback) { - postRenderHandlers.push(callback); - }; - - // Takes a new-style instance-bound definition, and returns an - // old-style class-bound definition. This saves us from having - // to rewrite all the logic in this file to accomodate both - // types of definitions. - function createLegacyDefinitionAdapter(defn) { - var result = { - name: defn.name, - type: defn.type, - initialize: function(el, width, height) { - return defn.factory(el, width, height); - }, - renderValue: function(el, x, instance) { - return instance.renderValue(x); - }, - resize: function(el, width, height, instance) { - return instance.resize(width, height); - } - }; - - if (defn.find) - result.find = defn.find; - if (defn.renderError) - result.renderError = defn.renderError; - if (defn.clearError) - result.clearError = defn.clearError; - - return result; - } -})(); - diff --git a/static/rmarkdown-libs/jquery/LICENSE.txt b/static/rmarkdown-libs/jquery/LICENSE.txt deleted file mode 100644 index 5bf4f5e6..00000000 --- a/static/rmarkdown-libs/jquery/LICENSE.txt +++ /dev/null @@ -1,37 +0,0 @@ -Copyright 2005, 2014 jQuery Foundation and other contributors, -https://jquery.org/ - -This software consists of voluntary contributions made by many -individuals. 
For exact contribution history, see the revision history -available at https://github.com/jquery/jquery - -The following license applies to all parts of this software except as -documented below: - -==== - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -==== - -All files located in the node_modules and external directories are -externally maintained libraries used by this software which have their -own licenses; we recommend you read them, as their terms may differ from -the terms above. diff --git a/static/rmarkdown-libs/jquery/jquery.min.js b/static/rmarkdown-libs/jquery/jquery.min.js deleted file mode 100644 index e8364758..00000000 --- a/static/rmarkdown-libs/jquery/jquery.min.js +++ /dev/null @@ -1,5 +0,0 @@ -/*! 
jQuery v1.12.4 | (c) jQuery Foundation | jquery.org/license */ -!function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){var c=[],d=a.document,e=c.slice,f=c.concat,g=c.push,h=c.indexOf,i={},j=i.toString,k=i.hasOwnProperty,l={},m="1.12.4",n=function(a,b){return new n.fn.init(a,b)},o=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,p=/^-ms-/,q=/-([\da-z])/gi,r=function(a,b){return b.toUpperCase()};n.fn=n.prototype={jquery:m,constructor:n,selector:"",length:0,toArray:function(){return e.call(this)},get:function(a){return null!=a?0>a?this[a+this.length]:this[a]:e.call(this)},pushStack:function(a){var b=n.merge(this.constructor(),a);return b.prevObject=this,b.context=this.context,b},each:function(a){return n.each(this,a)},map:function(a){return this.pushStack(n.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(e.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(0>a?b:0);return this.pushStack(c>=0&&b>c?[this[c]]:[])},end:function(){return this.prevObject||this.constructor()},push:g,sort:c.sort,splice:c.splice},n.extend=n.fn.extend=function(){var a,b,c,d,e,f,g=arguments[0]||{},h=1,i=arguments.length,j=!1;for("boolean"==typeof g&&(j=g,g=arguments[h]||{},h++),"object"==typeof g||n.isFunction(g)||(g={}),h===i&&(g=this,h--);i>h;h++)if(null!=(e=arguments[h]))for(d in e)a=g[d],c=e[d],g!==c&&(j&&c&&(n.isPlainObject(c)||(b=n.isArray(c)))?(b?(b=!1,f=a&&n.isArray(a)?a:[]):f=a&&n.isPlainObject(a)?a:{},g[d]=n.extend(j,f,c)):void 0!==c&&(g[d]=c));return g},n.extend({expando:"jQuery"+(m+Math.random()).replace(/\D/g,""),isReady:!0,error:function(a){throw new 
Error(a)},noop:function(){},isFunction:function(a){return"function"===n.type(a)},isArray:Array.isArray||function(a){return"array"===n.type(a)},isWindow:function(a){return null!=a&&a==a.window},isNumeric:function(a){var b=a&&a.toString();return!n.isArray(a)&&b-parseFloat(b)+1>=0},isEmptyObject:function(a){var b;for(b in a)return!1;return!0},isPlainObject:function(a){var b;if(!a||"object"!==n.type(a)||a.nodeType||n.isWindow(a))return!1;try{if(a.constructor&&!k.call(a,"constructor")&&!k.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}if(!l.ownFirst)for(b in a)return k.call(a,b);for(b in a);return void 0===b||k.call(a,b)},type:function(a){return null==a?a+"":"object"==typeof a||"function"==typeof a?i[j.call(a)]||"object":typeof a},globalEval:function(b){b&&n.trim(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(p,"ms-").replace(q,r)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()},each:function(a,b){var c,d=0;if(s(a)){for(c=a.length;c>d;d++)if(b.call(a[d],d,a[d])===!1)break}else for(d in a)if(b.call(a[d],d,a[d])===!1)break;return a},trim:function(a){return null==a?"":(a+"").replace(o,"")},makeArray:function(a,b){var c=b||[];return null!=a&&(s(Object(a))?n.merge(c,"string"==typeof a?[a]:a):g.call(c,a)),c},inArray:function(a,b,c){var d;if(b){if(h)return h.call(b,a,c);for(d=b.length,c=c?0>c?Math.max(0,d+c):c:0;d>c;c++)if(c in b&&b[c]===a)return c}return-1},merge:function(a,b){var c=+b.length,d=0,e=a.length;while(c>d)a[e++]=b[d++];if(c!==c)while(void 0!==b[d])a[e++]=b[d++];return a.length=e,a},grep:function(a,b,c){for(var d,e=[],f=0,g=a.length,h=!c;g>f;f++)d=!b(a[f],f),d!==h&&e.push(a[f]);return e},map:function(a,b,c){var d,e,g=0,h=[];if(s(a))for(d=a.length;d>g;g++)e=b(a[g],g,c),null!=e&&h.push(e);else for(g in a)e=b(a[g],g,c),null!=e&&h.push(e);return f.apply([],h)},guid:1,proxy:function(a,b){var c,d,f;return"string"==typeof 
b&&(f=a[b],b=a,a=f),n.isFunction(a)?(c=e.call(arguments,2),d=function(){return a.apply(b||this,c.concat(e.call(arguments)))},d.guid=a.guid=a.guid||n.guid++,d):void 0},now:function(){return+new Date},support:l}),"function"==typeof Symbol&&(n.fn[Symbol.iterator]=c[Symbol.iterator]),n.each("Boolean Number String Function Array Date RegExp Object Error Symbol".split(" "),function(a,b){i["[object "+b+"]"]=b.toLowerCase()});function s(a){var b=!!a&&"length"in a&&a.length,c=n.type(a);return"function"===c||n.isWindow(a)?!1:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}var t=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ga(),z=ga(),A=ga(),B=function(a,b){return a===b&&(l=!0),0},C=1<<31,D={}.hasOwnProperty,E=[],F=E.pop,G=E.push,H=E.push,I=E.slice,J=function(a,b){for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},K="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",L="[\\x20\\t\\r\\n\\f]",M="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",N="\\["+L+"*("+M+")(?:"+L+"*([*^$|!~]?=)"+L+"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|("+M+"))|)"+L+"*\\]",O=":("+M+")(?:\\((('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|((?:\\\\.|[^\\\\()[\\]]|"+N+")*)|.*)\\)|)",P=new RegExp(L+"+","g"),Q=new RegExp("^"+L+"+|((?:^|[^\\\\])(?:\\\\.)*)"+L+"+$","g"),R=new RegExp("^"+L+"*,"+L+"*"),S=new RegExp("^"+L+"*([>+~]|"+L+")"+L+"*"),T=new RegExp("="+L+"*([^\\]'\"]*?)"+L+"*\\]","g"),U=new RegExp(O),V=new RegExp("^"+M+"$"),W={ID:new RegExp("^#("+M+")"),CLASS:new RegExp("^\\.("+M+")"),TAG:new RegExp("^("+M+"|[*])"),ATTR:new RegExp("^"+N),PSEUDO:new RegExp("^"+O),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+L+"*(even|odd|(([+-]|)(\\d*)n|)"+L+"*(?:([+-]|)"+L+"*(\\d+)|))"+L+"*\\)|)","i"),bool:new RegExp("^(?:"+K+")$","i"),needsContext:new 
RegExp("^"+L+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+L+"*((?:-\\d)?\\d*)"+L+"*\\)|)(?=[^-]|$)","i")},X=/^(?:input|select|textarea|button)$/i,Y=/^h\d$/i,Z=/^[^{]+\{\s*\[native \w/,$=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,_=/[+~]/,aa=/'|\\/g,ba=new RegExp("\\\\([\\da-f]{1,6}"+L+"?|("+L+")|.)","ig"),ca=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:0>d?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},da=function(){m()};try{H.apply(E=I.call(v.childNodes),v.childNodes),E[v.childNodes.length].nodeType}catch(ea){H={apply:E.length?function(a,b){G.apply(a,I.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function fa(a,b,d,e){var f,h,j,k,l,o,r,s,w=b&&b.ownerDocument,x=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==x&&9!==x&&11!==x)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==x&&(o=$.exec(a)))if(f=o[1]){if(9===x){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(w&&(j=w.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(o[2])return H.apply(d,b.getElementsByTagName(a)),d;if((f=o[3])&&c.getElementsByClassName&&b.getElementsByClassName)return H.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==x)w=b,s=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(aa,"\\$&"):b.setAttribute("id",k=u),r=g(a),h=r.length,l=V.test(k)?"#"+k:"[id='"+k+"']";while(h--)r[h]=l+" "+qa(r[h]);s=r.join(","),w=_.test(a)&&oa(b.parentNode)||b}if(s)try{return H.apply(d,w.querySelectorAll(s)),d}catch(y){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(Q,"$1"),b,d,e)}function ga(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ha(a){return a[u]=!0,a}function ia(a){var b=n.createElement("div");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ja(a,b){var 
c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function ka(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&(~b.sourceIndex||C)-(~a.sourceIndex||C);if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function la(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function na(a){return ha(function(b){return b=+b,ha(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function oa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=fa.support={},f=fa.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return b?"HTML"!==b.nodeName:!1},m=fa.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ia(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ia(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Z.test(n.getElementsByClassName),c.getById=ia(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}},d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){return a.getAttribute("id")===b}}):(delete d.find.ID,d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var 
c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){return"undefined"!=typeof b.getElementsByClassName&&p?b.getElementsByClassName(a):void 0},r=[],q=[],(c.qsa=Z.test(n.querySelectorAll))&&(ia(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+L+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+L+"*(?:value|"+K+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ia(function(a){var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+L+"*[*^$|!~]?="),a.querySelectorAll(":enabled").length||q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Z.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ia(function(a){c.disconnectedMatch=s.call(a,"div"),s.call(a,"[s!='']:x"),r.push("!=",O)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Z.test(o.compareDocumentPosition),t=b||Z.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?J(k,a)-J(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var 
c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?J(k,a)-J(k,b):0;if(e===f)return ka(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return d?ka(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},fa.matches=function(a,b){return fa(a,null,null,b)},fa.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(T,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return fa(b,n,null,[a]).length>0},fa.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},fa.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&D.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},fa.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},fa.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=fa.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=fa.selectors={cacheLength:50,createPseudo:ha,match:W,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(ba,ca),a[3]=(a[3]||a[4]||a[5]||"").replace(ba,ca),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return 
a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||fa.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&fa.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return W.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&U.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(ba,ca).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+L+")"+a+"("+L+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=fa.attr(d,a);return null==e?"!="===b:b?(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(P," ")+" ").indexOf(c)>-1:"|="===b?e===c||e.slice(0,c.length+1)===c+"-":!1):!0}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else 
if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||fa.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ha(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=J(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ha(function(a){var b=[],c=[],d=h(a.replace(Q,"$1"));return d[u]?ha(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ha(function(a){return function(b){return fa(a,b).length>0}}),contains:ha(function(a){return a=a.replace(ba,ca),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ha(function(a){return V.test(a||"")||fa.error("unsupported lang: "+a),a=a.replace(ba,ca).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:function(a){return a.disabled===!1},disabled:function(a){return a.disabled===!0},checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return 
a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return Y.test(a.nodeName)},input:function(a){return X.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:na(function(){return[0]}),last:na(function(a,b){return[b-1]}),eq:na(function(a,b,c){return[0>c?c+b:c]}),even:na(function(a,b){for(var c=0;b>c;c+=2)a.push(c);return a}),odd:na(function(a,b){for(var c=1;b>c;c+=2)a.push(c);return a}),lt:na(function(a,b,c){for(var d=0>c?c+b:c;--d>=0;)a.push(d);return a}),gt:na(function(a,b,c){for(var d=0>c?c+b:c;++db;b++)d+=a[b].value;return d}function ra(a,b,c){var d=b.dir,e=c&&"parentNode"===d,f=x++;return b.first?function(b,c,f){while(b=b[d])if(1===b.nodeType||e)return a(b,c,f)}:function(b,c,g){var h,i,j,k=[w,f];if(g){while(b=b[d])if((1===b.nodeType||e)&&a(b,c,g))return!0}else while(b=b[d])if(1===b.nodeType||e){if(j=b[u]||(b[u]={}),i=j[b.uniqueID]||(j[b.uniqueID]={}),(h=i[d])&&h[0]===w&&h[1]===f)return k[2]=h[2];if(i[d]=k,k[2]=a(b,c,g))return!0}}}function sa(a){return a.length>1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function ta(a,b,c){for(var d=0,e=b.length;e>d;d++)fa(a,b[d],c);return c}function ua(a,b,c,d,e){for(var f,g=[],h=0,i=a.length,j=null!=b;i>h;h++)(f=a[h])&&(c&&!c(f,d,e)||(g.push(f),j&&b.push(h)));return g}function va(a,b,c,d,e,f){return d&&!d[u]&&(d=va(d)),e&&!e[u]&&(e=va(e,f)),ha(function(f,g,h,i){var 
j,k,l,m=[],n=[],o=g.length,p=f||ta(b||"*",h.nodeType?[h]:h,[]),q=!a||!f&&b?p:ua(p,m,a,h,i),r=c?e||(f?a:o||d)?[]:g:q;if(c&&c(q,r,h,i),d){j=ua(r,n),d(j,[],h,i),k=j.length;while(k--)(l=j[k])&&(r[n[k]]=!(q[n[k]]=l))}if(f){if(e||a){if(e){j=[],k=r.length;while(k--)(l=r[k])&&j.push(q[k]=l);e(null,r=[],j,i)}k=r.length;while(k--)(l=r[k])&&(j=e?J(f,l):m[k])>-1&&(f[j]=!(g[j]=l))}}else r=ua(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):H.apply(g,r)})}function wa(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ra(function(a){return a===b},h,!0),l=ra(function(a){return J(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];f>i;i++)if(c=d.relative[a[i].type])m=[ra(sa(m),c)];else{if(c=d.filter[a[i].type].apply(null,a[i].matches),c[u]){for(e=++i;f>e;e++)if(d.relative[a[e].type])break;return va(i>1&&sa(m),i>1&&qa(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(Q,"$1"),c,e>i&&wa(a.slice(i,e)),f>e&&wa(a=a.slice(e)),f>e&&qa(a))}m.push(c)}return sa(m)}function xa(a,b){var c=b.length>0,e=a.length>0,f=function(f,g,h,i,k){var l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=F.call(i));u=ua(u)}H.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&fa.uniqueSort(i)}return k&&(w=y,j=v),t};return c?ha(f):f}return h=fa.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=wa(b[c]),f[u]?d.push(f):e.push(f);f=A(a,xa(e,d)),f.selector=a}return f},i=fa.select=function(a,b,e,f){var i,j,k,l,m,n="function"==typeof 
a&&a,o=!f&&g(a=n.selector||a);if(e=e||[],1===o.length){if(j=o[0]=o[0].slice(0),j.length>2&&"ID"===(k=j[0]).type&&c.getById&&9===b.nodeType&&p&&d.relative[j[1].type]){if(b=(d.find.ID(k.matches[0].replace(ba,ca),b)||[])[0],!b)return e;n&&(b=b.parentNode),a=a.slice(j.shift().value.length)}i=W.needsContext.test(a)?0:j.length;while(i--){if(k=j[i],d.relative[l=k.type])break;if((m=d.find[l])&&(f=m(k.matches[0].replace(ba,ca),_.test(j[0].type)&&oa(b.parentNode)||b))){if(j.splice(i,1),a=f.length&&qa(j),!a)return H.apply(e,f),e;break}}}return(n||h(a,o))(f,b,!p,e,!b||_.test(a)&&oa(b.parentNode)||b),e},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ia(function(a){return 1&a.compareDocumentPosition(n.createElement("div"))}),ia(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ja("type|href|height|width",function(a,b,c){return c?void 0:a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ia(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ja("value",function(a,b,c){return c||"input"!==a.nodeName.toLowerCase()?void 0:a.defaultValue}),ia(function(a){return null==a.getAttribute("disabled")})||ja(K,function(a,b,c){var d;return c?void 0:a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),fa}(a);n.find=t,n.expr=t.selectors,n.expr[":"]=n.expr.pseudos,n.uniqueSort=n.unique=t.uniqueSort,n.text=t.getText,n.isXMLDoc=t.isXML,n.contains=t.contains;var u=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&n(a).is(c))break;d.push(a)}return d},v=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},w=n.expr.match.needsContext,x=/^<([\w-]+)\s*\/?>(?:<\/\1>|)$/,y=/^.[^:#\[\.,]*$/;function z(a,b,c){if(n.isFunction(b))return n.grep(a,function(a,d){return!!b.call(a,d,a)!==c});if(b.nodeType)return n.grep(a,function(a){return a===b!==c});if("string"==typeof 
b){if(y.test(b))return n.filter(b,a,c);b=n.filter(b,a)}return n.grep(a,function(a){return n.inArray(a,b)>-1!==c})}n.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?n.find.matchesSelector(d,a)?[d]:[]:n.find.matches(a,n.grep(b,function(a){return 1===a.nodeType}))},n.fn.extend({find:function(a){var b,c=[],d=this,e=d.length;if("string"!=typeof a)return this.pushStack(n(a).filter(function(){for(b=0;e>b;b++)if(n.contains(d[b],this))return!0}));for(b=0;e>b;b++)n.find(a,d[b],c);return c=this.pushStack(e>1?n.unique(c):c),c.selector=this.selector?this.selector+" "+a:a,c},filter:function(a){return this.pushStack(z(this,a||[],!1))},not:function(a){return this.pushStack(z(this,a||[],!0))},is:function(a){return!!z(this,"string"==typeof a&&w.test(a)?n(a):a||[],!1).length}});var A,B=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,C=n.fn.init=function(a,b,c){var e,f;if(!a)return this;if(c=c||A,"string"==typeof a){if(e="<"===a.charAt(0)&&">"===a.charAt(a.length-1)&&a.length>=3?[null,a,null]:B.exec(a),!e||!e[1]&&b)return!b||b.jquery?(b||c).find(a):this.constructor(b).find(a);if(e[1]){if(b=b instanceof n?b[0]:b,n.merge(this,n.parseHTML(e[1],b&&b.nodeType?b.ownerDocument||b:d,!0)),x.test(e[1])&&n.isPlainObject(b))for(e in b)n.isFunction(this[e])?this[e](b[e]):this.attr(e,b[e]);return this}if(f=d.getElementById(e[2]),f&&f.parentNode){if(f.id!==e[2])return A.find(a);this.length=1,this[0]=f}return this.context=d,this.selector=a,this}return a.nodeType?(this.context=this[0]=a,this.length=1,this):n.isFunction(a)?"undefined"!=typeof c.ready?c.ready(a):a(n):(void 0!==a.selector&&(this.selector=a.selector,this.context=a.context),n.makeArray(a,this))};C.prototype=n.fn,A=n(d);var D=/^(?:parents|prev(?:Until|All))/,E={children:!0,contents:!0,next:!0,prev:!0};n.fn.extend({has:function(a){var b,c=n(a,this),d=c.length;return this.filter(function(){for(b=0;d>b;b++)if(n.contains(this,c[b]))return!0})},closest:function(a,b){for(var 
c,d=0,e=this.length,f=[],g=w.test(a)||"string"!=typeof a?n(a,b||this.context):0;e>d;d++)for(c=this[d];c&&c!==b;c=c.parentNode)if(c.nodeType<11&&(g?g.index(c)>-1:1===c.nodeType&&n.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?n.uniqueSort(f):f)},index:function(a){return a?"string"==typeof a?n.inArray(this[0],n(a)):n.inArray(a.jquery?a[0]:a,this):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(n.uniqueSort(n.merge(this.get(),n(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function F(a,b){do a=a[b];while(a&&1!==a.nodeType);return a}n.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return u(a,"parentNode")},parentsUntil:function(a,b,c){return u(a,"parentNode",c)},next:function(a){return F(a,"nextSibling")},prev:function(a){return F(a,"previousSibling")},nextAll:function(a){return u(a,"nextSibling")},prevAll:function(a){return u(a,"previousSibling")},nextUntil:function(a,b,c){return u(a,"nextSibling",c)},prevUntil:function(a,b,c){return u(a,"previousSibling",c)},siblings:function(a){return v((a.parentNode||{}).firstChild,a)},children:function(a){return v(a.firstChild)},contents:function(a){return n.nodeName(a,"iframe")?a.contentDocument||a.contentWindow.document:n.merge([],a.childNodes)}},function(a,b){n.fn[a]=function(c,d){var e=n.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=n.filter(d,e)),this.length>1&&(E[a]||(e=n.uniqueSort(e)),D.test(a)&&(e=e.reverse())),this.pushStack(e)}});var G=/\S+/g;function H(a){var b={};return n.each(a.match(G)||[],function(a,c){b[c]=!0}),b}n.Callbacks=function(a){a="string"==typeof a?H(a):n.extend({},a);var b,c,d,e,f=[],g=[],h=-1,i=function(){for(e=a.once,d=b=!0;g.length;h=-1){c=g.shift();while(++h-1)f.splice(c,1),h>=c&&h--}),this},has:function(a){return a?n.inArray(a,f)>-1:f.length>0},empty:function(){return 
f&&(f=[]),this},disable:function(){return e=g=[],f=c="",this},disabled:function(){return!f},lock:function(){return e=!0,c||j.disable(),this},locked:function(){return!!e},fireWith:function(a,c){return e||(c=c||[],c=[a,c.slice?c.slice():c],g.push(c),b||i()),this},fire:function(){return j.fireWith(this,arguments),this},fired:function(){return!!d}};return j},n.extend({Deferred:function(a){var b=[["resolve","done",n.Callbacks("once memory"),"resolved"],["reject","fail",n.Callbacks("once memory"),"rejected"],["notify","progress",n.Callbacks("memory")]],c="pending",d={state:function(){return c},always:function(){return e.done(arguments).fail(arguments),this},then:function(){var a=arguments;return n.Deferred(function(c){n.each(b,function(b,f){var g=n.isFunction(a[b])&&a[b];e[f[1]](function(){var a=g&&g.apply(this,arguments);a&&n.isFunction(a.promise)?a.promise().progress(c.notify).done(c.resolve).fail(c.reject):c[f[0]+"With"](this===d?c.promise():this,g?[a]:arguments)})}),a=null}).promise()},promise:function(a){return null!=a?n.extend(a,d):d}},e={};return d.pipe=d.then,n.each(b,function(a,f){var g=f[2],h=f[3];d[f[1]]=g.add,h&&g.add(function(){c=h},b[1^a][2].disable,b[2][2].lock),e[f[0]]=function(){return e[f[0]+"With"](this===e?d:this,arguments),this},e[f[0]+"With"]=g.fireWith}),d.promise(e),a&&a.call(e,e),e},when:function(a){var b=0,c=e.call(arguments),d=c.length,f=1!==d||a&&n.isFunction(a.promise)?d:0,g=1===f?a:n.Deferred(),h=function(a,b,c){return function(d){b[a]=this,c[a]=arguments.length>1?e.call(arguments):d,c===i?g.notifyWith(b,c):--f||g.resolveWith(b,c)}},i,j,k;if(d>1)for(i=new Array(d),j=new Array(d),k=new Array(d);d>b;b++)c[b]&&n.isFunction(c[b].promise)?c[b].promise().progress(h(b,j,i)).done(h(b,k,c)).fail(g.reject):--f;return f||g.resolveWith(k,c),g.promise()}});var I;n.fn.ready=function(a){return 
n.ready.promise().done(a),this},n.extend({isReady:!1,readyWait:1,holdReady:function(a){a?n.readyWait++:n.ready(!0)},ready:function(a){(a===!0?--n.readyWait:n.isReady)||(n.isReady=!0,a!==!0&&--n.readyWait>0||(I.resolveWith(d,[n]),n.fn.triggerHandler&&(n(d).triggerHandler("ready"),n(d).off("ready"))))}});function J(){d.addEventListener?(d.removeEventListener("DOMContentLoaded",K),a.removeEventListener("load",K)):(d.detachEvent("onreadystatechange",K),a.detachEvent("onload",K))}function K(){(d.addEventListener||"load"===a.event.type||"complete"===d.readyState)&&(J(),n.ready())}n.ready.promise=function(b){if(!I)if(I=n.Deferred(),"complete"===d.readyState||"loading"!==d.readyState&&!d.documentElement.doScroll)a.setTimeout(n.ready);else if(d.addEventListener)d.addEventListener("DOMContentLoaded",K),a.addEventListener("load",K);else{d.attachEvent("onreadystatechange",K),a.attachEvent("onload",K);var c=!1;try{c=null==a.frameElement&&d.documentElement}catch(e){}c&&c.doScroll&&!function f(){if(!n.isReady){try{c.doScroll("left")}catch(b){return a.setTimeout(f,50)}J(),n.ready()}}()}return I.promise(b)},n.ready.promise();var L;for(L in n(l))break;l.ownFirst="0"===L,l.inlineBlockNeedsLayout=!1,n(function(){var a,b,c,e;c=d.getElementsByTagName("body")[0],c&&c.style&&(b=d.createElement("div"),e=d.createElement("div"),e.style.cssText="position:absolute;border:0;width:0;height:0;top:0;left:-9999px",c.appendChild(e).appendChild(b),"undefined"!=typeof b.style.zoom&&(b.style.cssText="display:inline;margin:0;border:0;padding:1px;width:1px;zoom:1",l.inlineBlockNeedsLayout=a=3===b.offsetWidth,a&&(c.style.zoom=1)),c.removeChild(e))}),function(){var a=d.createElement("div");l.deleteExpando=!0;try{delete a.test}catch(b){l.deleteExpando=!1}a=null}();var M=function(a){var b=n.noData[(a.nodeName+" ").toLowerCase()],c=+a.nodeType||1;return 1!==c&&9!==c?!1:!b||b!==!0&&a.getAttribute("classid")===b},N=/^(?:\{[\w\W]*\}|\[[\w\W]*\])$/,O=/([A-Z])/g;function P(a,b,c){if(void 0===c&&1===a.nodeType){var 
d="data-"+b.replace(O,"-$1").toLowerCase();if(c=a.getAttribute(d),"string"==typeof c){try{c="true"===c?!0:"false"===c?!1:"null"===c?null:+c+""===c?+c:N.test(c)?n.parseJSON(c):c}catch(e){}n.data(a,b,c)}else c=void 0; -}return c}function Q(a){var b;for(b in a)if(("data"!==b||!n.isEmptyObject(a[b]))&&"toJSON"!==b)return!1;return!0}function R(a,b,d,e){if(M(a)){var f,g,h=n.expando,i=a.nodeType,j=i?n.cache:a,k=i?a[h]:a[h]&&h;if(k&&j[k]&&(e||j[k].data)||void 0!==d||"string"!=typeof b)return k||(k=i?a[h]=c.pop()||n.guid++:h),j[k]||(j[k]=i?{}:{toJSON:n.noop}),"object"!=typeof b&&"function"!=typeof b||(e?j[k]=n.extend(j[k],b):j[k].data=n.extend(j[k].data,b)),g=j[k],e||(g.data||(g.data={}),g=g.data),void 0!==d&&(g[n.camelCase(b)]=d),"string"==typeof b?(f=g[b],null==f&&(f=g[n.camelCase(b)])):f=g,f}}function S(a,b,c){if(M(a)){var d,e,f=a.nodeType,g=f?n.cache:a,h=f?a[n.expando]:n.expando;if(g[h]){if(b&&(d=c?g[h]:g[h].data)){n.isArray(b)?b=b.concat(n.map(b,n.camelCase)):b in d?b=[b]:(b=n.camelCase(b),b=b in d?[b]:b.split(" ")),e=b.length;while(e--)delete d[b[e]];if(c?!Q(d):!n.isEmptyObject(d))return}(c||(delete g[h].data,Q(g[h])))&&(f?n.cleanData([a],!0):l.deleteExpando||g!=g.window?delete g[h]:g[h]=void 0)}}}n.extend({cache:{},noData:{"applet ":!0,"embed ":!0,"object ":"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000"},hasData:function(a){return a=a.nodeType?n.cache[a[n.expando]]:a[n.expando],!!a&&!Q(a)},data:function(a,b,c){return R(a,b,c)},removeData:function(a,b){return S(a,b)},_data:function(a,b,c){return R(a,b,c,!0)},_removeData:function(a,b){return S(a,b,!0)}}),n.fn.extend({data:function(a,b){var c,d,e,f=this[0],g=f&&f.attributes;if(void 0===a){if(this.length&&(e=n.data(f),1===f.nodeType&&!n._data(f,"parsedAttrs"))){c=g.length;while(c--)g[c]&&(d=g[c].name,0===d.indexOf("data-")&&(d=n.camelCase(d.slice(5)),P(f,d,e[d])));n._data(f,"parsedAttrs",!0)}return e}return"object"==typeof 
a?this.each(function(){n.data(this,a)}):arguments.length>1?this.each(function(){n.data(this,a,b)}):f?P(f,a,n.data(f,a)):void 0},removeData:function(a){return this.each(function(){n.removeData(this,a)})}}),n.extend({queue:function(a,b,c){var d;return a?(b=(b||"fx")+"queue",d=n._data(a,b),c&&(!d||n.isArray(c)?d=n._data(a,b,n.makeArray(c)):d.push(c)),d||[]):void 0},dequeue:function(a,b){b=b||"fx";var c=n.queue(a,b),d=c.length,e=c.shift(),f=n._queueHooks(a,b),g=function(){n.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return n._data(a,c)||n._data(a,c,{empty:n.Callbacks("once memory").add(function(){n._removeData(a,b+"queue"),n._removeData(a,c)})})}}),n.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.lengthh;h++)b(a[h],c,g?d:d.call(a[h],h,b(a[h],c)));return e?a:j?b.call(a):i?b(a[0],c):f},Z=/^(?:checkbox|radio)$/i,$=/<([\w:-]+)/,_=/^$|\/(?:java|ecma)script/i,aa=/^\s+/,ba="abbr|article|aside|audio|bdi|canvas|data|datalist|details|dialog|figcaption|figure|footer|header|hgroup|main|mark|meter|nav|output|picture|progress|section|summary|template|time|video";function ca(a){var b=ba.split("|"),c=a.createDocumentFragment();if(c.createElement)while(b.length)c.createElement(b.pop());return c}!function(){var a=d.createElement("div"),b=d.createDocumentFragment(),c=d.createElement("input");a.innerHTML="
    a",l.leadingWhitespace=3===a.firstChild.nodeType,l.tbody=!a.getElementsByTagName("tbody").length,l.htmlSerialize=!!a.getElementsByTagName("link").length,l.html5Clone="<:nav>"!==d.createElement("nav").cloneNode(!0).outerHTML,c.type="checkbox",c.checked=!0,b.appendChild(c),l.appendChecked=c.checked,a.innerHTML="",l.noCloneChecked=!!a.cloneNode(!0).lastChild.defaultValue,b.appendChild(a),c=d.createElement("input"),c.setAttribute("type","radio"),c.setAttribute("checked","checked"),c.setAttribute("name","t"),a.appendChild(c),l.checkClone=a.cloneNode(!0).cloneNode(!0).lastChild.checked,l.noCloneEvent=!!a.addEventListener,a[n.expando]=1,l.attributes=!a.getAttribute(n.expando)}();var da={option:[1,""],legend:[1,"
    ","
    "],area:[1,"",""],param:[1,"",""],thead:[1,"","
    "],tr:[2,"","
    "],col:[2,"","
    "],td:[3,"","
    "],_default:l.htmlSerialize?[0,"",""]:[1,"X
    ","
    "]};da.optgroup=da.option,da.tbody=da.tfoot=da.colgroup=da.caption=da.thead,da.th=da.td;function ea(a,b){var c,d,e=0,f="undefined"!=typeof a.getElementsByTagName?a.getElementsByTagName(b||"*"):"undefined"!=typeof a.querySelectorAll?a.querySelectorAll(b||"*"):void 0;if(!f)for(f=[],c=a.childNodes||a;null!=(d=c[e]);e++)!b||n.nodeName(d,b)?f.push(d):n.merge(f,ea(d,b));return void 0===b||b&&n.nodeName(a,b)?n.merge([a],f):f}function fa(a,b){for(var c,d=0;null!=(c=a[d]);d++)n._data(c,"globalEval",!b||n._data(b[d],"globalEval"))}var ga=/<|&#?\w+;/,ha=/r;r++)if(g=a[r],g||0===g)if("object"===n.type(g))n.merge(q,g.nodeType?[g]:g);else if(ga.test(g)){i=i||p.appendChild(b.createElement("div")),j=($.exec(g)||["",""])[1].toLowerCase(),m=da[j]||da._default,i.innerHTML=m[1]+n.htmlPrefilter(g)+m[2],f=m[0];while(f--)i=i.lastChild;if(!l.leadingWhitespace&&aa.test(g)&&q.push(b.createTextNode(aa.exec(g)[0])),!l.tbody){g="table"!==j||ha.test(g)?""!==m[1]||ha.test(g)?0:i:i.firstChild,f=g&&g.childNodes.length;while(f--)n.nodeName(k=g.childNodes[f],"tbody")&&!k.childNodes.length&&g.removeChild(k)}n.merge(q,i.childNodes),i.textContent="";while(i.firstChild)i.removeChild(i.firstChild);i=p.lastChild}else q.push(b.createTextNode(g));i&&p.removeChild(i),l.appendChecked||n.grep(ea(q,"input"),ia),r=0;while(g=q[r++])if(d&&n.inArray(g,d)>-1)e&&e.push(g);else if(h=n.contains(g.ownerDocument,g),i=ea(p.appendChild(g),"script"),h&&fa(i),c){f=0;while(g=i[f++])_.test(g.type||"")&&c.push(g)}return i=null,p}!function(){var b,c,e=d.createElement("div");for(b in{submit:!0,change:!0,focusin:!0})c="on"+b,(l[b]=c in a)||(e.setAttribute(c,"t"),l[b]=e.attributes[c].expando===!1);e=null}();var ka=/^(?:input|select|textarea)$/i,la=/^key/,ma=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,na=/^(?:focusinfocus|focusoutblur)$/,oa=/^([^.]*)(?:\.(.+)|)/;function pa(){return!0}function qa(){return!1}function ra(){try{return d.activeElement}catch(a){}}function sa(a,b,c,d,e,f){var g,h;if("object"==typeof 
b){"string"!=typeof c&&(d=d||c,c=void 0);for(h in b)sa(a,h,c,d,b[h],f);return a}if(null==d&&null==e?(e=c,d=c=void 0):null==e&&("string"==typeof c?(e=d,d=void 0):(e=d,d=c,c=void 0)),e===!1)e=qa;else if(!e)return a;return 1===f&&(g=e,e=function(a){return n().off(a),g.apply(this,arguments)},e.guid=g.guid||(g.guid=n.guid++)),a.each(function(){n.event.add(this,b,e,d,c)})}n.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=n._data(a);if(r){c.handler&&(i=c,c=i.handler,e=i.selector),c.guid||(c.guid=n.guid++),(g=r.events)||(g=r.events={}),(k=r.handle)||(k=r.handle=function(a){return"undefined"==typeof n||a&&n.event.triggered===a.type?void 0:n.event.dispatch.apply(k.elem,arguments)},k.elem=a),b=(b||"").match(G)||[""],h=b.length;while(h--)f=oa.exec(b[h])||[],o=q=f[1],p=(f[2]||"").split(".").sort(),o&&(j=n.event.special[o]||{},o=(e?j.delegateType:j.bindType)||o,j=n.event.special[o]||{},l=n.extend({type:o,origType:q,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&n.expr.match.needsContext.test(e),namespace:p.join(".")},i),(m=g[o])||(m=g[o]=[],m.delegateCount=0,j.setup&&j.setup.call(a,d,p,k)!==!1||(a.addEventListener?a.addEventListener(o,k,!1):a.attachEvent&&a.attachEvent("on"+o,k))),j.add&&(j.add.call(a,l),l.handler.guid||(l.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,l):m.push(l),n.event.global[o]=!0);a=null}},remove:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=n.hasData(a)&&n._data(a);if(r&&(k=r.events)){b=(b||"").match(G)||[""],j=b.length;while(j--)if(h=oa.exec(b[j])||[],o=q=h[1],p=(h[2]||"").split(".").sort(),o){l=n.event.special[o]||{},o=(d?l.delegateType:l.bindType)||o,m=k[o]||[],h=h[2]&&new 
RegExp("(^|\\.)"+p.join("\\.(?:.*\\.|)")+"(\\.|$)"),i=f=m.length;while(f--)g=m[f],!e&&q!==g.origType||c&&c.guid!==g.guid||h&&!h.test(g.namespace)||d&&d!==g.selector&&("**"!==d||!g.selector)||(m.splice(f,1),g.selector&&m.delegateCount--,l.remove&&l.remove.call(a,g));i&&!m.length&&(l.teardown&&l.teardown.call(a,p,r.handle)!==!1||n.removeEvent(a,o,r.handle),delete k[o])}else for(o in k)n.event.remove(a,o+b[j],c,d,!0);n.isEmptyObject(k)&&(delete r.handle,n._removeData(a,"events"))}},trigger:function(b,c,e,f){var g,h,i,j,l,m,o,p=[e||d],q=k.call(b,"type")?b.type:b,r=k.call(b,"namespace")?b.namespace.split("."):[];if(i=m=e=e||d,3!==e.nodeType&&8!==e.nodeType&&!na.test(q+n.event.triggered)&&(q.indexOf(".")>-1&&(r=q.split("."),q=r.shift(),r.sort()),h=q.indexOf(":")<0&&"on"+q,b=b[n.expando]?b:new n.Event(q,"object"==typeof b&&b),b.isTrigger=f?2:3,b.namespace=r.join("."),b.rnamespace=b.namespace?new RegExp("(^|\\.)"+r.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,b.result=void 0,b.target||(b.target=e),c=null==c?[b]:n.makeArray(c,[b]),l=n.event.special[q]||{},f||!l.trigger||l.trigger.apply(e,c)!==!1)){if(!f&&!l.noBubble&&!n.isWindow(e)){for(j=l.delegateType||q,na.test(j+q)||(i=i.parentNode);i;i=i.parentNode)p.push(i),m=i;m===(e.ownerDocument||d)&&p.push(m.defaultView||m.parentWindow||a)}o=0;while((i=p[o++])&&!b.isPropagationStopped())b.type=o>1?j:l.bindType||q,g=(n._data(i,"events")||{})[b.type]&&n._data(i,"handle"),g&&g.apply(i,c),g=h&&i[h],g&&g.apply&&M(i)&&(b.result=g.apply(i,c),b.result===!1&&b.preventDefault());if(b.type=q,!f&&!b.isDefaultPrevented()&&(!l._default||l._default.apply(p.pop(),c)===!1)&&M(e)&&h&&e[q]&&!n.isWindow(e)){m=e[h],m&&(e[h]=null),n.event.triggered=q;try{e[q]()}catch(s){}n.event.triggered=void 0,m&&(e[h]=m)}return b.result}},dispatch:function(a){a=n.event.fix(a);var 
b,c,d,f,g,h=[],i=e.call(arguments),j=(n._data(this,"events")||{})[a.type]||[],k=n.event.special[a.type]||{};if(i[0]=a,a.delegateTarget=this,!k.preDispatch||k.preDispatch.call(this,a)!==!1){h=n.event.handlers.call(this,a,j),b=0;while((f=h[b++])&&!a.isPropagationStopped()){a.currentTarget=f.elem,c=0;while((g=f.handlers[c++])&&!a.isImmediatePropagationStopped())a.rnamespace&&!a.rnamespace.test(g.namespace)||(a.handleObj=g,a.data=g.data,d=((n.event.special[g.origType]||{}).handle||g.handler).apply(f.elem,i),void 0!==d&&(a.result=d)===!1&&(a.preventDefault(),a.stopPropagation()))}return k.postDispatch&&k.postDispatch.call(this,a),a.result}},handlers:function(a,b){var c,d,e,f,g=[],h=b.delegateCount,i=a.target;if(h&&i.nodeType&&("click"!==a.type||isNaN(a.button)||a.button<1))for(;i!=this;i=i.parentNode||this)if(1===i.nodeType&&(i.disabled!==!0||"click"!==a.type)){for(d=[],c=0;h>c;c++)f=b[c],e=f.selector+" ",void 0===d[e]&&(d[e]=f.needsContext?n(e,this).index(i)>-1:n.find(e,this,null,[i]).length),d[e]&&d.push(f);d.length&&g.push({elem:i,handlers:d})}return h]","i"),va=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:-]+)[^>]*)\/>/gi,wa=/\s*$/g,Aa=ca(d),Ba=Aa.appendChild(d.createElement("div"));function Ca(a,b){return n.nodeName(a,"table")&&n.nodeName(11!==b.nodeType?b:b.firstChild,"tr")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function Da(a){return a.type=(null!==n.find.attr(a,"type"))+"/"+a.type,a}function Ea(a){var b=ya.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function Fa(a,b){if(1===b.nodeType&&n.hasData(a)){var c,d,e,f=n._data(a),g=n._data(b,f),h=f.events;if(h){delete g.handle,g.events={};for(c in h)for(d=0,e=h[c].length;e>d;d++)n.event.add(b,c,h[c][d])}g.data&&(g.data=n.extend({},g.data))}}function Ga(a,b){var c,d,e;if(1===b.nodeType){if(c=b.nodeName.toLowerCase(),!l.noCloneEvent&&b[n.expando]){e=n._data(b);for(d in 
e.events)n.removeEvent(b,d,e.handle);b.removeAttribute(n.expando)}"script"===c&&b.text!==a.text?(Da(b).text=a.text,Ea(b)):"object"===c?(b.parentNode&&(b.outerHTML=a.outerHTML),l.html5Clone&&a.innerHTML&&!n.trim(b.innerHTML)&&(b.innerHTML=a.innerHTML)):"input"===c&&Z.test(a.type)?(b.defaultChecked=b.checked=a.checked,b.value!==a.value&&(b.value=a.value)):"option"===c?b.defaultSelected=b.selected=a.defaultSelected:"input"!==c&&"textarea"!==c||(b.defaultValue=a.defaultValue)}}function Ha(a,b,c,d){b=f.apply([],b);var e,g,h,i,j,k,m=0,o=a.length,p=o-1,q=b[0],r=n.isFunction(q);if(r||o>1&&"string"==typeof q&&!l.checkClone&&xa.test(q))return a.each(function(e){var f=a.eq(e);r&&(b[0]=q.call(this,e,f.html())),Ha(f,b,c,d)});if(o&&(k=ja(b,a[0].ownerDocument,!1,a,d),e=k.firstChild,1===k.childNodes.length&&(k=e),e||d)){for(i=n.map(ea(k,"script"),Da),h=i.length;o>m;m++)g=k,m!==p&&(g=n.clone(g,!0,!0),h&&n.merge(i,ea(g,"script"))),c.call(a[m],g,m);if(h)for(j=i[i.length-1].ownerDocument,n.map(i,Ea),m=0;h>m;m++)g=i[m],_.test(g.type||"")&&!n._data(g,"globalEval")&&n.contains(j,g)&&(g.src?n._evalUrl&&n._evalUrl(g.src):n.globalEval((g.text||g.textContent||g.innerHTML||"").replace(za,"")));k=e=null}return a}function Ia(a,b,c){for(var d,e=b?n.filter(b,a):a,f=0;null!=(d=e[f]);f++)c||1!==d.nodeType||n.cleanData(ea(d)),d.parentNode&&(c&&n.contains(d.ownerDocument,d)&&fa(ea(d,"script")),d.parentNode.removeChild(d));return a}n.extend({htmlPrefilter:function(a){return a.replace(va,"<$1>")},clone:function(a,b,c){var d,e,f,g,h,i=n.contains(a.ownerDocument,a);if(l.html5Clone||n.isXMLDoc(a)||!ua.test("<"+a.nodeName+">")?f=a.cloneNode(!0):(Ba.innerHTML=a.outerHTML,Ba.removeChild(f=Ba.firstChild)),!(l.noCloneEvent&&l.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||n.isXMLDoc(a)))for(d=ea(f),h=ea(a),g=0;null!=(e=h[g]);++g)d[g]&&Ga(e,d[g]);if(b)if(c)for(h=h||ea(a),d=d||ea(f),g=0;null!=(e=h[g]);g++)Fa(e,d[g]);else Fa(a,f);return 
d=ea(f,"script"),d.length>0&&fa(d,!i&&ea(a,"script")),d=h=e=null,f},cleanData:function(a,b){for(var d,e,f,g,h=0,i=n.expando,j=n.cache,k=l.attributes,m=n.event.special;null!=(d=a[h]);h++)if((b||M(d))&&(f=d[i],g=f&&j[f])){if(g.events)for(e in g.events)m[e]?n.event.remove(d,e):n.removeEvent(d,e,g.handle);j[f]&&(delete j[f],k||"undefined"==typeof d.removeAttribute?d[i]=void 0:d.removeAttribute(i),c.push(f))}}}),n.fn.extend({domManip:Ha,detach:function(a){return Ia(this,a,!0)},remove:function(a){return Ia(this,a)},text:function(a){return Y(this,function(a){return void 0===a?n.text(this):this.empty().append((this[0]&&this[0].ownerDocument||d).createTextNode(a))},null,a,arguments.length)},append:function(){return Ha(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Ca(this,a);b.appendChild(a)}})},prepend:function(){return Ha(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Ca(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return Ha(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return Ha(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},empty:function(){for(var a,b=0;null!=(a=this[b]);b++){1===a.nodeType&&n.cleanData(ea(a,!1));while(a.firstChild)a.removeChild(a.firstChild);a.options&&n.nodeName(a,"select")&&(a.options.length=0)}return this},clone:function(a,b){return a=null==a?!1:a,b=null==b?a:b,this.map(function(){return n.clone(this,a,b)})},html:function(a){return Y(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a)return 1===b.nodeType?b.innerHTML.replace(ta,""):void 0;if("string"==typeof 
a&&!wa.test(a)&&(l.htmlSerialize||!ua.test(a))&&(l.leadingWhitespace||!aa.test(a))&&!da[($.exec(a)||["",""])[1].toLowerCase()]){a=n.htmlPrefilter(a);try{for(;d>c;c++)b=this[c]||{},1===b.nodeType&&(n.cleanData(ea(b,!1)),b.innerHTML=a);b=0}catch(e){}}b&&this.empty().append(a)},null,a,arguments.length)},replaceWith:function(){var a=[];return Ha(this,arguments,function(b){var c=this.parentNode;n.inArray(this,a)<0&&(n.cleanData(ea(this)),c&&c.replaceChild(b,this))},a)}}),n.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){n.fn[a]=function(a){for(var c,d=0,e=[],f=n(a),h=f.length-1;h>=d;d++)c=d===h?this:this.clone(!0),n(f[d])[b](c),g.apply(e,c.get());return this.pushStack(e)}});var Ja,Ka={HTML:"block",BODY:"block"};function La(a,b){var c=n(b.createElement(a)).appendTo(b.body),d=n.css(c[0],"display");return c.detach(),d}function Ma(a){var b=d,c=Ka[a];return c||(c=La(a,b),"none"!==c&&c||(Ja=(Ja||n("