annotate test-data/annotemp/pivot_wider_jupytool_notebook.ipynb @ 2:b14e398fee7f draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 61182ba790bdeeb98750403b869051ccad1a736c
author ecology
date Thu, 16 Jan 2025 15:51:30 +0000
parents e1b8dad192ee
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
1 {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
2 "cells": [
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
3 {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
4 "cell_type": "markdown",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
5 "metadata": {},
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
6 "source": [
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
7 "# Pivot wider Jupytool "
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
8 ]
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
9 },
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
10 {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
11 "cell_type": "markdown",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
12 "metadata": {},
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
13 "source": [
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
14 "This Jupyter notebook is dedicated to the pivot_wider function from the tidyr R package. \n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
15 "This script is the final part of the data preparation for the ecoregionalization Galaxy workflow. "
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
16 ]
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
17 },
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
18 {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
19 "cell_type": "code",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
20 "execution_count": 62,
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
21 "metadata": {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
22 "tags": []
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
23 },
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
24 "outputs": [],
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
25 "source": [
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
26 "#Date : 22/05/2024\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
27 "#Author : Seguineau Pauline & Yvan Le Bras \n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
28 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
29 "#Load libraries\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
30 "library(tidyr)\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
31 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
32 "#load file \n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
33 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
34 "input_path = \"galaxy_inputs\"\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
35 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
36 "for (dir in list.dirs(input_path)){\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
37 " for (file in list.files(dir)) {\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
38 " file_path = file.path(dir, file)}\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
39 "}\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
40 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
41 "file = read.table(file_path,header=T, sep = \"\\t\")\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
42 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
43 "#Run pivot_wider function\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
44 "pivot_file = pivot_wider(data = file,\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
45 " names_from = phylum_class_order_family_genus_specificEpithet,\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
46 " values_from = individualCount,\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
47 " values_fill = 0,\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
48 " values_fn = sum)\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
49 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
50 "#Replace all occurrences >= 1 by 1 to have only presence (1) or absence (0) data\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
51 "for(c in 3:length(pivot_file)){\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
52 " pivot_file[c][pivot_file[c]>=1] <- 1}\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
53 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
54 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
55 "write.table(pivot_file, \"outputs/pivot_file.tabular\", sep = \"\\t\", quote = F, row.names = F)"
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
56 ]
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
57 },
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
58 {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
59 "cell_type": "markdown",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
60 "metadata": {},
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
61 "source": [
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
62 "In this Jupyter notebook, we used the pivot_wider function of the tidyr package to transform our data into a wider format suited to subsequent analyses as part of the Galaxy workflow for ecoregionalization. This transformation allowed us to convert our data to a format where each taxon becomes a separate column. We also took care to fill in the missing values with zeros and to sum the individual counts in case of duplicates. Then all values >= 1 are replaced by 1 to keep only presence (1) or absence (0) data.\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
63 "\n",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
64 "Thus, this notebook is an essential building block of our analysis pipeline, ensuring that the data is properly formatted and ready to be explored and interpreted for ecoregionalization studies."
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
65 ]
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
66 }
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
67 ],
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
68 "metadata": {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
69 "kernelspec": {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
70 "display_name": "R",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
71 "language": "R",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
72 "name": "ir"
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
73 },
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
74 "language_info": {
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
75 "codemirror_mode": "r",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
76 "file_extension": ".r",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
77 "mimetype": "text/x-r-source",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
78 "name": "R",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
79 "pygments_lexer": "r",
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
80 "version": "4.0.3"
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
81 }
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
82 },
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
83 "nbformat": 4,
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
84 "nbformat_minor": 4
e1b8dad192ee planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
85 }