annotate test-data/annotemp/pivot_wider_jupytool_notebook.ipynb @ 0:e067fe0084c5 draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
author ecology
date Fri, 27 Sep 2024 12:56:53 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
1 {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
2 "cells": [
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
3 {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
4 "cell_type": "markdown",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
5 "metadata": {},
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
6 "source": [
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
7 "# Pivot wider Jupytool "
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
8 ]
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
9 },
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
10 {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
11 "cell_type": "markdown",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
12 "metadata": {},
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
13 "source": [
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
14 "This Jupyter notebook is dedicated to the pivot_wider function from the tidyr R package. \n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
15 "This script is the final part of the data preparation for the ecoregionalization Galaxy workflow. "
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
16 ]
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
17 },
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
18 {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
19 "cell_type": "code",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
20 "execution_count": 62,
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
21 "metadata": {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
22 "tags": []
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
23 },
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
24 "outputs": [],
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
25 "source": [
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
26 "#Date : 22/05/2024\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
27 "#Author : Seguineau Pauline & Yvan Le Bras \n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
28 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
29 "#Load libraries\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
30 "library(tidyr)\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
31 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
32 "#load file \n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
33 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
34 "input_path = \"galaxy_inputs\"\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
35 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
36 "for (dir in list.dirs(input_path)){\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
37 " for (file in list.files(dir)) {\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
38 " file_path = file.path(dir, file)}\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
39 "}\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
40 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
41 "file = read.table(file_path,header=T, sep = \"\\t\")\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
42 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
43 "#Run pivot_wider function\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
44 "pivot_file = pivot_wider(data = file,\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
45 " names_from = phylum_class_order_family_genus_specificEpithet,\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
46 " values_from = individualCount,\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
47 " values_fill = 0,\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
48 " values_fn = sum)\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
49 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
50 "#Replace all occurrences >= 1 by 1 to have only presence (1) or absence (0) data\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
51 "for(c in 3:length(pivot_file)){\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
52 " pivot_file[c][pivot_file[c]>=1] <- 1}\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
53 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
54 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
55 "write.table(pivot_file, \"outputs/pivot_file.tabular\", sep = \"\\t\", quote = F, row.names = F)"
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
56 ]
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
57 },
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
58 {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
59 "cell_type": "markdown",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
60 "metadata": {},
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
61 "source": [
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
62 "In this Jupyter notebook, we used the pivot_wider function of the tidyr package to transform our data into a wider format suited to subsequent analyses as part of the Galaxy workflow for ecoregionalization. This transformation allowed us to convert our data to a format where each taxon becomes a separate column. We also took care to fill in the missing values with zeros and to sum the individual counts in case of duplications. Then all values >= 1 are replaced by 1 to have only presence (1) or absence (0) data.\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
63 "\n",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
64 "Thus, this notebook is an essential building block of our analysis pipeline, ensuring that the data is properly formatted and ready to be explored and interpreted for ecoregionalization studies."
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
65 ]
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
66 }
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
67 ],
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
68 "metadata": {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
69 "kernelspec": {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
70 "display_name": "R",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
71 "language": "R",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
72 "name": "ir"
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
73 },
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
74 "language_info": {
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
75 "codemirror_mode": "r",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
76 "file_extension": ".r",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
77 "mimetype": "text/x-r-source",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
78 "name": "R",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
79 "pygments_lexer": "r",
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
80 "version": "4.0.3"
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
81 }
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
82 },
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
83 "nbformat": 4,
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
84 "nbformat_minor": 4
e067fe0084c5 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
85 }