CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

| Download

Sample Template for Wix Embedding

Views: 46
License: MIT
Image: ubuntu2204
Kernel: Python 3 (system-wide)

Data Collection via GET request from SpaceX API

# importing libraries import requests import pandas as pd import numpy as np import datetime

Defining functions that use the API to extract the specific columns we need, keyed by each launch record's unique identification number

# For Booster Version
def gBV(data):
    """Append each launch's rocket (booster) name to the global BoosterVersion list."""
    for rocket_id in data['rocket']:
        if not rocket_id:
            continue
        # One API call per rocket id; the response carries the rocket's name
        rocket = requests.get("https://api.spacexdata.com/v4/rockets/" + str(rocket_id)).json()
        BoosterVersion.append(rocket['name'])
# For Launch Site, Longitude, Latitude
def gLS(data):
    """Append each launchpad's longitude, latitude, and name to the global lists."""
    for pad_id in data['launchpad']:
        if not pad_id:
            continue
        # One API call per launchpad id
        pad = requests.get("https://api.spacexdata.com/v4/launchpads/" + str(pad_id)).json()
        Longitude.append(pad['longitude'])
        Latitude.append(pad['latitude'])
        LaunchSite.append(pad['name'])
# For Payload mass_kg, orbit
def gpd(data):
    """Append each payload's mass (kg) and orbit to the global lists."""
    for payload_id in data['payloads']:
        if not payload_id:
            continue
        # One API call per payload id
        payload = requests.get("https://api.spacexdata.com/v4/payloads/" + payload_id).json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])
# For the cores column of the dataset
def gcd(data):
    """Fill the core-related global lists for every launch in `data`.

    When a core id is present, fetch block/reuse_count/serial from the API;
    otherwise append None placeholders so all lists stay the same length.
    The landing outcome, flight count, grid fins, reuse flag, legs, and
    landing pad come straight from the launch record itself.
    """
    for core in data['cores']:
        # Fixed: identity comparison with None uses `is not None`, not `!= None`
        if core['core'] is not None:
            response = requests.get("https://api.spacexdata.com/v4/cores/" + core['core']).json()
            Block.append(response['block'])
            ReusedCount.append(response['reuse_count'])
            Serial.append(response['serial'])
        else:
            # No core id for this launch — keep the lists aligned with placeholders
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(str(core['landing_success']) + ' ' + str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])
# Requesting the full set of past launches from the SpaceX API
spacex_url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(spacex_url)
# Show every column at full width, then flatten the JSON payload into a frame
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)
data = pd.json_normalize(response.json())
data.head()
# Keep only the features needed downstream, identified per launch
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Drop launches with multiple cores or multiple payloads (multi-core rows are
# Falcon Heavy flights), leaving single-element lists in both columns
data = data[data['cores'].map(len) == 1]
data = data[data['payloads'].map(len) == 1]

# Unwrap those single-element lists into scalar values
data['cores'] = data['cores'].map(lambda cell: cell[0])
data['payloads'] = data['payloads'].map(lambda cell: cell[0])

# Parse date_utc into a calendar date and restrict to launches on or before 2020-11-13
data['date'] = pd.to_datetime(data['date_utc']).dt.date
data = data[data['date'] <= datetime.date(2020, 11, 13)]
# Accumulator lists, one per feature; the g* helpers append into these.
# A generator expression guarantees fifteen *distinct* empty lists.
(BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome,
 Flights, GridFins, Reused, Legs, LandingPad,
 Block, ReusedCount, Serial, Longitude, Latitude) = ([] for _ in range(15))
# Populate the accumulator lists by running every extractor over the launches
for extractor in (gBV, gLS, gpd, gcd):
    extractor(data)
# Assemble the scraped columns into one dictionary, then build a DataFrame from it
launch_dct = dict(
    FlightNumber=list(data['flight_number']),
    Date=list(data['date']),
    BoosterVersion=BoosterVersion,
    PayloadMass=PayloadMass,
    Orbit=Orbit,
    LaunchSite=LaunchSite,
    Outcome=Outcome,
    Flights=Flights,
    GridFins=GridFins,
    Reused=Reused,
    Legs=Legs,
    LandingPad=LandingPad,
    Block=Block,
    ReusedCount=ReusedCount,
    Serial=Serial,
    Longitude=Longitude,
    Latitude=Latitude,
)
df = pd.DataFrame(launch_dct)
df.head()
# Filtering out Falcon 1 launches into a separate dataframe.
# Fixed: .copy() makes f9df an independent frame, so the FlightNumber
# assignment below writes real data instead of a view of df (avoids
# SettingWithCopyWarning and silently-lost writes under copy-on-write).
f9df = df[df['BoosterVersion'] != 'Falcon 1'].copy()
# Renumber flights 1..N now that the Falcon 1 rows are gone
f9df.loc[:, 'FlightNumber'] = list(range(1, f9df.shape[0] + 1))
f9df.head()
# Persist the wrangled launch data for the next stage of the project
output_path = 'dataset_p1_API.csv'
f9df.to_csv(output_path, index=False)