from platform import python_version
print(python_version())3.8.11
Mohammad Belal
December 18, 2021
# Run the pip install command below if you don't already have the library
!pip install git+https://github.com/JustAnotherArchivist/snscrape.git
# Run the below command if you don't already have Pandas
# !pip install pandas
# Imports
import snscrape.modules.twitter as sntwitterCollecting git+https://github.com/JustAnotherArchivist/snscrape.git
Cloning https://github.com/JustAnotherArchivist/snscrape.git to c:\users\acer\appdata\local\temp\pip-req-build-b_3ucq_3
Resolved https://github.com/JustAnotherArchivist/snscrape.git to commit f9a3fafb3fe44986e81bb0f8b7024de1a2fa065d
Requirement already satisfied: requests[socks] in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from snscrape==0.3.5.dev121+gf9a3faf) (2.26.0)
Requirement already satisfied: lxml in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from snscrape==0.3.5.dev121+gf9a3faf) (4.6.3)
Requirement already satisfied: beautifulsoup4 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from snscrape==0.3.5.dev121+gf9a3faf) (4.9.3)
Requirement already satisfied: pytz in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from snscrape==0.3.5.dev121+gf9a3faf) (2021.1)
Requirement already satisfied: soupsieve>1.2 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from beautifulsoup4->snscrape==0.3.5.dev121+gf9a3faf) (2.2.1)
Requirement already satisfied: charset-normalizer~=2.0.0 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from requests[socks]->snscrape==0.3.5.dev121+gf9a3faf) (2.0.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from requests[socks]->snscrape==0.3.5.dev121+gf9a3faf) (1.26.6)
Requirement already satisfied: idna<4,>=2.5 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from requests[socks]->snscrape==0.3.5.dev121+gf9a3faf) (3.2)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from requests[socks]->snscrape==0.3.5.dev121+gf9a3faf) (2021.5.30)
Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from requests[socks]->snscrape==0.3.5.dev121+gf9a3faf) (1.7.1)
Running command git clone -q https://github.com/JustAnotherArchivist/snscrape.git 'C:\Users\Acer\AppData\Local\Temp\pip-req-build-b_3ucq_3'
Requirement already satisfied: pandas in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (1.3.1)
Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2017.3 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from pandas) (2021.1)
Requirement already satisfied: numpy>=1.17.3 in c:\users\acer\anaconda3\envs\py3x\lib\site-packages (from pandas) (1.21.1)
Requirement already satisfied: six>=1.5 in c:\users\acer\appdata\roaming\python\python38\site-packages (from python-dateutil>=2.7.3->pandas) (1.14.0)
Link to the Paper by Jashinsky
keywords=["suicidal","suicide","kill myself","my suicide note","end my life","never wake up","can't go on","not worth living",
"ready to jump","sleep forever","want to die","be dead","better off without me","better off dead","suicide plan",
"suicide pact","tired of living","don't want to be here","die alone","go to sleep forever"]
len(keywords)20
# Setting variables to be used below
maxTweets = 500
# Creating list to append tweet data to
tweets_list_january = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for i in keywords:
for i,tweet in enumerate(sntwitter.TwitterSearchScraper(f'{i} since:2021-01-01 until:2021-01-31').get_items()):
if i>maxTweets:
break
tweets_list_january.append([tweet.date, tweet.id, tweet.content, tweet.user.username,tweet.user.displayname,tweet.user.location])
# for i,tweet in enumerate(sntwitter.TwitterSearchScraper('ready to jump since:2021-01-01 until:2021-01-31').get_items()):
# if i>10:
# break
# # tweets_list2.append([tweet.date, tweet.id, tweet.content, tweet.user.username,tweet.user.location])
# print(tweet.user.username)tweets_df_january = pd.DataFrame(tweets_list_january, columns=['Datetime', 'Tweet Id', 'Text', 'Username','Displayname','Location'])
# Display first 5 entries from dataframe
len(tweets_df_january)
tweets_df_january.head()| Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|
| 0 | 2021-01-30 23:59:11+00:00 | 1355666901842751491 | Momiji wanted to commit suicide in OoO when Ri... | sgottadraw | Rere / Rei / Re | CHAOS! I CHOOSE YOU! |
| 1 | 2021-01-30 23:58:57+00:00 | 1355666844120612866 | IF YOU ACT SUICIDAL FOR ATTENTION YOURE WEIRD ASF | kalnzl | kal | im not inactive just lying low |
| 2 | 2021-01-30 23:58:53+00:00 | 1355666826773094406 | i feel so suicidal and i literally got home se... | cryshroom | ☹ | TW vent acc |
| 3 | 2021-01-30 23:57:36+00:00 | 1355666502955970561 | suicidal thoughts começaram demasiado cedo htt... | mariiasrt | maria | venus |
| 4 | 2021-01-30 23:57:20+00:00 | 1355666438653149187 | @CopingMAGA This tiktok triggered my suicidal ... | doyouknowdebobo | Haigher. |
# Setting variables to be used below
maxTweets = 500
# Creating list to append tweet data to
tweets_list_february = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for i in keywords:
for i,tweet in enumerate(sntwitter.TwitterSearchScraper(f'{i} since:2021-02-01 until:2021-02-28').get_items()):
if i>maxTweets:
break
tweets_list_february.append([tweet.date, tweet.id, tweet.content, tweet.user.username,tweet.user.displayname,tweet.user.location])tweets_df_february = pd.DataFrame(tweets_list_february, columns=['Datetime', 'Tweet Id', 'Text', 'Username','Displayname','Location'])
# Display first 5 entries from dataframe
tweets_df_february| Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|
| 0 | 2021-02-27 23:59:35+00:00 | 1365813864143675394 | me when i’m feeling suicidal again cool cool c... | J00NCALS | jj | they/them |
| 1 | 2021-02-27 23:58:26+00:00 | 1365813572811583490 | i feel so weird tonight, I feel so down and su... | mqddiesortiz | -em♥ | she/they | lesbain |
| 2 | 2021-02-27 23:58:02+00:00 | 1365813473955905537 | @mwanamutapaa @AlwaysAnyways @Danidollasss Hey... | luffylee | LONNIE DAVIS | Los Angeles, CA |
| 3 | 2021-02-27 23:57:35+00:00 | 1365813360961458184 | @dr_SDRK True! The government is imposing lots... | thebossoriginal | The Boss© 😎 🇺🇸 🇮🇳 | United States |
| 4 | 2021-02-27 23:57:32+00:00 | 1365813347778760711 | @BareLeft @mattzarb It's a case of falling for... | SirPaulHartley | Paul Hartley | |
| ... | ... | ... | ... | ... | ... | ... |
| 10015 | 2021-02-05 07:32:15+00:00 | 1357592861508857857 | @ChicagoFireGur1 Somebody’s still awake?! Go t... | forever_chicago | Natalia | |
| 10016 | 2021-02-05 07:20:47+00:00 | 1357589976985309186 | morning, im gonna go to sleep again\njust here... | ArekSucks | Arek🌈 | any pronouns |
| 10017 | 2021-02-05 04:47:55+00:00 | 1357551506044559361 | "Have you ever buried your nose in a mountain ... | DBDAN61 | Dan | Los Angeles, California |
| 10018 | 2021-02-05 04:34:51+00:00 | 1357548215214743553 | @cestlavie9090 @loridyanne @BTS_twt Before I g... | LorettaB1962 | Loretta B 💜 aka Lovely Army | |
| 10019 | 2021-02-05 03:37:05+00:00 | 1357533679237660672 | Just needed my babies to go to sleep but I’m j... | CrazyCocoMama | Coco |
10020 rows × 6 columns
# Setting variables to be used below
maxTweets = 500
# Creating list to append tweet data to
tweets_list_march = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for j in keywords:
for i,tweet in enumerate(sntwitter.TwitterSearchScraper(f'{i} since:2021-03-01 until:2021-03-30').get_items()):
if i>maxTweets:
break
tweets_list_march.append([tweet.date, tweet.id, tweet.content, tweet.user.username,tweet.user.displayname,tweet.user.location])tweets_df_march = pd.DataFrame(tweets_list_march, columns=['Datetime', 'Tweet Id', 'Text', 'Username','Displayname','Location'])
# Display first 5 entries from dataframe
tweets_df_march| Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|
| 0 | 2021-03-29 23:59:30+00:00 | 1376685479303606273 | RT>おう、、切ない…\nおいしいご飯が食べれますように🙏🏻 | tanuki_501 | 豆だぬき🐾 | イラスト⇨ |
| 1 | 2021-03-29 23:59:29+00:00 | 1376685476380205056 | @echinaceakatze @cactusy_501 你講得好好呀 欣賞你\n原諒到父母... | funfun96904935 | Giselle | |
| 2 | 2021-03-29 23:58:16+00:00 | 1376685167868137472 | 無料100連SSR2とスーパームック\nベリアルはもう4凸あるからただのはずれじゃ\nスーパ... | ARIA_501 | アリア | |
| 3 | 2021-03-29 23:58:04+00:00 | 1376685119386357760 | 🛑❗️ ¡ATENCIÓN! ¡APERTURAMOS DOS SEDES EN LIMA... | GumaroRodriguez | Instituto Investigación Científica Mundo ININCIM | Arequipa, Perú |
| 4 | 2021-03-29 23:57:35+00:00 | 1376684997155823618 | @blackmailqqueen 501 | queens_property | KiannahsDonkey | Goddess Kiannahs Cage |
| ... | ... | ... | ... | ... | ... | ... |
| 10015 | 2021-03-29 20:30:04+00:00 | 1376632775198052355 | @k7L 501 | naad12399 | Nada | اتحاد جدة |
| 10016 | 2021-03-29 20:30:00+00:00 | 1376632756050874368 | #ContadorCotejo Según voceros oficiales duran... | cotejoinfo | Cotejo | Venezuela |
| 10017 | 2021-03-29 20:29:56+00:00 | 1376632740070711296 | @k7L 501 | soffe153 | سبحانك ربي | |
| 10018 | 2021-03-29 20:29:47+00:00 | 1376632704100360193 | @k7L 501 | jaa99955 | jm90 | |
| 10019 | 2021-03-29 20:29:43+00:00 | 1376632685154742282 | @k7L ٥٠١ | fatom1414fat | فاطمه💙🤍💙🤍💙🇸🇦🇸🇦 | المملكة العربية السعودية |
10020 rows × 6 columns
# Setting variables to be used below
maxTweets = 500
# Creating list to append tweet data to
tweets_list_april = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for i in keywords:
for i,tweet in enumerate(sntwitter.TwitterSearchScraper(f'{i} since:2021-04-01 until:2021-04-30').get_items()):
if i>maxTweets:
break
tweets_list_april.append([tweet.date, tweet.id, tweet.content, tweet.user.username,tweet.user.displayname,tweet.user.location])tweets_df_april = pd.DataFrame(tweets_list_april, columns=['Datetime', 'Tweet Id', 'Text', 'Username','Displayname','Location'])
# Display first 5 entries from dataframe
tweets_df_april.head()| Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|
| 0 | 2021-04-29 23:59:48+00:00 | 1387919578454495236 | @Malcolm_theCat @chewbaaarker I’ve been slande... | mand_rainbow | Amanda | England |
| 1 | 2021-04-29 23:59:38+00:00 | 1387919536322752512 | what if i was suicidal what if he was my 13th ... | gothhyuka | shinji from among us📮📚semi ia for school | kai/yu/kaiyu, 15 , ar53 NA |
| 2 | 2021-04-29 23:59:19+00:00 | 1387919456072986625 | // suicide ment\n\nghostbur is gone. \nwhat if... | honeymoobloom | glitch | she/her | 17 | bri simp !! |
| 3 | 2021-04-29 23:59:05+00:00 | 1387919399529574401 | i was EVEN MORE mentally unstable at the time ... | vivaIapony | twink-182 𓆗↯⁜(the bilvy half of treckett) | seattle•18•⚣•some nsfw+drugs |
| 4 | 2021-04-29 23:56:43+00:00 | 1387918800738205701 | @jef_999 the sad part is after staying away fo... | poundsterlingss | Baby🥺Girl💜; Ugo✨🇬🇧 | Lagos, Nigeria |
# Setting variables to be used below
maxTweets = 500
# Creating list to append tweet data to
tweets_list_may = []
# Using TwitterSearchScraper to scrape data and append tweets to list
for i in keywords:
for i,tweet in enumerate(sntwitter.TwitterSearchScraper(f'{i} since:2021-05-01 until:2021-05-31').get_items()):
if i>maxTweets:
break
tweets_list_may.append([tweet.date, tweet.id, tweet.content, tweet.user.username,tweet.user.displayname,tweet.user.location])tweets_df_may = pd.DataFrame(tweets_list_may, columns=['Datetime', 'Tweet Id', 'Text', 'Username','Displayname','Location'])
# Display first 5 entries from dataframe
tweets_df_may.head()| Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|
| 0 | 2021-05-30 23:59:10+00:00 | 1399153444217364502 | CW: SUICIDAL//SOFT GORE//WORMS//TEETH\nJust a ... | fizzysodapop | swagcorez @ working on characters for a manga | |
| 1 | 2021-05-30 23:59:08+00:00 | 1399153432980754437 | It Kinda makes the Magical creatures in #Mirac... | Red_Rescue | Sebastian Colt | The Internet |
| 2 | 2021-05-30 23:56:28+00:00 | 1399152762244546560 | @mchaelortz I tried to explain being passively... | antibhadrata | monsieur 🤬🤬🤬 | kweens, occupied canarsie land |
| 3 | 2021-05-30 23:56:00+00:00 | 1399152646699761666 | @astericias what if I was suicidal??? /j | clemenqt | echo || ✈️ | he they it +pinned |
| 4 | 2021-05-30 23:55:59+00:00 | 1399152643260493828 | @meuriiiiiiine @Rebecca18158220 @SparkleMeghan... | lizziehaque | lizzie haque art | derbyshire |
| Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|
| 0 | 2021-01-30 23:59:11+00:00 | 1355666901842751491 | Momiji wanted to commit suicide in OoO when Ri... | sgottadraw | Rere / Rei / Re | CHAOS! I CHOOSE YOU! |
| 1 | 2021-01-30 23:58:57+00:00 | 1355666844120612866 | IF YOU ACT SUICIDAL FOR ATTENTION YOURE WEIRD ASF | kalnzl | kal | im not inactive just lying low |
| 2 | 2021-01-30 23:58:53+00:00 | 1355666826773094406 | i feel so suicidal and i literally got home se... | cryshroom | ☹ | TW vent acc |
| 3 | 2021-01-30 23:57:36+00:00 | 1355666502955970561 | suicidal thoughts começaram demasiado cedo htt... | mariiasrt | maria | venus |
| 4 | 2021-01-30 23:57:20+00:00 | 1355666438653149187 | @CopingMAGA This tiktok triggered my suicidal ... | doyouknowdebobo | Haigher. | |
| ... | ... | ... | ... | ... | ... | ... |
| 10015 | 2021-05-08 05:57:56+00:00 | 1390908809246355464 | Hm. I read word s,, then. I go to sleep foreve... | wonkeydonkey69 | dinkey donkey | New York, USA |
| 10016 | 2021-05-08 05:42:05+00:00 | 1390904820064677888 | Im just 8 pills away to go to sleep forever | _ventiice_ | nenekkaubuatfrontflip | |
| 10017 | 2021-05-08 05:20:50+00:00 | 1390899473870643202 | Doing TV reminds me of being a racer. It use t... | MW55 | Michael Waltrip | |
| 10018 | 2021-05-08 05:13:51+00:00 | 1390897714959986692 | @SerithDusson This magic wand is so special it... | Gokuswitch2 | gswitch #Macemob | United States |
| 10019 | 2021-05-08 04:38:50+00:00 | 1390888903163400199 | Sis took forever to go to Sleep tonight 😩 | _LOVEJasmine | Angel of mine ❣️ | miami | 🇧🇸 |
50100 rows × 6 columns
| index | Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|---|
| 0 | 0 | 2021-01-30 23:59:11+00:00 | 1355666901842751491 | Momiji wanted to commit suicide in OoO when Ri... | sgottadraw | Rere / Rei / Re | CHAOS! I CHOOSE YOU! |
| 1 | 1 | 2021-01-30 23:58:57+00:00 | 1355666844120612866 | IF YOU ACT SUICIDAL FOR ATTENTION YOURE WEIRD ASF | kalnzl | kal | im not inactive just lying low |
| 2 | 2 | 2021-01-30 23:58:53+00:00 | 1355666826773094406 | i feel so suicidal and i literally got home se... | cryshroom | ☹ | TW vent acc |
| 3 | 3 | 2021-01-30 23:57:36+00:00 | 1355666502955970561 | suicidal thoughts começaram demasiado cedo htt... | mariiasrt | maria | venus |
| 4 | 4 | 2021-01-30 23:57:20+00:00 | 1355666438653149187 | @CopingMAGA This tiktok triggered my suicidal ... | doyouknowdebobo | Haigher. | |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 50095 | 10015 | 2021-05-08 05:57:56+00:00 | 1390908809246355464 | Hm. I read word s,, then. I go to sleep foreve... | wonkeydonkey69 | dinkey donkey | New York, USA |
| 50096 | 10016 | 2021-05-08 05:42:05+00:00 | 1390904820064677888 | Im just 8 pills away to go to sleep forever | _ventiice_ | nenekkaubuatfrontflip | |
| 50097 | 10017 | 2021-05-08 05:20:50+00:00 | 1390899473870643202 | Doing TV reminds me of being a racer. It use t... | MW55 | Michael Waltrip | |
| 50098 | 10018 | 2021-05-08 05:13:51+00:00 | 1390897714959986692 | @SerithDusson This magic wand is so special it... | Gokuswitch2 | gswitch #Macemob | United States |
| 50099 | 10019 | 2021-05-08 04:38:50+00:00 | 1390888903163400199 | Sis took forever to go to Sleep tonight 😩 | _LOVEJasmine | Angel of mine ❣️ | miami | 🇧🇸 |
50100 rows × 7 columns
| Unnamed: 0 | index | Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 2021-01-30 23:59:11+00:00 | 1355666901842751491 | Momiji wanted to commit suicide in OoO when Ri... | sgottadraw | Rere / Rei / Re | CHAOS! I CHOOSE YOU! |
| 1 | 1 | 1 | 2021-01-30 23:58:57+00:00 | 1355666844120612866 | IF YOU ACT SUICIDAL FOR ATTENTION YOURE WEIRD ASF | kalnzl | kal | im not inactive just lying low |
| 2 | 2 | 2 | 2021-01-30 23:58:53+00:00 | 1355666826773094406 | i feel so suicidal and i literally got home se... | cryshroom | ☹ | TW vent acc |
| 3 | 3 | 3 | 2021-01-30 23:57:36+00:00 | 1355666502955970561 | suicidal thoughts começaram demasiado cedo htt... | mariiasrt | maria | venus |
| 10 | 10 | 10 | 2021-01-30 23:52:49+00:00 | 1355665301615112193 | Suicidal thoughts...✨but outdoors✨ | lemurss | Ana Hernandez | Austin |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 50092 | 50092 | 10012 | 2021-05-08 09:00:15+00:00 | 1390954688435130370 | Again, life is so much fun that when I go to s... | Nath16S | Arvoton | Finland |
| 50094 | 50094 | 10014 | 2021-05-08 07:22:58+00:00 | 1390930207545217024 | Let me go to sleep and forget about the pass f... | Sadittynayy | sooneysosaditty | Little Rock, AR |
| 50095 | 50095 | 10015 | 2021-05-08 05:57:56+00:00 | 1390908809246355464 | Hm. I read word s,, then. I go to sleep foreve... | wonkeydonkey69 | dinkey donkey | New York, USA |
| 50098 | 50098 | 10018 | 2021-05-08 05:13:51+00:00 | 1390897714959986692 | @SerithDusson This magic wand is so special it... | Gokuswitch2 | gswitch #Macemob | United States |
| 50099 | 50099 | 10019 | 2021-05-08 04:38:50+00:00 | 1390888903163400199 | Sis took forever to go to Sleep tonight 😩 | _LOVEJasmine | Angel of mine ❣️ | miami | 🇧🇸 |
34486 rows × 8 columns
31 @krystle
56 Mr.Jones🐿🖤
86 devan. ⚘
107 ☢ Newclear Radio™
120 Shelly Quimby, M.Ed🌻🌸📚🐾 🇺🇸🏴
177 Some Random Old Chick
198 🐉Karri🐉
212 Chubby Spinster
224 quotur
243 (self-proclaimed CEO of Shiori) WeebMeme
247 Jacquelyn
248 Plalism Son
301 Maestro Powell
303 hallee
304 Onikastoejam
312 daeba
438 Donald Frederick
442 Zuleika Lee
446 Zuleika Lee
447 Zuleika Lee
Name: Displayname, dtype: object
import re
df4 = df3
#removing punctuation and emojis
df4['Displayname']=df3['Displayname'].str.replace('[^\w\s#@/:%.,_-]', '', flags=re.UNICODE)
df4FutureWarning: The default value of regex will change from True to False in a future version.
df4['Displayname']=df3['Displayname'].str.replace('[^\w\s#@/:%.,_-]', '', flags=re.UNICODE)
<ipython-input-18-88b2a02079c3>:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df4['Displayname']=df3['Displayname'].str.replace('[^\w\s#@/:%.,_-]', '', flags=re.UNICODE)
| Unnamed: 0 | index | Datetime | Tweet Id | Text | Username | Displayname | Location | |
|---|---|---|---|---|---|---|---|---|
| 31 | 31 | 31 | 2021-01-30 23:41:34+00:00 | 1355662468660518914 | @LindaWesson I think that some depression and ... | krystlelyte | @krystle | United States |
| 56 | 56 | 56 | 2021-01-30 23:28:15+00:00 | 1355659117847846914 | No suicidal shit I want everybody to wear red ... | ChuckDaHooper01 | Mr.Jones | United States |
| 86 | 86 | 86 | 2021-01-30 23:07:52+00:00 | 1355653989963018242 | I am 28 today. A strange milestone for someone... | devvydarling | devan. | Georgia, USA |
| 107 | 107 | 107 | 2021-01-30 22:53:07+00:00 | 1355650278763229185 | Suicidal Tendencies - The Struggle Is Real\n☢ ... | NewclearRadioUS | Newclear Radio | USA / WORLD WIDE |
| 120 | 120 | 120 | 2021-01-30 22:46:19+00:00 | 1355648567357136901 | @WoodlanderPaul Will send ALL the pheasants yo... | ShellyQuimby1 | Shelly Quimby, M.Ed | North Dakota, USA |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 50042 | 50042 | 9962 | 2021-05-10 17:17:34+00:00 | 1391804621040099331 | I just wanna cuddle my kitty and go to sleep f... | chunnytabbit | chey taylor | Oklahoma, USA |
| 50047 | 50047 | 9967 | 2021-05-10 10:19:44+00:00 | 1391699469834629122 | When I need to go to sleep I need my rest ASAP... | LatashaBrie | Mac Checka | Wisconsin, USA |
| 50048 | 50048 | 9968 | 2021-05-10 09:49:35+00:00 | 1391691880749060096 | My youngest sister is playing Can We Kiss Fore... | yehsicca | 𝕵𝖊𝖘𝖘𝖎𝖈𝖆 | Texas, USA |
| 50095 | 50095 | 10015 | 2021-05-08 05:57:56+00:00 | 1390908809246355464 | Hm. I read word s,, then. I go to sleep foreve... | wonkeydonkey69 | dinkey donkey | New York, USA |
| 50098 | 50098 | 10018 | 2021-05-08 05:13:51+00:00 | 1390897714959986692 | @SerithDusson This magic wand is so special it... | Gokuswitch2 | gswitch #Macemob | United States |
2224 rows × 8 columns
# df4[['first_name','last_name']] = df4['Displayname'].loc[df4['Displayname'].str.split().str.len() == 2].str.split(expand=True)
df4['first'] = df4['Displayname'].str.split(' ').str[0]SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df4['first'] = df4['Displayname'].str.split(' ').str[0]
for i in range(2001,2224):
j=df4.loc[i,"first"]
print(i)
rs=requests.get(f"https://api.genderize.io/?name={j}")
rs2=rs.json()
df4.loc[i,"Gender"]=rs2["gender"]2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223