derek-thomas
committed on
Commit
·
1756d68
1
Parent(s):
b5d7dd6
Fixing imports
Browse files
- main.py +0 -1
- utilities/user_defined_functions.py +17 -1
main.py
CHANGED
@@ -23,7 +23,6 @@ if frequency not in ["daily", "hourly"]:
|
|
23 |
|
24 |
# Authenticate with Hugging Face using an auth token
|
25 |
auth_token = os.environ["HUGGINGFACE_AUTH_TOKEN"]
|
26 |
-
login(auth_token, add_to_git_credential=True)
|
27 |
|
28 |
logger = setup_logger(__name__)
|
29 |
|
|
|
23 |
|
24 |
# Authenticate with Hugging Face using an auth token
|
25 |
auth_token = os.environ["HUGGINGFACE_AUTH_TOKEN"]
|
|
|
26 |
|
27 |
logger = setup_logger(__name__)
|
28 |
|
utilities/user_defined_functions.py
CHANGED
@@ -1,13 +1,29 @@
|
|
|
|
1 |
from datetime import datetime
|
2 |
|
3 |
import pandas as pd
|
4 |
from datasets import Dataset, DatasetDict, load_dataset
|
|
|
5 |
|
6 |
-
from main import auth_token, dataset_name, logger
|
7 |
from utilities.data_processing import data_processing
|
|
|
8 |
from utilities.praw_downloader import praw_downloader
|
9 |
from utilities.praw_processor import preprocess_praw_data
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Dummy row for when we create a new repo
|
12 |
dummy_data = {
|
13 |
"id": ['id'],
|
|
|
1 |
+
import os
|
2 |
from datetime import datetime
|
3 |
|
4 |
import pandas as pd
|
5 |
from datasets import Dataset, DatasetDict, load_dataset
|
6 |
+
from huggingface_hub import login
|
7 |
|
|
|
8 |
from utilities.data_processing import data_processing
|
9 |
+
from utilities.my_logger import setup_logger
|
10 |
from utilities.praw_downloader import praw_downloader
|
11 |
from utilities.praw_processor import preprocess_praw_data
|
12 |
|
13 |
+
# Set dataset name, path to README.md, and existing dataset details
|
14 |
+
subreddit = os.environ["SUBREDDIT"]
|
15 |
+
username = os.environ["USERNAME"]
|
16 |
+
dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"
|
17 |
+
|
18 |
+
frequency = os.environ.get("FREQUENCY", '').lower()
|
19 |
+
if frequency not in ["daily", "hourly"]:
|
20 |
+
raise ValueError("FREQUENCY environment variable must be 'daily' or 'hourly'")
|
21 |
+
|
22 |
+
# Authenticate with Hugging Face using an auth token
|
23 |
+
auth_token = os.environ["HUGGINGFACE_AUTH_TOKEN"]
|
24 |
+
login(auth_token, add_to_git_credential=True)
|
25 |
+
|
26 |
+
logger = setup_logger(__name__)
|
27 |
# Dummy row for when we create a new repo
|
28 |
dummy_data = {
|
29 |
"id": ['id'],
|