home..

AI 활용

September 2024 (3502 Words, 20 Minutes)

시스템 목적 및 역할 (자세한 분석)

이 AI는 강원도 지역의 날씨와 기후 특성을 기반으로 최적의 야외 활동을 추천하는 지능형 시스템이다.

기상 조건과 지리적 특성을 종합적으로 고려하여 최적의 야외 활동을 제안하는 지능형 추천 시스템으로, 사용자의 실제 활동 기록을 통해 지속적으로 학습하고 발전하는 구조로 설계되었다.

주요 기능은 다음과 같다:

활동 추천 메커니즘 /api/v1/predict 엔드포인트를 통해 구현되어 있다. 사용자가 도시와 날짜를 입력하면, 해당 조건에 가장 적합한 활동 목록을 반환한다.

@app.route(f'/api/{API_VERSION}/predict', methods=['POST'])
def predict_v1():
    # 데이터 유효성 검증
    city = preprocess_location_name(data['city'])
    date = data['date']

    # 날씨 데이터 가져오기
    weather = weather_forecast[city][date]

    # 모델을 통한 활동 추천
    recommended_activities = recommend_activity(model, weather, climate_data[city],
                                               weather_scaler, climate_scaler, activity_data)

    # 결과 반환
    response = {
        'city': city,
        'date': date,
        'weather': weather,
        'recommended_activities': recommended_activities
    }

내부적으로 recommend_activity 함수는 날씨와 기후 데이터를 모델에 입력하여 각 활동의 적합도 점수를 계산한다:

def recommend_activity(model, weather, climate, weather_scaler, climate_scaler, activity_data):
    # 날씨 데이터 인코딩 및 정규화
    weather_input = prepare_weather_input(weather)
    weather_normalized = weather_scaler.transform(weather_input)

    # 기후 데이터 정규화
    climate_input = prepare_climate_input(climate)
    climate_normalized = climate_scaler.transform(climate_input)

    # 텐서 변환
    weather_tensor = torch.tensor(weather_normalized, dtype=torch.float32)
    climate_tensor = torch.tensor(climate_normalized, dtype=torch.float32)

    # 모델 예측
    with torch.no_grad():
        output = model(weather_tensor, climate_tensor)
        scores = output[0].numpy()

    # 결과 형식화 및 정렬
    recommended_activities = [
        {'activity': activity['name'], 'score': float(scores[i])}
        for i, activity in enumerate(activity_data)
    ]
    recommended_activities.sort(key=lambda x: x['score'], reverse=True)

    return recommended_activities

장소 추천 메커니즘

사용자가 특정 활동을 선택했을 때, 그 활동에 가장 적합한 지역을 추천한다:

def recommend_locations_for_activity(model, activity, climate_data, weather_forecast,
                                    weather_scaler, climate_scaler, activity_data):
    # 활동 인덱스 찾기
    activity_index = next((i for i, a in enumerate(activity_data) if a['name'] == activity), None)

    locations_with_scores = []
    # 모든 지역에 대해 점수 계산
    for location, climate in climate_data.items():
        # 첫 번째 날짜의 날씨 데이터 사용
        weather = weather_forecast[location][list(weather_forecast[location].keys())[0]]

        # 입력 데이터 준비 및 정규화
        weather_input = prepare_weather_input(weather)
        climate_input = prepare_climate_input(climate)
        weather_normalized = weather_scaler.transform(weather_input)
        climate_normalized = climate_scaler.transform(climate_input)

        # 텐서 변환
        weather_tensor = torch.tensor(weather_normalized, dtype=torch.float32)
        climate_tensor = torch.tensor(climate_normalized, dtype=torch.float32)

        # 특정 활동에 대한 점수 계산
        with torch.no_grad():
            output = model(weather_tensor, climate_tensor)
            score = output[0][activity_index].item()

        locations_with_scores.append((location, float(score)))

    # 점수 기준 내림차순 정렬
    locations_with_scores.sort(key=lambda x: x[1], reverse=True)

    return locations_with_scores

이 함수는 /api/v1/recommend/by_activity 엔드포인트에서 사용되며, 각 지역의 기후와 현재 날씨를 고려하여 점수를 매긴다.

날짜 추천 메커니즘

특정 활동과 지역이 주어졌을 때, 향후 날짜 중 가장 적합한 날짜를 추천한다

def recommend_dates_for_activity_and_location(model, activity, location, climate_data,
                                             weather_forecast, weather_scaler, climate_scaler,
                                             activity_data):
    # 기본 파라미터 검증 및 설정
    activity_index = next((i for i, a in enumerate(activity_data) if a['name'] == activity), None)
    location = preprocess_location_name(location)
    climate = climate_data[location]

    dates_with_scores = []
    # 해당 지역의 모든 날짜별 날씨에 대해 점수 계산
    for date, weather in weather_forecast[location].items():
        # 입력 데이터 준비
        weather_input = prepare_weather_input(weather)
        climate_input = prepare_climate_input(climate)

        # 데이터 정규화
        weather_normalized = weather_scaler.transform(weather_input)
        climate_normalized = climate_scaler.transform(climate_input)

        # 예측 및 점수 계산
        weather_tensor = torch.tensor(weather_normalized, dtype=torch.float32)
        climate_tensor = torch.tensor(climate_normalized, dtype=torch.float32)

        with torch.no_grad():
            output = model(weather_tensor, climate_tensor)
            score = output[0][activity_index].item()

        dates_with_scores.append((date, float(score)))

    # 점수 기준 내림차순 정렬
    dates_with_scores.sort(key=lambda x: x[1], reverse=True)

    # 상위 10개 날짜 반환
    return dates_with_scores[:10]
    ```

이 기능은 /api/v1/recommend/by_activity_and_location 엔드포인트에서 제공된다.

1. 딥려닝 모델 구조

ActivityRecommendationModel 클래스는 날씨와 기후 데이터를 동시에 처리하는 멀티 인풋 신경망이다

class ActivityRecommendationModel(nn.Module):
    def __init__(self, weather_input_size, climate_input_size, hidden_size, num_activities):
        super(ActivityRecommendationModel, self).__init__()
        # 날씨 데이터 처리 레이어
        self.weather_fc = nn.Linear(weather_input_size, hidden_size)
        # 기후 데이터 처리 레이어
        self.climate_fc = nn.Linear(climate_input_size, hidden_size)
        # 통합 처리 레이어
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size)
        # 출력 레이어 (각 활동에 대한 점수)
        self.fc2 = nn.Linear(hidden_size, num_activities)
        # 활성화 함수와 드롭아웃
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, weather, climate):
        # 날씨 특성 추출
        weather_features = self.relu(self.weather_fc(weather))
        # 기후 특성 추출
        climate_features = self.relu(self.climate_fc(climate))
        # 특성 결합
        combined = torch.cat((weather_features, climate_features), dim=1)
        # 최종 처리
        x = self.relu(self.fc1(combined))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

모델 입력:
- 날씨 데이터: 온도(최저/최고), 습도, 풍속, 하늘 상태(원-핫), 강수 유형(원-핫)
- 기후 데이터: 겨울/여름 평균 온도, 연간/계절별 강수량, 눈/안개 일수
모델 출력:
- 30개 야외 활동에 대한 적합도 점수

2. 데이터 전처리 세부 프로세스

def preprocess_data(climate_data, activity_data):
    X_activity = []  # 날씨 특성
    X_climate = []   # 기후 특성
    y = []           # 활동 라벨

    # 범주형 변수 인코딩 맵
    sky_status_map = {'맑음': [1, 0, 0], '구름 많음': [0, 1, 0], '흐림': [0, 0, 1]}
    precip_type_map = {'없음': [1, 0, 0, 0, 0], '비': [0, 1, 0, 0, 0],
                       '비/눈': [0, 0, 1, 0, 0], '눈': [0, 0, 0, 1, 0],
                       '소나기': [0, 0, 0, 0, 1]}

    # 모든 활동과 지역 조합에 대한 훈련 데이터 생성
    for activity in activity_data:
        for city, climate in climate_data.items():
            # 원-핫 인코딩 적용
            sky_status_encoded = sky_status_map[activity['sky_status']]
            precip_type_encoded = precip_type_map[activity['precipitation']]

            # 날씨 특성 벡터 구성
            X_activity.append([
                float(activity['temperature_min']),
                float(activity['temperature_max']),
                float(activity['humidity']),
                float(activity['wind_speed']),
                *sky_status_encoded,
                *precip_type_encoded
            ])

            # 기후 특성 벡터 구성
            X_climate.append([
                climate['winter_temp'],
                climate['summer_temp'],
                climate['annual_precipitation'],
                climate['winter_precipitation'],
                climate['summer_precipitation'],
                climate['snow_days'],
                climate['foggy_days']
            ])

            # 활동 인덱스 (타겟)
            y.append(activity_data.index(activity))

    # 특성 정규화
    weather_scaler = StandardScaler()
    climate_scaler = StandardScaler()
    X_activity_normalized = weather_scaler.fit_transform(X_activity)
    X_climate_normalized = climate_scaler.fit_transform(X_climate)

    return (torch.tensor(X_activity_normalized, dtype=torch.float32),
            torch.tensor(X_climate_normalized, dtype=torch.float32),
            torch.tensor(y, dtype=torch.long),
            weather_scaler, climate_scaler)

지역명과 활동명에 대한 추가 전처리:

def preprocess_location_name(name):
    # '시', '군', '구' 제거
    return re.sub(r'(시|군|구)$', '', name)

def preprocess_activity_name(name):
    # 공백 제거
    return ''.join(name.split())

3. 모델 학습 과정 상세

모델 학습은 클래스 불균형을 고려한 가중치를 적용하여 진행된다:

def train_model(X_weather, X_climate, y, num_epochs=100, learning_rate=0.001):
    # 훈련/검증 데이터 분할
    X_train, X_val, y_train, y_val = train_test_split(X_weather, y, test_size=0.2, random_state=42)
    climate_train, climate_val, _, _ = train_test_split(X_climate, y, test_size=0.2, random_state=42)

    # 모델 초기화
    weather_input_size = X_weather.shape[1]
    climate_input_size = X_climate.shape[1]
    hidden_size = 64
    num_activities = len(torch.unique(y))
    model = ActivityRecommendationModel(weather_input_size, climate_input_size, hidden_size, num_activities)

    # 클래스 가중치 계산 (희소 활동에 더 높은 가중치)
    y_np = y.numpy()
    class_counts = Counter(y_np)
    total_samples = len(y_np)
    class_weights = torch.zeros(num_activities)
    for cls in range(num_activities):
        if cls in class_counts:
            class_weights[cls] = total_samples / class_counts[cls]
        else:
            class_weights[cls] = 1.0

    # 가중치가 적용된 손실 함수와 옵티마이저
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 학습 루프
    for epoch in range(num_epochs):
        model.train()
        outputs = model(X_train, climate_train)
        loss = criterion(outputs, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 주기적 검증
        if (epoch + 1) % 10 == 0:
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val, climate_val)
                val_loss = criterion(val_outputs, y_val)
                _, predicted = torch.max(val_outputs, 1)
                accuracy = (predicted == y_val).float().mean()
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Accuracy: {accuracy.item():.4f}')

    return model

4. 배치 학습 process

사용자 피드백 데이터를 활용해 모델을 주기적으로 업데이트한다:
중기 날씨 데이터와 단기 날씨 데이터를 가져오는 7일 마다 배치학습을 업데이트시킨다 ( 스케줄러를 통해)

def batch_learning():
    print("Starting batch learning...")

    # 현재 모델과 스케일러 로드
    model, weather_scaler, climate_scaler = load_trained_model()

    # 사용자 활동 데이터 가져오기
    user_activities = get_user_activity_data()
    climate_data, activity_data = load_data()

    # 사용자 데이터 전처리
    X_weather, X_climate, y_new = preprocess_user_data(user_activities, climate_data, activity_data)

    if len(X_weather) > 0:
        # 데이터 정규화
        X_weather_normalized = weather_scaler.transform(X_weather)
        X_climate_normalized = climate_scaler.transform(X_climate)

        # 모델 재학습
        X_weather_tensor = torch.tensor(X_weather_normalized, dtype=torch.float32)
        X_climate_tensor = torch.tensor(X_climate_normalized, dtype=torch.float32)
        y_new_tensor = torch.tensor(y_new, dtype=torch.long)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        model.train()
        for epoch in range(50):
            optimizer.zero_grad()
            outputs = model(X_weather_tensor, X_climate_tensor)
            loss = criterion(outputs, y_new_tensor)
            loss.backward()
            optimizer.step()

        # 모델 저장
        save_model(model, weather_scaler, climate_scaler)

        # 사용자 활동 데이터 삭제 (학습 완료 후)
        db = get_db()
        db['useractivities'].delete_many({})

        print(f"Batch learning completed. Model updated with {len(X_weather)} data points.")
    else:
        print("No data available for batch learning.")

     # 이 스케줄러는 별도 스레드에서 실행
    def run_scheduler():
    schedule.every(7).days.do(batch_learning)
    while True:
        schedule.run_pending()
        time.sleep(3600)  # 1시간마다 체크
    # app.py에서 스케줄러 시작
scheduler_thread = threading.Thread(target=run_scheduler, daemon=True)
scheduler_thread.start()

데이터 Source 및 구조

1. 기후 데이터

gangwong climate data.json에는 강원도 18개 지역의 기후 데이터가 포함되어 있다:
영동/영서 지역별로 날씨를 구분하였다.

traindataweather.json에는 30가지 야외 활동에 대한 최적 날씨 조건이 정의되어 있다:

[
  {
    "name": "번지 점프",
    "temperature_min": "10",
    "temperature_max": "25",
    "humidity": "60",
    "wind_speed": "5",
    "precipitation": "없음",
    "sky_status": "맑음"
  }
  // 다른 활동들...
]

location.json에는 각 지역의 격자 좌표가 포함되어 있다:

{
  "강릉시": { "nx": 92, "ny": 131 },
  "속초시": { "nx": 87, "ny": 141 }
  // 다른 지역들...
}

2. 사용자 활동 데이터

useractivities.json에는 사용자가 실제로 수행한 야외 활동 기록이 포함되어 있다:

사용자가 다녀온 액티비티에 대해 후기 글을 쓸때, 액티비티 종류와 날짜, 또 장소가 전달되어 해당 날짜와 위치 (X,y) 좌표 에 대한 날씨정보가 fetch된다.
이러한 피드백 데이터는 모델이 초기 훈련 데이터에서 벗어나 실제 사용자 선호도를 학습하는 데 중요한 역할을 한다.

[
  {
    "location": "인제",
    "activityTag": "번지 점프",
    "date": "2024-06-21",
    "weather": {
      "temperature_min": 17.8,
      "temperature_max": 35.6,
      "humidity": 64.3,
      "wind_speed": 1.1,
      "precipitation": "없음",
      "sky_status": "맑음"
    }
  }
  // 다른 활동 기록들...
]

API 구조 & Caching System

/api/v1/predict [POST]
- 입력: city, date
- 출력: 추천 활동 목록, 날씨 정보
/api/v1/recommend/by_activity [POST]
- 입력: activity
- 출력: 추천 지역 목록, 각 지역별 추천 날짜
/api/v1/recommend/by_location [POST]
- 입력: location
- 출력: 추천 활동 목록, 각 활동별 추천 날짜
/api/v1/recommend/by_activity_and_location [POST]
- 입력: activity, location
- 출력: 추천 날짜 목록
/api/v1/record_activity [POST]
- 입력: location, activityTag, date, weather
- 출력: 성공 메시지
/api/v1/activities [GET]
- 출력: 활동 목록 (캐싱됨)
/api/v1/cities [GET]
- 출력: 도시 목록 (캐싱됨)

캐싱

자주 요청되는 데이터에 대해 1시간 단위로 캐싱을 적용한다:

# 캐싱 설정
cache = Cache(app, config={'CACHE_TYPE': 'simple'})

@app.route(f'/api/{API_VERSION}/activities', methods=['GET'])
@cache.cached(timeout=3600)  # 1시간 동안 캐시
def get_activities_v1():
    app.logger.info("Received get activities request")
    activities = [activity['name'] for activity in activity_data]
    app.logger.info(f"Get activities result: {activities}")
    return jsonify(activities)

@app.route(f'/api/{API_VERSION}/cities', methods=['GET'])
@cache.cached(timeout=3600)  # 1시간 동안 캐시
def get_cities_v1():
    app.logger.info("Received get cities request")
    cities = list(climate_data.keys())
    app.logger.info(f"Get cities result: {cities}")
    return jsonify(cities)

이후 앱 시작 시 캐시를 초기화하여 새로운 데이터로 시작한다