·

Data Schema


student_profile:
  # Basic demographic attributes of the student.
  demographics:
    - field: age
      description: Student's age in years
      type: integer
      required: true
      min: 0
      max: 120
      example: 16

    - field: gender
      description: Self-reported gender identity
      type: string
      required: true
      enum: [Male, Female, Non-binary, 'Prefer not to say']
      example: Female

    # Optional and free-form: no enum restricts the accepted values.
    - field: ethnicity
      description: Student's self-identified ethnic or racial background
      type: string
      required: false
      example: Hispanic

  # Residential setting and household composition.
  household:
    - field: address_type
      description: Type of residential area
      type: string
      required: true
      enum: [Urban, Rural, Suburban]
      example: Urban

    - field: family_size
      description: Total number of people in the household
      type: integer
      required: true
      min: 1
      max: 20
      example: 4

    # The two counts below declare a lower bound only (no `max`).
    - field: num_siblings
      description: Number of siblings (including half/step siblings)
      type: integer
      required: true
      min: 0
      example: 2

    - field: birth_order
      description: Birth order (1 = firstborn, 2 = second child, etc.)
      type: integer
      required: true
      min: 1
      example: 1

  # Parents/guardians: family structure, schooling, occupation, and the
  # resources available to the student at home.
  family_background:
    structure:
      - field: parent_marital_status
        description: Current marital/relationship status of parents/guardians
        type: string
        required: true
        enum: [Single, Married, Separated, Divorced, Widowed, 'Domestic Partnership']
        example: Married

      - field: guardian
        description: Primary guardian(s) the student lives with
        type: string
        required: true
        enum: [Father, Mother, Parents, Grandparent, 'Other Relative', 'Foster Care']
        example: Parents

    # Both parents share the same 0-4 ordinal coding; the legend is repeated
    # in each field's description.
    education:
      - field: mother_education
        description: |
          0 = No formal education
          1 = Primary education
          2 = Secondary education
          3 = Some college/technical school
          4 = Bachelor's degree or higher
        type: integer
        required: true
        enum: [0, 1, 2, 3, 4]
        example: 3

      - field: father_education
        description: |
          0 = No formal education
          1 = Primary education
          2 = Secondary education
          3 = Some college/technical school
          4 = Bachelor's degree or higher
        type: integer
        required: true
        enum: [0, 1, 2, 3, 4]
        example: 3

    # Occupations are optional free-text strings.
    employment:
      - field: mother_occupation
        description: Mother's current or most recent occupation
        type: string
        required: false
        example: Registered Nurse

      - field: father_occupation
        description: Father's current or most recent occupation
        type: string
        required: false
        example: Software Engineer

    # Required yes/no flags.
    resources:
      - field: internet_access
        description: Whether the student has reliable internet access at home
        type: boolean
        required: true
        example: true

      - field: family_support
        description: Whether family provides academic and emotional support
        type: boolean
        required: true
        example: true

      - field: family_history_mental_health
        description: Whether there is a known family history of mental health conditions
        type: boolean
        required: true
        example: false


academic_profile:
  # Identifiers linking the record to a student and to an institution.
  identification:
    - field: student_id
      description: Unique identifier for the student in the system
      type: string
      required: true
      format: UUID
      example: '550e8400-e29b-41d4-a716-446655440000'

    - field: school_id
      description: Unique identifier for the educational institution
      type: string
      required: true
      # One uppercase ASCII letter followed by exactly three digits.
      pattern: '^[A-Z]\d{3}$'
      example: S123

  # Enrollment timeline and class standing.
  academic_status:
    - field: enrollment_year
      description: Year the student first enrolled at the institution
      type: integer
      required: true
      min: 2000
      max: 2100
      example: 2023

    - field: current_year
      description: Current academic year
      type: integer
      required: true
      min: 2000
      max: 2100
      example: 2025

    - field: class_standing
      description: Current academic standing based on credit hours completed
      type: string
      required: true
      enum: [Freshman, Sophomore, Junior, Senior, Graduate]
      example: Junior

  academic_performance:
    # Grade point averages; both fields are bounded to 0.0-4.0.
    gpa:
      - field: cumulative_gpa
        description: Cumulative Grade Point Average on a 4.0 scale
        type: float
        required: true
        min: 0.0
        max: 4.0
        example: 3.75

      - field: current_term_gpa
        description: GPA for the current academic term
        type: float
        required: true
        min: 0.0
        max: 4.0
        example: 3.9

    # Most recent examination score per subject, each an integer in 0-100.
    # Every field states `required` explicitly, like the rest of the schema.
    # NOTE(review): `false` assumes a student may lack a score for some
    # subjects — confirm, and switch to `true` if all five are mandatory.
    standardized_scores:
      - field: math_score
        description: Most recent mathematics examination score (0-100)
        type: integer
        required: false
        min: 0
        max: 100
        example: 88

      - field: language_arts_score
        description: Most recent language arts examination score (0-100)
        type: integer
        required: false
        min: 0
        max: 100
        example: 92

      - field: science_score
        description: Most recent science examination score (0-100)
        type: integer
        required: false
        min: 0
        max: 100
        example: 85

      - field: social_studies_score
        description: Most recent social studies examination score (0-100)
        type: integer
        required: false
        min: 0
        max: 100
        example: 90

      - field: foreign_language_score
        description: Most recent foreign language examination score (0-100)
        type: integer
        required: false
        min: 0
        max: 100
        example: 82

    # Failure count and probation flag.
    performance_metrics:
      # Lower bound only; no `max` is declared.
      - field: academic_failures
        description: Total number of failed courses to date
        type: integer
        required: true
        min: 0
        example: 1

      - field: academic_probation
        description: Whether the student is currently on academic probation
        type: boolean
        required: true
        example: false

  academic_engagement:
    # Test preparation and weekly study effort.
    preparation:
      - field: test_preparation_completed
        description: Whether the student has completed a test preparation course
        type: boolean
        required: true
        example: true

      - field: study_hours_weekly
        description: Average number of hours spent studying per week
        type: integer
        required: true
        min: 0
        max: 100
        example: 15

    self_assessment:
      # Five-point self-rating; the scale endpoints are given in the description.
      - field: academic_pressure_level
        description: Self-reported level of academic pressure (1=Very Low, 5=Very High)
        type: integer
        required: true
        min: 1
        max: 5
        example: 3

      # Describes the same quantity as `study_hours_weekly` (under
      # `preparation`). The upper bound matches that field's `max: 100` so a
      # single value can satisfy both required fields.
      # NOTE(review): consider retiring one of the two duplicate fields.
      - field: study_time
        description: Weekly hours spent studying
        type: integer
        required: true
        min: 0
        max: 100
        example: 20

      # Extracurricular involvement: participation flag, weekly hours,
      # activity categories, and optional leadership roles.
      # Numeric constraints use `min`/`max` and the `float` type name, the
      # same vocabulary as the flat fields elsewhere in this schema.
      - field: extracurricular_participation
        type: object
        required: true
        properties:
          - field: is_active
            description: Whether the student participates in any extracurricular activities
            type: boolean
            required: true
            example: true

          - field: weekly_hours
            description: Average weekly hours spent on extracurricular activities
            type: float
            required: true
            min: 0
            max: 40
            example: 5.5

          - field: activity_types
            description: |
              Types of activities the student participates in:
              - sports: Team or individual sports
              - academic_clubs: Math club, science club, debate, etc.
              - arts: Music, theater, visual arts
              - community_service: Volunteer work, charity organizations
              - leadership: Student government, peer mentoring
              - other: Any activities not covered above
            type: array
            required: true
            items:
              type: string
              enum: [sports, academic_clubs, arts, community_service, leadership, other]
            example: [sports, academic_clubs]

          # Optional free-text list; no enum restricts the role names.
          - field: leadership_roles
            description: Any leadership positions held in these activities
            type: array
            required: false
            items:
              type: string
            example: [team_captain, club_president]

      # Athletic participation: sports played, competition level, weekly
      # hours, plus optional positions and awards.
      # Numeric constraints use `min`/`max` and the `float` type name, the
      # same vocabulary as the flat fields elsewhere in this schema.
      - field: athletic_involvement
        type: object
        required: true
        properties:
          - field: is_active
            description: Whether the student currently participates in any athletic activities
            type: boolean
            required: true
            example: true

          - field: sports
            description: |
              List of sports the student participates in:
              - soccer: Association football
              - basketball: Basketball
              - volleyball: Volleyball
              - track_field: Track and field athletics
              - swimming: Competitive swimming
              - tennis: Tennis
              - baseball: Baseball or softball
              - football: American football
              - other: Any sport not listed above
            type: array
            required: true
            items:
              type: string
              enum:
                - soccer
                - basketball
                - volleyball
                - track_field
                - swimming
                - tennis
                - baseball
                - football
                - other
            example: [basketball, track_field]

          - field: competition_level
            description: |
              Highest level of competition:
              - intramural: School intramural leagues
              - junior_varsity: School JV team
              - varsity: School varsity team
              - club: Competitive club teams
              - recreational: Non-competitive/recreational play
            type: string
            required: false
            enum: [intramural, junior_varsity, varsity, club, recreational]
            example: varsity

          - field: weekly_hours
            description: Average weekly hours spent on athletic activities including practice and games
            type: float
            required: true
            min: 0
            max: 40
            example: 8.5

          # The two lists below are optional and free-text (no enum).
          - field: positions
            description: Positions played in team sports (if applicable)
            type: array
            required: false
            items:
              type: string
            example: [point_guard, shooting_guard]

          - field: awards
            description: Athletic achievements or recognitions received
            type: array
            required: false
            items:
              type: string
            example: [mvp_2024, all_conference]

      # Part-time employment: a yes/no flag plus weekly hours.
      - field: has_part_time_job
        description: |
          Indicates if the student is currently employed in a part-time position.
          Includes both formal employment and informal work arrangements.
        type: boolean
        required: true
        example: false

      # Required even when unemployed; the description says to record 0 then.
      - field: work_hours_weekly
        description: |
          Average number of hours worked per week in part-time employment.
          Set to 0 if not currently employed.
        type: integer
        required: true
        min: 0
        max: 40
        example: 10

      # Post-secondary plans and career outlook.
      - field: higher_education_intent
        description: |
          Student's current intention to pursue higher education after high school.
          This helps in academic counseling and resource allocation.
        type: string
        required: true
        enum: [definitely_yes, likely_yes, unsure, likely_no, definitely_no]
        example: likely_yes

      # Optional free-text field; the values in the description are examples,
      # not an enforced enum.
      - field: career_field_interest
        description: |
          Broad field of career interest. Can be used to connect students with
          relevant opportunities and resources. Examples include:
          - computer_science
          - healthcare
          - business
          - engineering
          - arts_humanities
          - education
          - undecided
        type: string
        required: false
        example: computer_science

      - field: career_confidence
        description: |
          Student's confidence level in their career choice (1-5):
          1: Not at all confident
          2: Slightly confident
          3: Moderately confident
          4: Very confident
          5: Extremely confident
        type: integer
        required: true
        min: 1
        max: 5
        example: 4

      # Relationship status.
      - field: has_romantic_relationship
        description: |
          Whether the student is currently in a romantic relationship.
          Used to understand social support systems and time commitments.
        type: boolean
        required: true
        example: false

      # Five-point self-rating of engagement with academic work.
      - field: academic_engagement
        description: |
          Self-reported level of engagement with academic work (1-5):
          1: Very disengaged
          2: Somewhat disengaged
          3: Neutral
          4: Somewhat engaged
          5: Very engaged
        type: integer
        required: true
        min: 1
        max: 5
        example: 4

  # Attendance and school commute information
  attendance:
    # Absence, tardiness, and attendance-rate counters.
    # Numeric constraints use `min`/`max` and the `float` type name, the same
    # vocabulary as the flat fields elsewhere in this schema.
    - field: attendance_record
      type: object
      required: true
      properties:
        - field: total_absences
          description: Total number of school days missed in the current academic year
          type: integer
          required: true
          min: 0
          max: 365
          example: 5

        # Per its description this is a subset of `total_absences`; the
        # schema itself does not express that cross-field constraint.
        - field: excused_absences
          description: Number of absences with valid excuse (included in total_absences)
          type: integer
          required: true
          min: 0
          max: 365
          example: 2

        - field: tardies
          description: Number of late arrivals to school
          type: integer
          required: true
          min: 0
          max: 200
          example: 3

        - field: attendance_rate
          description: Percentage of school days attended (0-100%)
          type: float
          required: true
          min: 0
          max: 100
          example: 95.5
    
    # How the student gets to school and how reliable the commute is.
    # Numeric constraints use `min`/`max` and the `float` type name, the same
    # vocabulary as the flat fields elsewhere in this schema.
    - field: transportation_details
      type: object
      required: true
      properties:
        - field: primary_transport
          description: |
            Primary mode of transportation to school:
            - school_bus: District-provided school bus
            - public_transit: Public transportation (metro, city bus, etc.)
            - family_vehicle: Personal/family car
            - carpool: Shared ride with other students
            - walking: Walking to school
            - bicycle: Biking to school
            - other: Other transportation method
          type: string
          required: true
          enum: [school_bus, public_transit, family_vehicle, carpool, walking, bicycle, other]
          example: school_bus

        # This enum offers `none` but, unlike `primary_transport`, not `other`.
        # NOTE(review): confirm the omission of `other` is intentional.
        - field: secondary_transport
          description: Secondary/backup transportation method, if any
          type: string
          required: false
          enum: [school_bus, public_transit, family_vehicle, carpool, walking, bicycle, none]
          example: family_vehicle

        - field: average_commute_time
          type: object
          required: true
          properties:
            - field: minutes
              description: Average one-way commute time in minutes
              type: float
              required: true
              min: 0
              max: 240
              example: 22.5

            - field: consistency
              description: |
                How consistent the commute time is:
                - very_consistent: ±5 minutes
                - somewhat_consistent: ±15 minutes
                - variable: ±30 minutes
                - unpredictable: Varies by more than 30 minutes
              type: string
              required: true
              enum: [very_consistent, somewhat_consistent, variable, unpredictable]
              example: somewhat_consistent

        - field: has_transportation_issues
          description: Whether the student experiences frequent transportation problems
          type: boolean
          required: true
          example: false

mental_health:
  # Depression and anxiety (diagnosis flag plus 0-10 self-rated severity),
  # and panic-attack frequency.
  conditions:
    - field: depression_diagnosis
      description: Whether the student has been diagnosed with depression by a professional
      type: boolean
      required: true
      example: true

    - field: depression_severity
      description: Self-reported depression severity (0=None, 10=Extreme)
      type: integer
      required: true
      min: 0
      max: 10
      example: 6

    - field: anxiety_diagnosis
      description: Whether the student has been diagnosed with an anxiety disorder
      type: boolean
      required: true
      example: true

    - field: anxiety_severity
      description: Self-reported anxiety severity (0=None, 10=Extreme)
      type: integer
      required: true
      min: 0
      max: 10
      example: 7

    - field: panic_attack_frequency
      description: Frequency of panic attacks experienced
      type: string
      required: true
      enum: [Never, Rarely, Monthly, Weekly, Daily]
      example: Monthly

  # Risk indicators. The first two fields carry an extra
  # `severity: high_risk` tag; `substance_use_frequency` does not.
  risk_factors:
    - field: suicidal_ideation
      description: Whether the student has experienced thoughts of self-harm or suicide
      type: boolean
      required: true
      severity: high_risk
      example: false

    - field: self_harm_behavior
      description: History of self-harm behaviors
      type: string
      required: true
      severity: high_risk
      enum: [Never, 'In the past', Currently]
      example: In the past

    - field: substance_use_frequency
      description: Frequency of alcohol or drug use
      type: string
      required: true
      enum: [Never, Rarely, Monthly, Weekly, Daily]
      example: Rarely

  # Integer self-ratings. Three use a 1-5 scale; `stress_level` uses 1-10.
  behavioral_indicators:
    - field: social_isolation
      description: Self-reported level of social isolation (1=Not at all, 5=Extremely)
      type: integer
      required: true
      min: 1
      max: 5
      example: 3

    - field: mood_instability
      description: Frequency of mood swings (1=Never, 5=Constantly)
      type: integer
      required: true
      min: 1
      max: 5
      example: 3

    - field: stress_level
      description: Current stress level (1=None, 10=Extreme)
      type: integer
      required: true
      min: 1
      max: 10
      example: 7

    - field: coping_effectiveness
      description: Self-rated effectiveness of coping strategies (1=Not at all, 5=Very)
      type: integer
      required: true
      min: 1
      max: 5
      example: 3

  # Counseling, medication, and perceived social support.
  support_system:
    - field: counseling_usage
      description: History of using counseling services
      type: string
      required: true
      enum: [Never, 'In the past', Currently]
      example: Currently

    # 'None' stays quoted so it is always read as the literal string.
    - field: medication_usage
      description: Current or past psychiatric medication use
      type: string
      required: true
      enum: ['None', 'In the past', 'Currently prescribed']
      example: Currently prescribed

    - field: social_support_quality
      description: Perceived quality of social support (1=Very poor, 5=Excellent)
      type: integer
      required: true
      min: 1
      max: 5
      example: 4

wellbeing_metrics:
  # Sleep, screen time, and exercise. The first two are daily figures
  # (max 24 hours); physical activity is weekly (max 168 hours).
  lifestyle:
    - field: sleep_hours
      description: Average nightly sleep duration in hours
      type: float
      required: true
      min: 0
      max: 24
      example: 6.5

    - field: screen_time_hours
      description: Average daily recreational screen time in hours
      type: float
      required: true
      min: 0
      max: 24
      example: 4.5

    - field: physical_activity_hours
      description: Weekly exercise time in hours
      type: float
      required: true
      min: 0
      max: 168
      example: 3.0

  # Questionnaire totals. Each description lists the score bands used to
  # interpret the value; `min`/`max` give the instrument's full range.
  survey_scores:
    - field: PHQ9_score
      description: |
        PHQ-9 Depression Severity Score (0-27):
        0-4: Minimal depression
        5-9: Mild depression
        10-14: Moderate depression
        15-19: Moderately severe depression
        20-27: Severe depression
      type: integer
      required: true
      min: 0
      max: 27
      example: 8

    - field: WHO5_score
      description: |
        WHO-5 Well-Being Index (0-25):
        0-12: Low well-being
        13-17: Moderate well-being
        18-25: High well-being
      type: integer
      required: true
      min: 0
      max: 25
      example: 15

    - field: GAD7_score
      description: |
        GAD-7 Anxiety Severity Score (0-21):
        0-4: Minimal anxiety
        5-9: Mild anxiety
        10-14: Moderate anxiety
        15-21: Severe anxiety
      type: integer
      required: true
      min: 0
      max: 21
      example: 6

    - field: PSS_score
      description: |
        Perceived Stress Scale (0-40):
        0-13: Low stress
        14-26: Moderate stress
        27-40: High stress
      type: integer
      required: true
      min: 0
      max: 40
      example: 16

    # Unlike the other scales in this section, the minimum here is 20, not 0.
    - field: UCLA_loneliness_score
      description: |
        UCLA Loneliness Scale (20-80):
        20-34: Low loneliness
        35-49: Moderate loneliness
        50-80: High loneliness
      type: integer
      required: true
      min: 20
      max: 80
      example: 45

  # Sentiment scores derived from text: one signed compound value and three
  # proportions, each in 0-1.
  sentiment_analysis:
    # NOTE(review): adjacent bands in the description share their endpoint
    # (-0.5, -0.1, 0.1, 0.5), so a value exactly on a boundary, including the
    # example 0.5, belongs to two bands. Confirm which side is inclusive.
    - field: sentiment_compound
      description: |
        Overall sentiment polarity (-1.0 to +1.0):
        -1.0 to -0.5: Very negative
        -0.5 to -0.1: Negative
        -0.1 to 0.1: Neutral
        0.1 to 0.5: Positive
        0.5 to 1.0: Very positive
      type: float
      required: true
      min: -1.0
      max: 1.0
      example: 0.5

    - field: sentiment_positive
      description: Proportion of positive sentiment (0-1)
      type: float
      required: true
      min: 0
      max: 1
      example: 0.4

    - field: sentiment_neutral
      description: Proportion of neutral sentiment (0-1)
      type: float
      required: true
      min: 0
      max: 1
      example: 0.3

    - field: sentiment_negative
      description: Proportion of negative sentiment (0-1)
      type: float
      required: true
      min: 0
      max: 1
      example: 0.3

  # Text readability indices, each with a lower bound of 1.
  # NOTE(review): presumably the producer clamps raw index values into the
  # declared ranges — verify, since values outside them fail validation.
  readability_scores:
    - field: ARI
      description: Automated Readability Index (1-20, higher is more complex)
      type: float
      required: true
      min: 1
      max: 20
      example: 7.5

    - field: Coleman_Liau_Index
      description: Coleman-Liau readability score (1-20, higher is more complex)
      type: float
      required: true
      min: 1
      max: 20
      example: 8.2

    - field: Flesch_Kincaid_Grade_Level
      description: U.S. school grade level needed to understand the text (1-16)
      type: float
      required: true
      min: 1
      max: 16
      example: 9.1

# Record-level bookkeeping: timestamps, provenance, schema version, and
# data-quality flags. All fields are required.
meta:
  # Timestamps (kept quoted so they are read as strings, not parsed dates)
  - field: created_at
    description: |
      ISO 8601 timestamp when this record was first created.
      Always include timezone offset (e.g., -04:00 for EDT).
    type: datetime
    required: true
    format: date-time
    example: '2025-08-17T14:33:00-04:00'

  - field: updated_at
    description: |
      ISO 8601 timestamp when this record was last modified.
      Should match created_at for new records.
    type: datetime
    required: true
    format: date-time
    example: '2025-08-17T14:33:00-04:00'

  # Provenance and versioning
  - field: data_source
    description: |
      Identifier for the system or interface that collected this data.
      Should be a URL-friendly string (lowercase, underscores).
    type: string
    required: true
    example: student_portal_v1

  - field: schema_version
    description: |
      Version of the schema this data conforms to.
      Follows semantic versioning (MAJOR.MINOR.PATCH).
    type: string
    required: true
    # Three dot-separated groups of digits.
    pattern: '^\d+\.\d+\.\d+$'
    example: '1.0.0'

  # Data-quality indicators
  - field: is_complete
    description: |
      Indicates whether all required fields have been populated.
      Should be false if any required fields are missing.
    type: boolean
    required: true
    example: true

  - field: data_quality_score
    description: |
      Automated data quality assessment score (0-100).
      Higher values indicate better data quality.
    type: integer
    required: true
    min: 0
    max: 100
    example: 95

More from the blog