diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 32e07bc..ea9a8a7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -48,3 +48,65 @@ jobs:
 
       - name: Run Go tests
         run: go test -v ./feedfetcher/...
+
+  test-go-integration:
+    runs-on: ubuntu-latest
+
+    services:
+      redis:
+        image: redis:7-alpine
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: '1.23'
+
+      # MinIO is started by hand rather than under `services:` because service
+      # containers cannot override the image command, and the minio/minio image
+      # only serves when run as `minio server <dir>`.
+      - name: Start MinIO
+        run: |
+          docker run -d --name minio -p 9000:9000 \
+            -e MINIO_ROOT_USER=minioadmin \
+            -e MINIO_ROOT_PASSWORD=minioadmin \
+            minio/minio:latest server /data
+          for attempt in $(seq 1 30); do
+            if curl -sf http://localhost:9000/minio/health/live; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "MinIO did not become healthy in time" >&2
+          docker logs minio
+          exit 1
+
+      - name: Install MinIO Client
+        run: |
+          wget https://dl.min.io/client/mc/release/linux-amd64/mc
+          chmod +x mc
+          sudo mv mc /usr/local/bin/
+
+      - name: Configure MinIO
+        run: |
+          mc alias set local http://localhost:9000 minioadmin minioadmin
+          mc mb --ignore-existing local/feedreader2018-articles
+
+      - name: Run integration tests
+        env:
+          REDIS_HOST: localhost
+          REDIS_PORT: 6379
+          S3_ENDPOINT: http://localhost:9000
+          S3_ACCESS_KEY: minioadmin
+          S3_SECRET_KEY: minioadmin
+          S3_BUCKET: feedreader2018-articles
+        run: go test -v -tags=integration ./feedfetcher/...
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..53db908
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,32 @@
+version: '3.8'
+
+services:
+  redis:
+    image: redis:7-alpine
+    ports:
+      - "6379:6379"
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  minio:
+    image: minio/minio:latest
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    environment:
+      MINIO_ROOT_USER: minioadmin
+      MINIO_ROOT_PASSWORD: minioadmin
+    command: server /data --console-address ":9001"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+    volumes:
+      - minio-data:/data
+
+volumes:
+  minio-data:
diff --git a/feedfetcher/fetcher.go b/feedfetcher/fetcher.go
new file mode 100644
index 0000000..0fe3a86
--- /dev/null
+++ b/feedfetcher/fetcher.go
@@ -0,0 +1,255 @@
+package feedfetcher
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/go-redis/redis/v8"
+	"github.com/mmcdole/gofeed"
+)
+
+// defaultHTTPTimeout bounds a single feed download so that an unresponsive
+// server cannot block FetchFeed forever when the caller's context carries no
+// deadline.
+const defaultHTTPTimeout = 30 * time.Second
+
+// Fetcher downloads feeds over HTTP, keeps feed metadata and article scores in
+// Redis, and writes article bodies to an S3 bucket.
+type Fetcher struct {
+	redisClient *redis.Client
+	s3Client    *s3.Client
+	httpClient  *http.Client
+	s3Bucket    string
+}
+
+// FeedResponse is the JSON-serializable outcome of a FetchFeed call.
+type FeedResponse struct {
+	Success       bool     `json:"success"`
+	Title         string   `json:"title,omitempty"`
+	Link          string   `json:"link,omitempty"`
+	LastModified  string   `json:"lastModified,omitempty"`
+	Etag          string   `json:"etag,omitempty"`
+	Articles      []string `json:"articles"`
+	StatusCode    int      `json:"statusCode,omitempty"`
+	StatusMessage string   `json:"statusMessage,omitempty"`
+}
+
+// NewFetcher returns a Fetcher backed by the given Redis and S3 clients that
+// stores article bodies in s3Bucket.
+func NewFetcher(redisClient *redis.Client, s3Client *s3.Client, s3Bucket string) *Fetcher {
+	return &Fetcher{
+		redisClient: redisClient,
+		s3Client:    s3Client,
+		httpClient:  &http.Client{Timeout: defaultHTTPTimeout},
+		s3Bucket:    s3Bucket,
+	}
+}
+
+// FetchFeed requests feedURI, sending the lastModified/etag values cached in
+// Redis through BuildRequestHeaders, and returns the feed metadata together
+// with the article IDs held in the feed's sorted set.
+//
+// Transport failures, statuses other than 200/304 and unparsable feeds are
+// reported as a FeedResponse with Success set to false and a nil error. A
+// non-nil error is returned only when building the request, reading the body,
+// or talking to Redis or S3 fails.
+func (f *Fetcher) FetchFeed(ctx context.Context, feedURI string) (*FeedResponse, error) {
+	keys := BuildRedisKeys(feedURI)
+
+	// Fetch stored feed metadata from Redis.
+	storedFeed, err := f.redisClient.HGetAll(ctx, keys.FeedKey).Result()
+	if err != nil && !errors.Is(err, redis.Nil) {
+		return nil, fmt.Errorf("failed to get stored feed: %w", err)
+	}
+
+	// Build request headers with cached etag/lastModified.
+	headers := BuildRequestHeaders(storedFeed["lastModified"], storedFeed["etag"])
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURI, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	for key, value := range headers {
+		req.Header.Set(key, value)
+	}
+
+	resp, err := f.httpClient.Do(req)
+	if err != nil {
+		return &FeedResponse{
+			Success:       false,
+			StatusMessage: err.Error(),
+		}, nil
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusNotModified {
+		// Feed not modified: answer from the data already stored in Redis.
+		articles, err := f.getArticleIds(ctx, keys.ArticlesKey)
+		if err != nil {
+			return nil, err
+		}
+		return &FeedResponse{
+			Success:      true,
+			Title:        storedFeed["title"],
+			Link:         storedFeed["link"],
+			LastModified: storedFeed["lastModified"],
+			Etag:         storedFeed["etag"],
+			Articles:     articles,
+			StatusCode:   resp.StatusCode,
+		}, nil
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return &FeedResponse{
+			Success:       false,
+			StatusCode:    resp.StatusCode,
+			StatusMessage: resp.Status,
+		}, nil
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	// Parse straight from the bytes; converting to a string first would copy
+	// the whole payload.
+	feed, err := gofeed.NewParser().Parse(bytes.NewReader(body))
+	if err != nil {
+		return &FeedResponse{
+			Success:       false,
+			StatusMessage: fmt.Sprintf("failed to parse feed: %v", err),
+		}, nil
+	}
+
+	lastModified := resp.Header.Get("Last-Modified")
+	etag := resp.Header.Get("Etag")
+
+	for _, item := range feed.Items {
+		if err := f.processItem(ctx, keys.ArticlesKey, feedURI, item); err != nil {
+			return nil, err
+		}
+	}
+
+	// Store the feed metadata only after every article has been persisted:
+	// saving etag/lastModified first would let a later 304 response hide
+	// articles whose processing failed part-way through.
+	feedMeta := map[string]any{
+		"title":        feed.Title,
+		"link":         feed.Link,
+		"lastModified": lastModified,
+		"etag":         etag,
+	}
+	// HSet accepts a field map; HMSet is deprecated in go-redis v8.
+	if err := f.redisClient.HSet(ctx, keys.FeedKey, feedMeta).Err(); err != nil {
+		return nil, fmt.Errorf("failed to store feed metadata: %w", err)
+	}
+
+	articles, err := f.getArticleIds(ctx, keys.ArticlesKey)
+	if err != nil {
+		return nil, err
+	}
+
+	return &FeedResponse{
+		Success:      true,
+		Title:        feed.Title,
+		Link:         feed.Link,
+		LastModified: lastModified,
+		Etag:         etag,
+		Articles:     articles,
+		StatusCode:   resp.StatusCode,
+	}, nil
+}
+
+// processItem validates one parsed feed item and, if IsValidArticle accepts
+// it, writes its body to S3 when ShouldStoreArticle requires it and records
+// its score in the articlesKey sorted set.
+func (f *Fetcher) processItem(ctx context.Context, articlesKey, feedURI string, item *gofeed.Item) error {
+	article := Article{
+		GUID:        item.GUID,
+		Title:       item.Title,
+		Description: item.Description,
+	}
+
+	// Use the published date, falling back to the updated date.
+	switch {
+	case item.PublishedParsed != nil:
+		article.PubDate = item.PublishedParsed.Format(time.RFC3339)
+	case item.UpdatedParsed != nil:
+		article.PubDate = item.UpdatedParsed.Format(time.RFC3339)
+	}
+
+	if !IsValidArticle(&article) {
+		return nil
+	}
+
+	processed := ProcessArticle(article, feedURI)
+	articleKey := BuildArticleKey(processed.Hash)
+
+	// redis.Nil only means the article has no score yet. Any other error is a
+	// real Redis failure and must not be mistaken for "new article".
+	var oldScore *string
+	prev, err := f.redisClient.ZScore(ctx, articlesKey, articleKey).Result()
+	switch {
+	case err == nil:
+		s := strconv.FormatInt(int64(prev), 10)
+		oldScore = &s
+	case !errors.Is(err, redis.Nil):
+		return fmt.Errorf("failed to get article score: %w", err)
+	}
+
+	// Write to S3 before updating the score: if the upload fails, the score in
+	// Redis is left untouched instead of recording an article that has no body.
+	if ShouldStoreArticle(oldScore, processed.Score) {
+		if err := f.storeArticleInS3(ctx, processed); err != nil {
+			return fmt.Errorf("failed to store article in S3: %w", err)
+		}
+	}
+
+	err = f.redisClient.ZAdd(ctx, articlesKey, &redis.Z{
+		Score:  float64(processed.Score),
+		Member: articleKey,
+	}).Err()
+	if err != nil {
+		return fmt.Errorf("failed to add article to sorted set: %w", err)
+	}
+
+	return nil
+}
+
+// storeArticleInS3 uploads the body produced by GenerateArticleBody to the
+// configured bucket under "<hash>.json".
+func (f *Fetcher) storeArticleInS3(ctx context.Context, article Article) error {
+	body, err := GenerateArticleBody(article)
+	if err != nil {
+		return err
+	}
+
+	_, err = f.s3Client.PutObject(ctx, &s3.PutObjectInput{
+		Bucket:      aws.String(f.s3Bucket),
+		Key:         aws.String(article.Hash + ".json"),
+		Body:        bytes.NewReader([]byte(body)),
+		ContentType: aws.String("application/json"),
+	})
+
+	return err
+}
+
+// getArticleIds returns the members of the articlesKey sorted set, highest
+// score first, after passing them through ExtractArticleIds.
+func (f *Fetcher) getArticleIds(ctx context.Context, articlesKey string) ([]string, error) {
+	allArticles, err := f.redisClient.ZRevRange(ctx, articlesKey, 0, -1).Result()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get articles from sorted set: %w", err)
+	}
+
+	return ExtractArticleIds(allArticles), nil
+}
diff --git a/feedfetcher/fetcher_integration_test.go b/feedfetcher/fetcher_integration_test.go
new file mode 100644
index 0000000..eb39088
--- /dev/null
+++ b/feedfetcher/fetcher_integration_test.go
@@ -0,0 +1,336 @@
+//go:build integration
+// +build integration
+
+package feedfetcher
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io/fs"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"slices"
+	"testing"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/credentials"
+	"github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/go-redis/redis/v8"
+	"gopkg.in/yaml.v3"
+)
+
+// FeedGetTest is one entry of feed_get_tests in testdata/feed-get-tests.yaml.
+type FeedGetTest struct {
+	Description          string `yaml:"description"`
+	FeedFixture          string `yaml:"feed_fixture"`
+	FeedURI              string `yaml:"feed_uri"`
+	ExpectedFeedMetadata struct {
+		Title string `yaml:"title"`
+		Link  string `yaml:"link"`
+	} `yaml:"expected_feed_metadata"`
+	ExpectedArticlesCount int `yaml:"expected_articles_count"`
+	ExpectedArticles      []struct {
+		GUID    string `yaml:"guid"`
+		Title   string `yaml:"title"`
+		Hash    string `yaml:"hash"`
+		Score   int64  `yaml:"score"`
+		FeedURL string `yaml:"feedurl"`
+	} `yaml:"expected_articles"`
+	CachingTest *struct {
+		ResponseHeaders struct {
+			LastModified string `yaml:"last_modified"`
+			Etag         string `yaml:"etag"`
+		} `yaml:"response_headers"`
+		ExpectedArticlesCount          int  `yaml:"expected_articles_count"`
+		ShouldReturn304OnSecondRequest bool `yaml:"should_return_304_on_second_request"`
+	} `yaml:"caching_test"`
+}
+
+// FetchFeedTestCases mirrors the top level of testdata/feed-get-tests.yaml.
+type FetchFeedTestCases struct {
+	FeedGetTests []FeedGetTest `yaml:"feed_get_tests"`
+}
+
+// setupRedisClient connects to the Redis instance named by REDIS_HOST and
+// REDIS_PORT (default localhost:6379) and verifies it with a PING.
+func setupRedisClient() (*redis.Client, error) {
+	host := os.Getenv("REDIS_HOST")
+	if host == "" {
+		host = "localhost"
+	}
+	port := os.Getenv("REDIS_PORT")
+	if port == "" {
+		port = "6379"
+	}
+
+	client := redis.NewClient(&redis.Options{
+		Addr: fmt.Sprintf("%s:%s", host, port),
+	})
+
+	if err := client.Ping(context.Background()).Err(); err != nil {
+		client.Close() // do not leak the connection pool when Redis is unreachable
+		return nil, fmt.Errorf("failed to connect to Redis: %w", err)
+	}
+
+	return client, nil
+}
+
+// setupS3Client builds a path-style S3 client for S3_ENDPOINT using the static
+// S3_ACCESS_KEY/S3_SECRET_KEY credentials; every value defaults to local MinIO.
+func setupS3Client() (*s3.Client, error) {
+	endpoint := os.Getenv("S3_ENDPOINT")
+	if endpoint == "" {
+		endpoint = "http://localhost:9000"
+	}
+
+	accessKey := os.Getenv("S3_ACCESS_KEY")
+	if accessKey == "" {
+		accessKey = "minioadmin"
+	}
+
+	secretKey := os.Getenv("S3_SECRET_KEY")
+	if secretKey == "" {
+		secretKey = "minioadmin"
+	}
+
+	cfg, err := config.LoadDefaultConfig(context.Background(),
+		config.WithRegion("us-east-1"),
+		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, "")),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load AWS config: %w", err)
+	}
+
+	client := s3.NewFromConfig(cfg, func(o *s3.Options) {
+		o.BaseEndpoint = aws.String(endpoint)
+		o.UsePathStyle = true
+	})
+
+	return client, nil
+}
+
+// loadTestCases reads and decodes the shared YAML test definitions.
+func loadTestCases(t *testing.T) FetchFeedTestCases {
+	t.Helper()
+
+	data, err := os.ReadFile("../testdata/feed-get-tests.yaml")
+	if err != nil {
+		t.Fatalf("Failed to read test data: %v", err)
+	}
+
+	var testCases FetchFeedTestCases
+	if err := yaml.Unmarshal(data, &testCases); err != nil {
+		t.Fatalf("Failed to parse test data: %v", err)
+	}
+
+	return testCases
+}
+
+// readFixture reads a fixture named in the YAML test data. The same YAML is
+// consumed by a JavaScript suite that runs from the repository root, while
+// `go test` runs in this package's directory, so a relative path that does
+// not exist here is retried one directory up.
+func readFixture(path string) ([]byte, error) {
+	data, err := os.ReadFile(path)
+	if errors.Is(err, fs.ErrNotExist) && !filepath.IsAbs(path) {
+		return os.ReadFile(filepath.Join("..", path))
+	}
+	return data, err
+}
+
+// setupTestClients returns the Redis client, S3 client and bucket name used by
+// the tests, failing the test if either client cannot be created.
+func setupTestClients(t *testing.T) (*redis.Client, *s3.Client, string) {
+	t.Helper()
+
+	redisClient, err := setupRedisClient()
+	if err != nil {
+		t.Fatalf("Failed to setup Redis client: %v", err)
+	}
+	t.Cleanup(func() { redisClient.Close() })
+
+	s3Client, err := setupS3Client()
+	if err != nil {
+		t.Fatalf("Failed to setup S3 client: %v", err)
+	}
+
+	bucket := os.Getenv("S3_BUCKET")
+	if bucket == "" {
+		bucket = "feedreader2018-articles"
+	}
+
+	return redisClient, s3Client, bucket
+}
+
+// TestFetchFeedIntegration serves each YAML fixture from a local HTTP server and
+// checks the FetchFeed result together with what was written to Redis and S3.
+func TestFetchFeedIntegration(t *testing.T) {
+	testCases := loadTestCases(t)
+	redisClient, s3Client, bucket := setupTestClients(t)
+
+	for _, tc := range testCases.FeedGetTests {
+		t.Run(tc.Description, func(t *testing.T) {
+			ctx := context.Background()
+
+			feedData, err := readFixture(tc.FeedFixture)
+			if err != nil {
+				t.Fatalf("Failed to read feed fixture: %v", err)
+			}
+
+			// Create test HTTP server to serve the feed
+			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.Header().Set("Content-Type", "application/xml")
+				w.Write(feedData)
+			}))
+			defer server.Close()
+
+			// FetchFeed is called with server.URL, so the Redis keys must be derived
+			// from that URL rather than from tc.FeedURI.
+			keys := BuildRedisKeys(server.URL)
+			if err := redisClient.Del(ctx, keys.FeedKey, keys.ArticlesKey).Err(); err != nil {
+				t.Fatalf("Failed to clear Redis keys: %v", err)
+			}
+
+			fetcher := NewFetcher(redisClient, s3Client, bucket)
+			result, err := fetcher.FetchFeed(ctx, server.URL)
+			if err != nil {
+				t.Fatalf("Fetcher.FetchFeed() failed: %v", err)
+			}
+
+			if !result.Success {
+				t.Errorf("Expected success=true, got success=%v, message=%s", result.Success, result.StatusMessage)
+			}
+
+			// Verify feed metadata when the test case specifies it.
+			if want := tc.ExpectedFeedMetadata.Title; want != "" && result.Title != want {
+				t.Errorf("Title mismatch: got %s, want %s", result.Title, want)
+			}
+			if want := tc.ExpectedFeedMetadata.Link; want != "" && result.Link != want {
+				t.Errorf("Link mismatch: got %s, want %s", result.Link, want)
+			}
+
+			if len(result.Articles) != tc.ExpectedArticlesCount {
+				t.Errorf("Article count mismatch: got %d, want %d", len(result.Articles), tc.ExpectedArticlesCount)
+			}
+
+			for _, expectedArticle := range tc.ExpectedArticles {
+				if !slices.Contains(result.Articles, expectedArticle.Hash) {
+					t.Errorf("Expected article %s not found in results", expectedArticle.Hash)
+				}
+
+				// Verify article is in Redis sorted set
+				articleKey := BuildArticleKey(expectedArticle.Hash)
+				score, err := redisClient.ZScore(ctx, keys.ArticlesKey, articleKey).Result()
+				if err != nil {
+					t.Errorf("Article %s not found in Redis sorted set: %v", articleKey, err)
+				} else if int64(score) != expectedArticle.Score {
+					t.Errorf("Article score mismatch: got %d, want %d", int64(score), expectedArticle.Score)
+				}
+
+				// Verify article is in S3
+				_, err = s3Client.HeadObject(ctx, &s3.HeadObjectInput{
+					Bucket: aws.String(bucket),
+					Key:    aws.String(expectedArticle.Hash + ".json"),
+				})
+				if err != nil {
+					t.Errorf("Article %s not found in S3: %v", expectedArticle.Hash, err)
+				}
+			}
+		})
+	}
+}
+
+// TestFetchFeedCaching fetches the same feed twice and checks that the second
+// request sends the cached validators and is answered from Redis after a 304.
+func TestFetchFeedCaching(t *testing.T) {
+	ctx := context.Background()
+	testCases := loadTestCases(t)
+
+	// Find the first test case with a caching_test section.
+	var cachingTest *FeedGetTest
+	for i := range testCases.FeedGetTests {
+		if testCases.FeedGetTests[i].CachingTest != nil {
+			cachingTest = &testCases.FeedGetTests[i]
+			break
+		}
+	}
+	if cachingTest == nil {
+		t.Skip("No caching test case found in YAML")
+	}
+
+	redisClient, s3Client, bucket := setupTestClients(t)
+
+	feedData, err := readFixture(cachingTest.FeedFixture)
+	if err != nil {
+		t.Fatalf("Failed to read feed fixture: %v", err)
+	}
+
+	requestCount := 0
+	lastModified := cachingTest.CachingTest.ResponseHeaders.LastModified
+	etag := cachingTest.CachingTest.ResponseHeaders.Etag
+
+	// Create test server
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestCount++
+
+		// On second request, check for caching headers and return 304
+		if requestCount > 1 {
+			if got := r.Header.Get("If-Modified-Since"); got != lastModified {
+				t.Errorf("Expected If-Modified-Since header: %s, got: %s", lastModified, got)
+			}
+			if got := r.Header.Get("If-None-Match"); got != etag {
+				t.Errorf("Expected If-None-Match header: %s, got: %s", etag, got)
+			}
+			w.WriteHeader(http.StatusNotModified)
+			return
+		}
+
+		// First request returns full feed
+		w.Header().Set("Content-Type", "application/xml")
+		w.Header().Set("Last-Modified", lastModified)
+		w.Header().Set("Etag", etag)
+		w.Write(feedData)
+	}))
+	defer server.Close()
+
+	fetcher := NewFetcher(redisClient, s3Client, bucket)
+
+	// Clear Redis
+	keys := BuildRedisKeys(server.URL)
+	if err := redisClient.Del(ctx, keys.FeedKey, keys.ArticlesKey).Err(); err != nil {
+		t.Fatalf("Failed to clear Redis keys: %v", err)
+	}
+
+	// First request
+	result1, err := fetcher.FetchFeed(ctx, server.URL)
+	if err != nil {
+		t.Fatalf("First request failed: %v", err)
+	}
+	if !result1.Success {
+		t.Errorf("First request should succeed")
+	}
+
+	expectedCount := cachingTest.CachingTest.ExpectedArticlesCount
+	if len(result1.Articles) != expectedCount {
+		t.Errorf("Expected %d articles, got %d", expectedCount, len(result1.Articles))
+	}
+
+	// Second request (should use caching)
+	result2, err := fetcher.FetchFeed(ctx, server.URL)
+	if err != nil {
+		t.Fatalf("Second request failed: %v", err)
+	}
+	if !result2.Success {
+		t.Errorf("Second request should succeed")
+	}
+	if result2.StatusCode != http.StatusNotModified {
+		t.Errorf("Expected 304 Not Modified, got %d", result2.StatusCode)
+	}
+	if len(result2.Articles) != expectedCount {
+		t.Errorf("Expected %d articles from cache, got %d", expectedCount, len(result2.Articles))
+	}
+}
diff --git a/go.mod b/go.mod
index e83a97a..12d968e 100644
--- a/go.mod
+++ b/go.mod
@@ -3,3 +3,36 @@ module github.com/feedreaderco/api
 go 1.25.2
 
 require gopkg.in/yaml.v3 v3.0.1
+
+require (
+	github.com/PuerkitoBio/goquery v1.8.0 // indirect
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	github.com/aws/aws-sdk-go-v2 v1.39.2 // indirect
+	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 // indirect
+	github.com/aws/aws-sdk-go-v2/config v1.31.12 // indirect
+	github.com/aws/aws-sdk-go-v2/credentials v1.18.16 // indirect
+	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.9 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.9 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.9 // 
indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.9 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.88.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.29.6 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.1 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.38.6 // indirect + github.com/aws/smithy-go v1.23.0 // indirect + github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/go-redis/redis/v8 v8.11.5 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mmcdole/gofeed v1.3.0 // indirect + github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + golang.org/x/net v0.4.0 // indirect + golang.org/x/text v0.5.0 // indirect +) diff --git a/go.sum b/go.sum index a62c313..95f8a69 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,76 @@ +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/aws/aws-sdk-go-v2 v1.39.2 h1:EJLg8IdbzgeD7xgvZ+I8M1e0fL0ptn/M47lianzth0I= +github.com/aws/aws-sdk-go-v2 v1.39.2/go.mod h1:sDioUELIUO9Znk23YVmIk86/9DOpkbyyVb1i/gUNFXY= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1 h1:i8p8P4diljCr60PpJp6qZXNlgX4m2yQFpYk+9ZT+J4E= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.1/go.mod h1:ddqbooRZYNoJ2dsTwOty16rM+/Aqmk/GOXrK8cg7V00= +github.com/aws/aws-sdk-go-v2/config v1.31.12 h1:pYM1Qgy0dKZLHX2cXslNacbcEFMkDMl+Bcj5ROuS6p8= +github.com/aws/aws-sdk-go-v2/config v1.31.12/go.mod 
h1:/MM0dyD7KSDPR+39p9ZNVKaHDLb9qnfDurvVS2KAhN8= +github.com/aws/aws-sdk-go-v2/credentials v1.18.16 h1:4JHirI4zp958zC026Sm+V4pSDwW4pwLefKrc0bF2lwI= +github.com/aws/aws-sdk-go-v2/credentials v1.18.16/go.mod h1:qQMtGx9OSw7ty1yLclzLxXCRbrkjWAM7JnObZjmCB7I= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9 h1:Mv4Bc0mWmv6oDuSWTKnk+wgeqPL5DRFu5bQL9BGPQ8Y= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.9/go.mod h1:IKlKfRppK2a1y0gy1yH6zD+yX5uplJ6UuPlgd48dJiQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9 h1:se2vOWGD3dWQUtfn4wEjRQJb1HK1XsNIt825gskZ970= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.9/go.mod h1:hijCGH2VfbZQxqCDN7bwz/4dzxV+hkyhjawAtdPWKZA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.9 h1:6RBnKZLkJM4hQ+kN6E7yWFveOTg8NLPHAkqrs4ZPlTU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.9/go.mod h1:V9rQKRmK7AWuEsOMnHzKj8WyrIir1yUJbZxDuZLFvXI= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.9 h1:w9LnHqTq8MEdlnyhV4Bwfizd65lfNCNgdlNC6mM5paE= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.9/go.mod h1:LGEP6EK4nj+bwWNdrvX/FnDTFowdBNwcSPuZu/ouFys= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1 h1:oegbebPEMA/1Jny7kvwejowCaHz1FWZAQ94WXFNCyTM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.1/go.mod h1:kemo5Myr9ac0U9JfSjMo9yHLtw+pECEHsFtJ9tqCEI8= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.0 h1:X0FveUndcZ3lKbSpIC6rMYGRiQTcUVRNH6X4yYtIrlU= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.0/go.mod h1:IWjQYlqw4EX9jw2g3qnEPPWvCE6bS8fKzhMed1OK7c8= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.9 h1:5r34CgVOD4WZudeEKZ9/iKpiT6cM1JyEROpXjOcdWv8= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.9/go.mod 
h1:dB12CEbNWPbzO2uC6QSWHteqOg4JfBVJOojbAoAUb5I= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.9 h1:wuZ5uW2uhJR63zwNlqWH2W4aL4ZjeJP3o92/W+odDY4= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.9/go.mod h1:/G58M2fGszCrOzvJUkDdY8O9kycodunH4VdT5oBAqls= +github.com/aws/aws-sdk-go-v2/service/s3 v1.88.4 h1:mUI3b885qJgfqKDUSj6RgbRqLdX0wGmg8ruM03zNfQA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.88.4/go.mod h1:6v8ukAxc7z4x4oBjGUsLnH7KGLY9Uhcgij19UJNkiMg= +github.com/aws/aws-sdk-go-v2/service/sso v1.29.6 h1:A1oRkiSQOWstGh61y4Wc/yQ04sqrQZr1Si/oAXj20/s= +github.com/aws/aws-sdk-go-v2/service/sso v1.29.6/go.mod h1:5PfYspyCU5Vw1wNPsxi15LZovOnULudOQuVxphSflQA= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.1 h1:5fm5RTONng73/QA73LhCNR7UT9RpFH3hR6HWL6bIgVY= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.1/go.mod h1:xBEjWD13h+6nq+z4AkqSfSvqRKFgDIQeaMguAJndOWo= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.6 h1:p3jIvqYwUZgu/XYeI48bJxOhvm47hZb5HUQ0tn6Q9kA= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.6/go.mod h1:WtKK+ppze5yKPkZ0XwqIVWD4beCwv056ZbPQNoeHqM8= +github.com/aws/smithy-go v1.23.0 h1:8n6I3gXzWJB2DxBDnfxgBaSX6oe0d/t10qGz7OKqMCE= +github.com/aws/smithy-go v1.23.0/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= +github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI= +github.com/go-redis/redis/v8 v8.11.5/go.mod 
h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4= +github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE= +github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk= +github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU= +golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/src/lib/feeds.integration.test.js b/src/lib/feeds.integration.test.js new file mode 100644 index 0000000..c29f31f --- /dev/null +++ b/src/lib/feeds.integration.test.js @@ -0,0 +1,339 @@ +// Integration tests for feed fetching +// Run with: INTEGRATION=true node src/lib/feeds.integration.test.js +// Requires Redis and MinIO to be running (use docker-compose up) + +const fs = require('fs'); +const http = require('http'); +const yaml = require('js-yaml'); +const redis = require('redis'); +const AWS = require('aws-sdk'); + +// Exit if not running in integration mode +if (process.env.INTEGRATION !== 'true') { + console.log('Skipping integration tests (set INTEGRATION=true to run)'); + process.exit(0); +} + +// Load test cases from YAML +const testCasesYaml = fs.readFileSync('./testdata/feed-get-tests.yaml', 'utf8'); +const testCases = yaml.load(testCasesYaml); + +// Setup Redis client +const redisHost = process.env.REDIS_HOST || 'localhost'; +const redisPort = process.env.REDIS_PORT || '6379'; +const redisClient = redis.createClient({ + host: redisHost, + port: redisPort, +}); + +// Setup S3 client (MinIO) +const s3Endpoint = process.env.S3_ENDPOINT || 'http://localhost:9000'; +const 
s3AccessKey = process.env.S3_ACCESS_KEY || 'minioadmin'; +const s3SecretKey = process.env.S3_SECRET_KEY || 'minioadmin'; +const s3Bucket = process.env.S3_BUCKET || 'feedreader2018-articles'; + +AWS.config.update({ + accessKeyId: s3AccessKey, + secretAccessKey: s3SecretKey, + s3ForcePathStyle: true, + signatureVersion: 'v4', +}); + +const s3 = new AWS.S3({ + endpoint: s3Endpoint, + params: { Bucket: s3Bucket }, +}); + +// Import feed utilities +const { buildRedisKeys, buildArticleKey } = require('./feedUtils.js'); + +// Simple test runner +let passed = 0; +let failed = 0; +let testServer = null; + +function test(name, fn) { + return new Promise((resolve) => { + fn() + .then(() => { + passed++; + console.log(`✓ ${name}`); + resolve(); + }) + .catch((error) => { + failed++; + console.error(`✗ ${name}`); + console.error(` ${error.message}`); + if (error.stack) { + console.error(` ${error.stack}`); + } + resolve(); + }); + }); +} + +// Helper to call feed.get with mock req/res +function callFeedGet(feedURI) { + return new Promise((resolve, reject) => { + // Mock Express request object + const req = { + url: '/api/feed/' + encodeURIComponent(feedURI), + }; + + // Mock Express response object + let responseData = null; + let statusCode = 200; + + const res = { + json: (data) => { + responseData = data; + resolve(responseData); + }, + status: (code) => { + statusCode = code; + return res; // Allow chaining + }, + }; + + // Import feeds.js and call feed.get + // We need to set up the module with our Redis client + const feedsModule = require('../feeds.js'); + + // Call the feed.get function + try { + feedsModule.feed.get(req, res); + } catch (error) { + reject(error); + } + }); +} + +// Helper to clear Redis keys +function clearRedisKeys(feedURI, callback) { + const { feedKey, articlesKey } = buildRedisKeys(feedURI); + redisClient.del([feedKey, articlesKey], callback); +} + +// Run all integration tests +async function runTests() { + console.log('\n=== Testing Feed Fetching 
(Integration) ===\n'); + + // Test: Fetch and process Atom feed + const atomTest = testCases.feed_get_tests.find(tc => tc.description.includes('Atom')); + if (atomTest) { + await test(atomTest.description, () => { + return new Promise((resolve, reject) => { + // Clear Redis + clearRedisKeys('http://localhost:8888/xkcd', (clearErr) => { + if (clearErr) return reject(clearErr); + + // Read feed fixture + const feedData = fs.readFileSync(atomTest.feed_fixture, 'utf8'); + + // Create test HTTP server + testServer = http.createServer((req, res) => { + res.writeHead(200, { 'Content-Type': 'application/xml' }); + res.end(feedData); + }); + + testServer.listen(8888, async () => { + try { + const result = await callFeedGet('http://localhost:8888/xkcd'); + + // Verify success + if (!result.success) { + throw new Error(`Expected success=true, got ${result.success}`); + } + + // Verify feed metadata + if (result.title !== atomTest.expected_feed_metadata.title) { + throw new Error(`Title mismatch: got ${result.title}, want ${atomTest.expected_feed_metadata.title}`); + } + + if (result.link !== atomTest.expected_feed_metadata.link) { + throw new Error(`Link mismatch: got ${result.link}, want ${atomTest.expected_feed_metadata.link}`); + } + + // Verify article count + if (result.articles.length !== atomTest.expected_articles_count) { + throw new Error(`Article count mismatch: got ${result.articles.length}, want ${atomTest.expected_articles_count}`); + } + + // Verify specific articles + for (const expectedArticle of atomTest.expected_articles) { + if (!result.articles.includes(expectedArticle.hash)) { + throw new Error(`Expected article ${expectedArticle.hash} not found`); + } + + // Verify article is in Redis + const { articlesKey } = buildRedisKeys('http://localhost:8888/xkcd'); + const articleKey = buildArticleKey(expectedArticle.hash); + + await new Promise((res, rej) => { + redisClient.zscore(articlesKey, articleKey, (err, score) => { + if (err) return rej(err); + if 
(parseInt(score) !== expectedArticle.score) { + return rej(new Error(`Score mismatch: got ${score}, want ${expectedArticle.score}`)); + } + res(); + }); + }); + + // Verify article is in S3 + await new Promise((res, rej) => { + s3.headObject({ Key: expectedArticle.hash + '.json' }, (err) => { + if (err) return rej(new Error(`Article ${expectedArticle.hash} not found in S3`)); + res(); + }); + }); + } + + testServer.close(); + resolve(); + } catch (error) { + testServer.close(); + reject(error); + } + }); + }); + }); + }); + } + + // Test: Fetch and process RSS feed + const rssTest = testCases.feed_get_tests.find(tc => tc.description.includes('RSS')); + if (rssTest) { + await test(rssTest.description, () => { + return new Promise((resolve, reject) => { + clearRedisKeys('http://localhost:8889/hn', (clearErr) => { + if (clearErr) return reject(clearErr); + + const feedData = fs.readFileSync(rssTest.feed_fixture, 'utf8'); + + testServer = http.createServer((req, res) => { + res.writeHead(200, { 'Content-Type': 'application/xml' }); + res.end(feedData); + }); + + testServer.listen(8889, async () => { + try { + const result = await callFeedGet('http://localhost:8889/hn'); + + if (!result.success) { + throw new Error(`Expected success=true, got ${result.success}`); + } + + if (result.title !== rssTest.expected_feed_metadata.title) { + throw new Error(`Title mismatch: got ${result.title}, want ${rssTest.expected_feed_metadata.title}`); + } + + if (result.articles.length !== rssTest.expected_articles_count) { + throw new Error(`Article count mismatch: got ${result.articles.length}, want ${rssTest.expected_articles_count}`); + } + + testServer.close(); + resolve(); + } catch (error) { + testServer.close(); + reject(error); + } + }); + }); + }); + }); + } + + // Test: HTTP 304 caching + const cachingTest = testCases.feed_get_tests.find(tc => tc.caching_test); + if (cachingTest) { + await test(cachingTest.description, () => { + return new Promise((resolve, reject) => { + 
clearRedisKeys('http://localhost:8890/cache-test', (clearErr) => { + if (clearErr) return reject(clearErr); + + const feedData = fs.readFileSync(cachingTest.feed_fixture, 'utf8'); + const { last_modified, etag } = cachingTest.caching_test.response_headers; + const expectedCount = cachingTest.caching_test.expected_articles_count; + + let requestCount = 0; + + testServer = http.createServer((req, res) => { + requestCount++; + + // Second request should get 304 + if (requestCount > 1) { + if (req.headers['if-modified-since'] !== last_modified) { + testServer.close(); + return reject(new Error(`Expected If-Modified-Since: ${last_modified}, got ${req.headers['if-modified-since']}`)); + } + if (req.headers['if-none-match'] !== etag) { + testServer.close(); + return reject(new Error(`Expected If-None-Match: ${etag}, got ${req.headers['if-none-match']}`)); + } + res.writeHead(304); + res.end(); + return; + } + + // First request returns full feed + res.writeHead(200, { + 'Content-Type': 'application/xml', + 'Last-Modified': last_modified, + 'Etag': etag, + }); + res.end(feedData); + }); + + testServer.listen(8890, async () => { + try { + // First request + const result1 = await callFeedGet('http://localhost:8890/cache-test'); + + if (!result1.success) { + throw new Error('First request should succeed'); + } + + if (result1.articles.length !== expectedCount) { + throw new Error(`Expected ${expectedCount} articles, got ${result1.articles.length}`); + } + + // Second request (should use cache and get 304) + const result2 = await callFeedGet('http://localhost:8890/cache-test'); + + if (!result2.success) { + throw new Error('Second request should succeed'); + } + + if (result2.articles.length !== expectedCount) { + throw new Error(`Expected ${expectedCount} cached articles, got ${result2.articles.length}`); + } + + testServer.close(); + resolve(); + } catch (error) { + testServer.close(); + reject(error); + } + }); + }); + }); + }); + } + + // Print summary + console.log(`\n=== 
Test Summary ===`); + console.log(`Passed: ${passed}`); + console.log(`Failed: ${failed}`); + console.log(`Total: ${passed + failed}\n`); + + redisClient.quit(); + process.exit(failed > 0 ? 1 : 0); +} + +// Run tests +runTests().catch((error) => { + console.error('Test runner error:', error); + if (testServer) testServer.close(); + redisClient.quit(); + process.exit(1); +}); diff --git a/testdata/feed-get-tests.yaml b/testdata/feed-get-tests.yaml new file mode 100644 index 0000000..0c71d07 --- /dev/null +++ b/testdata/feed-get-tests.yaml @@ -0,0 +1,92 @@ +# Test cases for feed fetching and processing +# These tests use real feed fixtures and verify Redis/S3 storage + +feed_get_tests: + - description: "Fetch and process Atom feed (XKCD)" + feed_fixture: "testdata/feeds/xkcd.atom.xml" + feed_uri: "https://xkcd.com/atom.xml" + expected_feed_metadata: + title: "xkcd.com" + link: "https://xkcd.com/" + expected_articles_count: 3 + expected_articles: + - guid: "https://xkcd.com/3153/" + title: "Test Comic 1" + hash: "13a0bebeed5b348147d880a1a4917587" + score: 1728518400000 + feedurl: "https://xkcd.com/atom.xml" + - guid: "https://xkcd.com/3152/" + title: "Test Comic 2" + hash: "21664da7ee05988c62d1f516f3442411" + score: 1728345600000 + feedurl: "https://xkcd.com/atom.xml" + - guid: "https://xkcd.com/3151/" + title: "Test Comic 3" + hash: "3fa08ba1591ba3683e87265ee9300946" + score: 1728172800000 + feedurl: "https://xkcd.com/atom.xml" + + - description: "Fetch and process RSS feed (Hacker News)" + feed_fixture: "testdata/feeds/hn.rss.xml" + feed_uri: "https://news.ycombinator.com/rss" + expected_feed_metadata: + title: "Hacker News" + link: "https://news.ycombinator.com/" + expected_articles_count: 3 + expected_articles: + - guid: "https://news.ycombinator.com/item?id=12345" + title: "Show HN: My Project" + hash: "e6d98eb69fa44b3807ce21bea815869b" + score: 1728475200000 + feedurl: "https://news.ycombinator.com/rss" + - guid: "https://news.ycombinator.com/item?id=12346" + 
title: "Ask HN: What are you working on?" + hash: "11eeb3caf6e5906bc165618b0f00575f" + score: 1728471600000 + feedurl: "https://news.ycombinator.com/rss" + - guid: "https://news.ycombinator.com/item?id=12347" + title: "New JavaScript Framework Released" + hash: "73b3dc5e5cb0b970a6eaf06b29b96c4a" + score: 1728468000000 + feedurl: "https://news.ycombinator.com/rss" + + - description: "Skip invalid articles without guid" + feed_fixture: "testdata/feeds/invalid.xml" + feed_uri: "https://example.com/invalid.xml" + expected_articles_count: 0 + + - description: "Use cached feed metadata on subsequent fetch" + feed_fixture: "testdata/feeds/xkcd.atom.xml" + feed_uri: "https://xkcd.com/atom.xml" + cached_feed: + lastModified: "Wed, 09 Oct 2024 12:00:00 GMT" + etag: "\"abc123\"" + expected_request_headers: + If-Modified-Since: "Wed, 09 Oct 2024 12:00:00 GMT" + If-None-Match: "\"abc123\"" + + - description: "Update article score when changed" + feed_fixture: "testdata/feeds/xkcd.atom.xml" + feed_uri: "https://xkcd.com/atom.xml" + existing_articles: + - hash: "13a0bebeed5b348147d880a1a4917587" + score: 1728432000000 # Different score + should_update_s3: true + + - description: "Skip S3 update when score unchanged" + feed_fixture: "testdata/feeds/xkcd.atom.xml" + feed_uri: "https://xkcd.com/atom.xml" + existing_articles: + - hash: "13a0bebeed5b348147d880a1a4917587" + score: 1728518400000 # Same score + should_update_s3: false + + - description: "HTTP 304 caching with lastModified and etag" + feed_fixture: "testdata/feeds/xkcd.atom.xml" + feed_uri: "https://xkcd.com/atom.xml" + caching_test: + response_headers: + last_modified: "Wed, 09 Oct 2024 12:00:00 GMT" + etag: "\"test-etag-123\"" + expected_articles_count: 3 + should_return_304_on_second_request: true diff --git a/testdata/feeds/hn.rss.xml b/testdata/feeds/hn.rss.xml new file mode 100644 index 0000000..20aa879 --- /dev/null +++ b/testdata/feeds/hn.rss.xml @@ -0,0 +1,33 @@ + + + + Hacker News + https://news.ycombinator.com/ 
+ Links for the intellectually curious, ranked by readers. + Wed, 09 Oct 2024 12:00:00 GMT + + + Show HN: My Project + https://news.ycombinator.com/item?id=12345 + https://news.ycombinator.com/item?id=12345 + Wed, 09 Oct 2024 12:00:00 GMT + A description of my project + + + + Ask HN: What are you working on? + https://news.ycombinator.com/item?id=12346 + https://news.ycombinator.com/item?id=12346 + Wed, 09 Oct 2024 11:00:00 GMT + Monthly thread about projects + + + + New JavaScript Framework Released + https://news.ycombinator.com/item?id=12347 + https://news.ycombinator.com/item?id=12347 + Wed, 09 Oct 2024 10:00:00 GMT + Yet another JavaScript framework + + + diff --git a/testdata/feeds/invalid.xml b/testdata/feeds/invalid.xml new file mode 100644 index 0000000..fb5319e --- /dev/null +++ b/testdata/feeds/invalid.xml @@ -0,0 +1,23 @@ + + + Invalid Feed + + https://example.com/ + 2024-10-10T00:00:00Z + + + Article without GUID + + 2024-10-10T00:00:00Z + + This article has no GUID and should be skipped + + + + + Article without description + + 2024-10-10T00:00:00Z + https://example.com/article2 + + diff --git a/testdata/feeds/xkcd.atom.xml b/testdata/feeds/xkcd.atom.xml new file mode 100644 index 0000000..39b918b --- /dev/null +++ b/testdata/feeds/xkcd.atom.xml @@ -0,0 +1,31 @@ + + + xkcd.com + + https://xkcd.com/ + 2024-10-10T00:00:00Z + + + Test Comic 1 + + 2024-10-10T00:00:00Z + https://xkcd.com/3153/ + First test comic description + + + + Test Comic 2 + + 2024-10-08T00:00:00Z + https://xkcd.com/3152/ + Second test comic description + + + + Test Comic 3 + + 2024-10-06T00:00:00Z + https://xkcd.com/3151/ + Third test comic description + +