Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Raw SQL -> Django ORM Code: Query that returns all blog posts with most recent approved comment for a specific user

Tags:

django

I'm working on a simple blog system. Here is my models.py file:

from django.contrib.auth.models import User
from django.db import models


class Comment(models.Model):
    """A user's comment on a blog post, carrying an approval flag."""
    # User who wrote the comment.
    user = models.ForeignKey(User)
    # Post the comment belongs to; referenced by name because Post is
    # defined further down in this module.
    post = models.ForeignKey('Post')
    # Body text of the comment.
    content = models.TextField()
    # Tri-state flag: NullBooleanField accepts True, False or None (NULL).
    # NOTE(review): presumably None means "not yet moderated" -- confirm.
    approved = models.NullBooleanField()

    class Meta:
        # Default ordering for Comment querysets: highest id first.
        ordering = ('-id',)

    def __unicode__(self):
        """Return a short label naming the user who wrote the comment."""
        return u'Comment by %s' % self.user

class Post(models.Model):
    """A blog post with a title, a slug and body text."""
    # User the post belongs to.
    # NOTE(review): presumably the post's author -- confirm.
    user = models.ForeignKey(User)
    # Title, limited to 200 characters; also the default sort key (see Meta).
    title = models.CharField(max_length=200)
    # Slug stored as a plain CharField limited to 50 characters.
    slug = models.CharField(max_length=50)
    # Body text of the post.
    content = models.TextField()

    class Meta:
        # Default ordering for Post querysets: by title, ascending.
        ordering = ('title',)

    def __unicode__(self):
        """Return the post's title."""
        return self.title

Here is some test data in a fixture which I've named testdata.json (the "some_author" user is a superuser and the password is "Stack Overflow"):

[
{
  "pk": 1, 
  "model": "auth.user", 
  "fields": {
    "username": "some_author", 
    "first_name": "Some", 
    "last_name": "Author", 
    "is_active": true, 
    "is_superuser": true, 
    "is_staff": true, 
    "last_login": "2014-07-02T20:18:49Z", 
    "groups": [], 
    "user_permissions": [], 
    "password": "pbkdf2_sha256$12000$PTl1hfgcIGZy$/0w1jNMBuKi9zk11JXhoS5WrbMBUgMDkZAhEvNEelbs=", 
    "email": "[email protected]", 
    "date_joined": "2014-07-02T20:18:29Z"
  }
},
{
  "pk": 2, 
  "model": "auth.user", 
  "fields": {
    "username": "some_reader", 
    "first_name": "Some", 
    "last_name": "Reader", 
    "is_active": true, 
    "is_superuser": false, 
    "is_staff": false, 
    "last_login": "2014-07-02T20:21:10Z", 
    "groups": [], 
    "user_permissions": [], 
    "password": "pbkdf2_sha256$12000$CtTGfFeOaRhd$oVR6zFSpK2qg1AZ4fgdBG/wt6Sr56dHsEIxFO99mHC8=", 
    "email": "[email protected]", 
    "date_joined": "2014-07-02T20:21:10Z"
  }
},
{
  "pk": 3, 
  "model": "auth.user", 
  "fields": {
    "username": "another_reader", 
    "first_name": "Another", 
    "last_name": "Reader", 
    "is_active": true, 
    "is_superuser": false, 
    "is_staff": false, 
    "last_login": "2014-07-02T20:21:34Z", 
    "groups": [], 
    "user_permissions": [], 
    "password": "pbkdf2_sha256$12000$ZPnmV7fVeie3$08H2vv3A8Py4E92+uVAIiEaeg8CAL5deTyNAZj1YJMs=", 
    "email": "[email protected]", 
    "date_joined": "2014-07-02T20:21:34Z"
  }
},
{
  "pk": 1, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 1 on post 1: approved", 
    "post": 1, 
    "user": 2, 
    "approved": true
  }
},
{
  "pk": 2, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 1 on post 2: not approved", 
    "post": 2, 
    "user": 2, 
    "approved": false
  }
},
{
  "pk": 3, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 2 on post 3: approved", 
    "post": 3, 
    "user": 2, 
    "approved": true
  }
},
{
  "pk": 4, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 2 of 2 on post 3: not approved", 
    "post": 3, 
    "user": 2, 
    "approved": false
  }
},
{
  "pk": 5, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 2 on post 4: not approved", 
    "post": 4, 
    "user": 2, 
    "approved": false
  }
},
{
  "pk": 6, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 2 of 2 on post 4: approved", 
    "post": 4, 
    "user": 2, 
    "approved": true
  }
},
{
  "pk": 7, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 2 on post 5: approved", 
    "post": 5, 
    "user": 2, 
    "approved": true
  }
},
{
  "pk": 8, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 2 of 2 on post 5: approved", 
    "post": 5, 
    "user": 2, 
    "approved": true
  }
},
{
  "pk": 9, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 2 on post 6: not approved", 
    "post": 6, 
    "user": 2, 
    "approved": false
  }
},
{
  "pk": 10, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 2 of 2 on post 6: not approved", 
    "post": 6, 
    "user": 2, 
    "approved": false
  }
},
{
  "pk": 11, 
  "model": "blog.comment", 
  "fields": {
    "content": "Comment 1 of 1 on post 7: approved", 
    "post": 7, 
    "user": 3, 
    "approved": true
  }
},
{
  "pk": 1, 
  "model": "blog.post", 
  "fields": {
    "content": "First post", 
    "slug": "post-1", 
    "user": 1, 
    "title": "Post 1"
  }
},
{
  "pk": 2, 
  "model": "blog.post", 
  "fields": {
    "content": "Second post", 
    "slug": "post-2", 
    "user": 1, 
    "title": "Post 2"
  }
},
{
  "pk": 3, 
  "model": "blog.post", 
  "fields": {
    "content": "Third post", 
    "slug": "post-3", 
    "user": 1, 
    "title": "Post 3"
  }
},
{
  "pk": 4, 
  "model": "blog.post", 
  "fields": {
    "content": "Fourth post", 
    "slug": "post-4", 
    "user": 1, 
    "title": "Post 4"
  }
},
{
  "pk": 5, 
  "model": "blog.post", 
  "fields": {
    "content": "Fifth post", 
    "slug": "post-5", 
    "user": 1, 
    "title": "Post 5"
  }
},
{
  "pk": 6, 
  "model": "blog.post", 
  "fields": {
    "content": "Sixth post", 
    "slug": "post-6", 
    "user": 1, 
    "title": "Post 6"
  }
},
{
  "pk": 7, 
  "model": "blog.post", 
  "fields": {
    "content": "Seventh post", 
    "slug": "post-7", 
    "user": 1, 
    "title": "Post 7"
  }
},
{
  "pk": 8, 
  "model": "blog.post", 
  "fields": {
    "content": "Eighth post", 
    "slug": "post-8", 
    "user": 1, 
    "title": "Post 8"
  }
}
]

I'm trying to query the database for all of the blog posts along with each blog post's most recent comment that meets both of these two conditions:

  1. The comment was made by "Some Reader" (user_id = 2)
  2. The comment has been approved

I want the query to return all of the blog posts, even if they don't have a comment that meets the above two conditions. For the blog posts that don't have a comment that meets the above two conditions, the returned comment column should just be NULL. I have this working with raw SQL:

# Print every post together with the text of its most recent approved comment
# written by one user; posts without such a comment print None.
#
# How the query works:
#   * The derived table x finds, per post, the highest id among the comments
#     that match the user and approval filter.
#   * The second LEFT OUTER JOIN fetches that comment's row; posts with no
#     match keep NULL in the comment column.
#
# The comment text is aliased to comment_content so that it does not silently
# overwrite Post.content on the returned instances; raw() exposes extra
# selected columns as plain attributes. The user id and the approved flag are
# bound through params instead of being written into the SQL, which keeps the
# boolean comparison portable across database backends.
for p in Post.objects.raw(
    '''
    SELECT blog_post.id,
           blog_post.title,
           blog_comment.content AS comment_content
    FROM   blog_post
           LEFT OUTER JOIN (SELECT post_id,
                                   MAX(id) AS latest
                            FROM   blog_comment
                            WHERE  user_id = %s
                                   AND approved = %s
                            GROUP  BY post_id) AS x
                        ON x.post_id = blog_post.id
           LEFT OUTER JOIN blog_comment
                        ON blog_comment.post_id = x.post_id
                           AND blog_comment.id = x.latest
    ORDER  BY blog_post.id;
    ''',
    [2, True],  # user_id of "Some Reader", approved comments only
):
    print('%s: %s' % (p.title, p.comment_content))

The above code outputs this (which is what I want):

Post 1: Comment 1 of 1 on post 1: approved
Post 2: None
Post 3: Comment 1 of 2 on post 3: approved
Post 4: Comment 2 of 2 on post 4: approved
Post 5: Comment 2 of 2 on post 5: approved
Post 6: None
Post 7: None
Post 8: None

My question is this: is it possible to (efficiently) do this same thing, but without resorting to raw SQL? I like to avoid raw queries whenever possible.

like image 902
Nick Avatar asked Nov 13 '13 18:11

Nick


1 Answer

You can't do this in a single query within the Django ORM without resorting to raw SQL. But you can do it with two database queries:

from django.db.models import Max

# Step 1 (lazy, becomes a subquery below): for each post, the id of the newest
# approved comment written by user 2 ("Some Reader" in the fixture).
# The empty order_by() clears Comment.Meta.ordering; otherwise the default
# '-id' ordering is added to the GROUP BY and every comment ends up in its
# own group instead of one group per post.
latest_ids = (
    Comment.objects
    .filter(user=2, approved=True)
    .order_by()
    .values('post')
    .annotate(latest=Max('id'))
    .values_list('latest', flat=True)
)

# Query 1: load those comments and index them by the post they belong to.
comments_by_post = dict(
    (comment.post_id, comment)
    for comment in Comment.objects.filter(id__in=latest_ids)
)

# Query 2: walk over all posts. Posts without a matching comment are simply
# missing from the dict, so .get() yields None for them instead of raising.
for post in Post.objects.order_by('id'):
    comment = comments_by_post.get(post.id)
    print('%s: %s' % (post.title, comment.content if comment else None))

I often write complex queries and still haven't found a better way than fetching all the data I need with a few queries into a cache object and then grouping it as needed.

Please don't use code like:

# Counter-example only. It assumes Post has a method along these lines:
#   get_comment(self, user):
#       return self.comment_set.filter(user=user, approved=True).latest('id')
# NOTE: latest() raises DoesNotExist when no comment matches the filter.
for post in Post.objects.all():
    # Every call runs a separate comment query for this one post.
    print post.get_comment(request.user)

It will issue one SQL query per post (len(posts) queries in total, on top of the query that fetches the posts). That's bad practice.

like image 95
Yevgeniy Shchemelev Avatar answered Nov 08 '22 19:11

Yevgeniy Shchemelev